From a2aa9712ed57c1b6a8f0a80d0fc7ba1aa4462ba4 Mon Sep 17 00:00:00 2001 From: Eric Niebler Date: Sun, 1 Dec 2024 08:44:08 -0800 Subject: [PATCH] make the upper limit on TMP loop unrolling configurable (#2971) * make the upper limit on TMP loop unrolling configurable * drop the TMP PP unrolling limit on nvcc and nvhpc whose parsers are slow --- .../cuda/std/__type_traits/type_list.h | 36 +++++++++++-------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/libcudacxx/include/cuda/std/__type_traits/type_list.h b/libcudacxx/include/cuda/std/__type_traits/type_list.h index 1beb22b180..53fd4a44b4 100644 --- a/libcudacxx/include/cuda/std/__type_traits/type_list.h +++ b/libcudacxx/include/cuda/std/__type_traits/type_list.h @@ -40,6 +40,14 @@ //! For the purpose of this file, a "trait type" is a class type with a nested //! type alias named \c type. +#if !defined(_CCCL_META_UNROLL_LIMIT) +# if defined(_CCCL_CUDA_COMPILER_NVCC) || _CCCL_COMPILER(NVHPC) +# define _CCCL_META_UNROLL_LIMIT 10 +# else +# define _CCCL_META_UNROLL_LIMIT 16 +# endif +#endif + _LIBCUDACXX_BEGIN_NAMESPACE_STD #ifndef _CCCL_DOXYGEN_INVOKED // Do not document @@ -502,7 +510,7 @@ struct __type_index_small_size_fn; using __call _CCCL_NODEBUG_ALIAS = _Ty; \ }; -_CCCL_PP_REPEAT_REVERSE(16, _M1) +_CCCL_PP_REPEAT_REVERSE(_CCCL_META_UNROLL_LIMIT, _M1) # undef _M0 # undef _M1 @@ -524,7 +532,7 @@ struct __type_index_select_fn // Fast implementation for smaller indices } // namespace __detail template -using __type_index = __type_call<__detail::__type_index_select_fn<(_Ip::value < 16)>, _Ip, _Ts...>; +using __type_index = __type_call<__detail::__type_index_select_fn<(_Ip::value < _CCCL_META_UNROLL_LIMIT)>, _Ip, _Ts...>; template using __type_index_c = __type_index, _Ts...>; @@ -858,17 +866,17 @@ struct _CCCL_TYPE_VISIBILITY_DEFAULT __type_fold_left_fn; using __call _CCCL_NODEBUG_ALIAS = _CCCL_PP_REPEAT(_N, _M1) _State _CCCL_PP_REPEAT(_N, _M3); \ }; -_CCCL_PP_REPEAT_REVERSE(17, _LIBCUDACXX_TYPE_LIST_FOLD_RIGHT) +_CCCL_PP_REPEAT_REVERSE(_CCCL_PP_INC(_CCCL_META_UNROLL_LIMIT), _LIBCUDACXX_TYPE_LIST_FOLD_RIGHT) template struct _CCCL_TYPE_VISIBILITY_DEFAULT __type_fold_right_fn { - template - using __call _CCCL_NODEBUG_ALIAS = - __type_call_indirect<__type_fold_right_fn<_Np - 16>, - _Fn, - __type_call<__type_fold_right_fn<16>, _Fn, _State _CCCL_PP_REPEAT(16, _M2)>, - _Rest...>; + template + using __call _CCCL_NODEBUG_ALIAS = __type_call_indirect< + __type_fold_right_fn<_Np - _CCCL_META_UNROLL_LIMIT>, + _Fn, + __type_call<__type_fold_right_fn<_CCCL_META_UNROLL_LIMIT>, _Fn, _State _CCCL_PP_REPEAT(_CCCL_META_UNROLL_LIMIT, _M2)>, + _Rest...>; }; template @@ -886,17 +894,17 @@ struct _CCCL_TYPE_VISIBILITY_DEFAULT __type_fold_right_select_fn using __call _CCCL_NODEBUG_ALIAS = _CCCL_PP_REPEAT(_N, _M1) _State _CCCL_PP_REPEAT(_N, _M4, _N, _CCCL_PP_DEC); \ }; -_CCCL_PP_REPEAT_REVERSE(17, _LIBCUDACXX_TYPE_FOLD_LEFT) +_CCCL_PP_REPEAT_REVERSE(_CCCL_PP_INC(_CCCL_META_UNROLL_LIMIT), _LIBCUDACXX_TYPE_FOLD_LEFT) template struct _CCCL_TYPE_VISIBILITY_DEFAULT __type_fold_left_fn { - template + template using __call _CCCL_NODEBUG_ALIAS = - __type_call<__type_fold_left_fn<16>, + __type_call<__type_fold_left_fn<_CCCL_META_UNROLL_LIMIT>, _Fn, - __type_call_indirect<__type_fold_left_fn<_Np - 16>, _Fn, _State, _Rest...> // - _CCCL_PP_REPEAT(16, _M2, 0, _CCCL_PP_INC)>; + __type_call_indirect<__type_fold_left_fn<_Np - _CCCL_META_UNROLL_LIMIT>, _Fn, _State, _Rest...> // + _CCCL_PP_REPEAT(_CCCL_META_UNROLL_LIMIT, _M2, 0, _CCCL_PP_INC)>; }; template