Skip to content

Commit

Permalink
Improve uninitialized_{async_}buffer API (#2713)
Browse files Browse the repository at this point in the history
This implements some improvements to the buffers.

* We want to be able to convert them if the properties match.
* Add some improvements regarding symbol visibility
* Add a way to grow a vector
* Ensure we can simply swap them
  • Loading branch information
miscco authored Nov 7, 2024
1 parent 09be27a commit 78922b5
Show file tree
Hide file tree
Showing 6 changed files with 220 additions and 74 deletions.
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
//===----------------------------------------------------------------------===//
//
// Part of the CUDA Toolkit, under the Apache License v2.0 with LLVM Exceptions.
// Part of CUDA Experimental in CUDA C++ Core Libraries,
// under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
Expand All @@ -24,6 +25,7 @@
#include <cuda/__memory_resource/resource_ref.h>
#include <cuda/std/__memory/align.h>
#include <cuda/std/__new/launder.h>
#include <cuda/std/__type_traits/type_set.h>
#include <cuda/std/__utility/exchange.h>
#include <cuda/std/__utility/move.h>
#include <cuda/std/__utility/swap.h>
Expand Down Expand Up @@ -76,20 +78,32 @@ private:
"execution space property!");

using __async_resource = ::cuda::experimental::mr::any_async_resource<_Properties...>;

__async_resource __mr_;
::cuda::stream_ref __stream_ = {};
size_t __count_ = 0;
void* __buf_ = nullptr;

template <class, class...>
friend class uninitialized_async_buffer;

//! @brief Helper to check whether a different buffer still satisfies all properties of this one
//! @tparam _OtherProperties The properties of the other buffer
//! The first operand is only `true` when the type set built from `_Properties` and the one built from
//! `_OtherProperties` are different types, so an identical set of properties never matches here.
//! The second operand presumably requires every property in `_Properties` to be part of the type set built from
//! `_OtherProperties` (going by the name of `__type_set_contains_v`; its definition is not visible here).
template <class... _OtherProperties>
static constexpr bool __properties_match =
!_CCCL_TRAIT(_CUDA_VSTD::is_same,
_CUDA_VSTD::__make_type_set<_Properties...>,
_CUDA_VSTD::__make_type_set<_OtherProperties...>)
&& _CUDA_VSTD::__type_set_contains_v<_CUDA_VSTD::__make_type_set<_OtherProperties...>, _Properties...>;

//! @brief Determines the allocation size given the alignment and size of `T`
//! @param __count The number of elements of type `T` the allocation must be able to hold
//! @return `__count * sizeof(T)` rounded up to the next multiple of `alignof(T)`
_CCCL_NODISCARD _CCCL_HIDE_FROM_ABI static constexpr size_t __get_allocation_size(const size_t __count) noexcept
{
  constexpr size_t __alignment = alignof(_Tp);
  // Adding (alignment - 1) and clearing the low bits rounds up to a multiple of the alignment.
  // This relies on the alignment being a power of two, which alignof guarantees.
  return (__count * sizeof(_Tp) + (__alignment - 1)) & ~(__alignment - 1);
}

//! @brief Determines the properly aligned start of the buffer given the alignment and size of `T`
_CCCL_NODISCARD constexpr _Tp* __get_data() const noexcept
_CCCL_NODISCARD _CCCL_HIDE_FROM_ABI constexpr _Tp* __get_data() const noexcept
{
constexpr size_t __alignment = alignof(_Tp);
size_t __space = __get_allocation_size(__count_);
Expand All @@ -101,7 +115,8 @@ private:
//! @brief Causes the buffer to be treated as a span when passed to cudax::launch.
//! @pre The buffer must have the cuda::mr::device_accessible property.
template <class _Tp2 = _Tp>
_CCCL_NODISCARD_FRIEND auto __cudax_launch_transform(::cuda::stream_ref, uninitialized_async_buffer& __self) noexcept
_CCCL_NODISCARD_FRIEND _CCCL_HIDE_FROM_ABI auto
__cudax_launch_transform(::cuda::stream_ref, uninitialized_async_buffer& __self) noexcept
_LIBCUDACXX_TRAILING_REQUIRES(_CUDA_VSTD::span<_Tp>)(
_CUDA_VSTD::same_as<_Tp, _Tp2>&& _CUDA_VSTD::__is_included_in_v<_CUDA_VMR::device_accessible, _Properties...>)
{
Expand All @@ -112,7 +127,7 @@ private:
//! @brief Causes the buffer to be treated as a span when passed to cudax::launch
//! @pre The buffer must have the cuda::mr::device_accessible property.
template <class _Tp2 = _Tp>
_CCCL_NODISCARD_FRIEND auto
_CCCL_NODISCARD_FRIEND _CCCL_HIDE_FROM_ABI auto
__cudax_launch_transform(::cuda::stream_ref, const uninitialized_async_buffer& __self) noexcept
_LIBCUDACXX_TRAILING_REQUIRES(_CUDA_VSTD::span<const _Tp>)(
_CUDA_VSTD::same_as<_Tp, _Tp2>&& _CUDA_VSTD::__is_included_in_v<_CUDA_VMR::device_accessible, _Properties...>)
Expand All @@ -127,35 +142,50 @@ public:
using pointer = _Tp*;
using size_type = size_t;

//! @brief Constructs an \c uninitialized_async_buffer, allocating sufficient storage for \p __count elements through
//! \p __mr
//! @param __mr The async memory resource to allocate the buffer with.
//! @param __stream The CUDA stream used for stream-ordered allocation.
//! @param __count The desired size of the buffer.
//! @note Depending on the alignment requirements of `T` the size of the underlying allocation might be larger
//! than `count * sizeof(T)`. Only allocates memory when \p __count > 0
//! @note The initializer of `__buf_` reads `__mr_`, `__stream_` and `__count_`, so it relies on those members being
//! declared (and therefore initialized) before `__buf_`.
_CCCL_HIDE_FROM_ABI
uninitialized_async_buffer(__async_resource __mr, const ::cuda::stream_ref __stream, const size_t __count)
: __mr_(_CUDA_VSTD::move(__mr))
, __stream_(__stream)
, __count_(__count)
, __buf_(__count_ == 0 ? nullptr : __mr_.allocate_async(__get_allocation_size(__count_), __stream_))
{}

//! @brief Copy construction is disabled; the buffer can only be moved
_CCCL_HIDE_FROM_ABI uninitialized_async_buffer(const uninitialized_async_buffer&) = delete;
//! @brief Copy assignment is disabled; the buffer can only be moved
_CCCL_HIDE_FROM_ABI uninitialized_async_buffer& operator=(const uninitialized_async_buffer&) = delete;

//! @brief Move construction
//! @brief Move-constructs a \c uninitialized_async_buffer from \p __other
//! @param __other Another \c uninitialized_async_buffer
uninitialized_async_buffer(uninitialized_async_buffer&& __other) noexcept
//! Takes ownership of the allocation in \p __other and resets it
_CCCL_HIDE_FROM_ABI uninitialized_async_buffer(uninitialized_async_buffer&& __other) noexcept
: __mr_(_CUDA_VSTD::move(__other.__mr_))
, __stream_(_CUDA_VSTD::exchange(__other.__stream_, {}))
, __count_(_CUDA_VSTD::exchange(__other.__count_, 0))
, __buf_(_CUDA_VSTD::exchange(__other.__buf_, nullptr))
{}

//! @brief Move-constructs a \c uninitialized_async_buffer from \p __other
//! @tparam _OtherProperties The properties of \p __other. Only participates when
//! `__properties_match<_OtherProperties...>` is `true`.
//! @param __other Another \c uninitialized_async_buffer with matching properties
//! Takes ownership of the allocation in \p __other and resets it
//! @note The stored resource is constructed from the moved resource of \p __other, which has a different
//! property list than this buffer.
_LIBCUDACXX_TEMPLATE(class... _OtherProperties)
_LIBCUDACXX_REQUIRES(__properties_match<_OtherProperties...>)
_CCCL_HIDE_FROM_ABI uninitialized_async_buffer(uninitialized_async_buffer<_Tp, _OtherProperties...>&& __other) noexcept
: __mr_(_CUDA_VSTD::move(__other.__mr_))
// Stream, size and pointer of __other are reset so that it no longer refers to the allocation
, __stream_(_CUDA_VSTD::exchange(__other.__stream_, {}))
, __count_(_CUDA_VSTD::exchange(__other.__count_, 0))
, __buf_(_CUDA_VSTD::exchange(__other.__buf_, nullptr))
{}

//! @brief Move assignment
//! @brief Move-assigns a \c uninitialized_async_buffer from \p __other
//! @param __other Another \c uninitialized_async_buffer
uninitialized_async_buffer& operator=(uninitialized_async_buffer&& __other) noexcept
//! Deallocates the current allocation and then takes ownership of the allocation in \p __other and resets it
_CCCL_HIDE_FROM_ABI uninitialized_async_buffer& operator=(uninitialized_async_buffer&& __other) noexcept
{
if (this == _CUDA_VSTD::addressof(__other))
{
Expand All @@ -172,45 +202,46 @@ public:
__buf_ = _CUDA_VSTD::exchange(__other.__buf_, nullptr);
return *this;
}

//! @brief Destroys an \c uninitialized_async_buffer and deallocates the buffer in stream order on the stream that was
//! used to create the buffer.
//! @warning The destructor does not destroy any objects that may or may not reside within the buffer. It is the
//! user's responsibility to ensure that all objects within the buffer have been properly destroyed.
~uninitialized_async_buffer()
_CCCL_HIDE_FROM_ABI ~uninitialized_async_buffer()
{
if (__buf_)
{
__mr_.deallocate_async(__buf_, __get_allocation_size(__count_), __stream_);
}
}

//! @brief Returns an aligned pointer to the first element in the buffer
//! @return The pointer produced by `__get_data()`
_CCCL_NODISCARD _CCCL_HIDE_FROM_ABI constexpr pointer begin() const noexcept
{
  return __get_data();
}

//! @brief Returns an aligned pointer to the element following the last element of the buffer.
//! This element acts as a placeholder; attempting to access it results in undefined behavior.
//! @return The start of the buffer advanced by the number of elements
_CCCL_NODISCARD _CCCL_HIDE_FROM_ABI constexpr pointer end() const noexcept
{
  return __get_data() + __count_;
}

//! @brief Returns an aligned pointer to the first element in the buffer
//! @return The pointer produced by `__get_data()`
_CCCL_NODISCARD _CCCL_HIDE_FROM_ABI constexpr pointer data() const noexcept
{
  return __get_data();
}

//! @brief Returns the size of the buffer
//! @return The number of elements of type `T` the buffer was sized for
_CCCL_NODISCARD _CCCL_HIDE_FROM_ABI constexpr size_type size() const noexcept
{
  return __count_;
}

//! @brief Returns the size of the buffer in bytes
//! @return `size() * sizeof(T)`, without the rounding up to the alignment that is applied to the allocation size
_CCCL_NODISCARD _CCCL_HIDE_FROM_ABI constexpr size_type size_bytes() const noexcept
{
  return __count_ * sizeof(_Tp);
}
Expand All @@ -219,21 +250,21 @@ public:
//! Returns a \c const reference to the :ref:`any_async_resource <cudax-memory-resource-any-async-resource>`
//! that holds the memory resource used to allocate the buffer
//! @endrst
_CCCL_NODISCARD _CCCL_HIDE_FROM_ABI const __async_resource& get_resource() const noexcept
{
  // Hands out a reference to the stored resource; no copy is made
  return __mr_;
}

//! @brief Returns the stored stream
//! @return A copy of the \c ::cuda::stream_ref currently held by the buffer
_CCCL_NODISCARD _CCCL_HIDE_FROM_ABI constexpr ::cuda::stream_ref get_stream() const noexcept
{
  return __stream_;
}

//! @brief Replaces the stored stream
//! @param __new_stream the new stream
//! @note Always synchronizes with the old stream
constexpr void change_stream(::cuda::stream_ref __new_stream)
_CCCL_HIDE_FROM_ABI constexpr void change_stream(::cuda::stream_ref __new_stream)
{
if (__new_stream != __stream_)
{
Expand All @@ -242,22 +273,26 @@ public:
__stream_ = __new_stream;
}

//! @brief Swaps the contents with those of another \c uninitialized_async_buffer
//! @param __other The other \c uninitialized_async_buffer.
constexpr void swap(uninitialized_async_buffer& __other) noexcept
{
_CUDA_VSTD::swap(__mr_, __other.__mr_);
_CUDA_VSTD::swap(__count_, __other.__count_);
_CUDA_VSTD::swap(__buf_, __other.__buf_);
}

# ifndef DOXYGEN_SHOULD_SKIP_THIS // friend functions are currently broken
//! @brief Forwards the passed properties
//! Only available for properties without a value that are part of `_Properties...`; the function body is empty.
_LIBCUDACXX_TEMPLATE(class _Property)
_LIBCUDACXX_REQUIRES(
  (!property_with_value<_Property>) _LIBCUDACXX_AND _CUDA_VSTD::__is_included_in_v<_Property, _Properties...>)
_CCCL_HIDE_FROM_ABI friend constexpr void get_property(const uninitialized_async_buffer&, _Property) noexcept {}
# endif // DOXYGEN_SHOULD_SKIP_THIS

//! @brief Internal method that replaces the current allocation with a fresh one of size \p __count.
//! @param __count The size of the new allocation.
//! @return An \c uninitialized_async_buffer owning the allocation this buffer held before the call
//! @warning The returned buffer only references the memory resource stored in this buffer, so this buffer must
//! outlive the returned one
_CCCL_HIDE_FROM_ABI uninitialized_async_buffer __replace_allocation(const size_t __count)
{
  // Allocate the new storage through a non-owning reference to our own resource, on our own stream
  uninitialized_async_buffer __previous{_CUDA_VMR::async_resource_ref<_Properties...>{__mr_}, __stream_, __count};

  // Trade size and pointer: we keep the new allocation, __previous ends up with the old one
  __previous.__count_ = _CUDA_VSTD::exchange(__count_, __previous.__count_);
  __previous.__buf_   = _CUDA_VSTD::exchange(__buf_, __previous.__buf_);
  return __previous;
}
};

template <class _Tp>
Expand Down
Loading

0 comments on commit 78922b5

Please sign in to comment.