diff --git a/libcudacxx/include/cuda/__memory_resource/cuda_async_memory_resource.h b/libcudacxx/include/cuda/__memory_resource/cuda_async_memory_resource.h
index 7057af7522e..edb6b88846a 100644
--- a/libcudacxx/include/cuda/__memory_resource/cuda_async_memory_resource.h
+++ b/libcudacxx/include/cuda/__memory_resource/cuda_async_memory_resource.h
@@ -79,7 +79,6 @@ class cuda_async_memory_resource
 
   /**
    * @brief Constructs the cuda_async_memory_resource from a cuda_memory_pool by calling pool_handle()
-   * @throws cuda_error if retrieving the default cudaMemPool_t fails
    */
   cuda_async_memory_resource(cuda_memory_pool& __cuda_pool) noexcept
       : __pool_(__cuda_pool.pool_handle())
@@ -170,7 +169,8 @@ class cuda_async_memory_resource
    * @param __ptr Pointer to be deallocated. Must have been allocated through a call to `allocate_async`
    * @param __bytes The number of bytes that was passed to the `allocate_async` call that returned \p __ptr.
    * @param __alignment The alignment that was passed to the `allocate_async` call that returned \p __ptr.
-   * @param __stream Stream that was passed to the `allocate_async` call that returned \p __ptr.
+   * @param __stream A stream that has a stream ordering relationship with the stream used in the `allocate_async` call
+   * that returned \p __ptr. See https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY__POOLS.html
    */
   void deallocate_async(void* __ptr, const size_t __bytes, const size_t __alignment, const ::cuda::stream_ref __stream)
   {
@@ -185,7 +185,8 @@ class cuda_async_memory_resource
    * @brief Deallocate memory pointed to by \p __ptr.
    * @param __ptr Pointer to be deallocated. Must have been allocated through a call to `allocate_async`
    * @param __bytes The number of bytes that was passed to the `allocate_async` call that returned \p __ptr.
-   * @param __stream Stream that was passed to the `allocate_async` call that returned \p __ptr.
+   * @param __stream A stream that has a stream ordering relationship with the stream used in the `allocate_async` call
+   * that returned \p __ptr. See https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY__POOLS.html
    */
   void deallocate_async(void* __ptr, size_t, const ::cuda::stream_ref __stream)
   {
diff --git a/libcudacxx/include/cuda/__memory_resource/cuda_memory_pool.h b/libcudacxx/include/cuda/__memory_resource/cuda_memory_pool.h
index dcbee5fefae..272712d1af2 100644
--- a/libcudacxx/include/cuda/__memory_resource/cuda_memory_pool.h
+++ b/libcudacxx/include/cuda/__memory_resource/cuda_memory_pool.h
@@ -141,11 +141,11 @@ class cuda_memory_pool
    * @returns The created cuda memory pool
    */
   _CCCL_NODISCARD static cudaMemPool_t __create_cuda_mempool(
+    const int __device_id,
     size_t __initial_pool_size,
     size_t __release_threshold,
     const cudaMemAllocationHandleType __allocation_handle_type) noexcept
   {
-    const int __device_id = _CUDA_VMR::__get_current_cuda_device();
     _LIBCUDACXX_ASSERT(_CUDA_VMR::__device_supports_pools(__device_id), "cudaMallocAsync not supported");
     _LIBCUDACXX_ASSERT(__cuda_supports_export_handle_type(__device_id, __allocation_handle_type),
                        "Requested IPC memory handle type not supported");
@@ -210,16 +210,20 @@ class cuda_memory_pool
    *
    * @throws ::cuda::cuda_error if the CUDA version does not support `cudaMallocAsync`
    *
-   * @param initial_pool_size Optional initial size in bytes of the pool. If no value is provided,
+   * @param __device_id The device id of the device the stream pool is constructed on.
+   * @param __initial_pool_size Optional initial size in bytes of the pool. If no value is provided,
    * initial pool size is half of the available GPU memory.
-   * @param release_threshold Optional release threshold size in bytes of the pool. If no value is
+   * @param __release_threshold Optional release threshold size in bytes of the pool. If no value is
    * provided, the release threshold is set to the total amount of memory on the current device.
+   * @param __allocation_handle_type Optional allocation handle for export.
    */
-  cuda_memory_pool(
-    const size_t initial_pool_size = 0,
-    const size_t release_threshold = 0,
+  explicit cuda_memory_pool(
+    const int __device_id,
+    const size_t __initial_pool_size = 0,
+    const size_t __release_threshold = 0,
     const cudaMemAllocationHandleType __allocation_handle_type = cudaMemAllocationHandleType::cudaMemHandleTypeNone)
-      : __pool_handle_(__create_cuda_mempool(initial_pool_size, release_threshold, __allocation_handle_type))
+      : __pool_handle_(
+          __create_cuda_mempool(__device_id, __initial_pool_size, __release_threshold, __allocation_handle_type))
   {}
 
   /**
diff --git a/libcudacxx/test/libcudacxx/cuda/memory_resource/cuda_async_memory_resource/construct.pass.cpp b/libcudacxx/test/libcudacxx/cuda/memory_resource/cuda_async_memory_resource/construct.pass.cpp
index a8d4c5fdff1..8eecdc6a54c 100644
--- a/libcudacxx/test/libcudacxx/cuda/memory_resource/cuda_async_memory_resource/construct.pass.cpp
+++ b/libcudacxx/test/libcudacxx/cuda/memory_resource/cuda_async_memory_resource/construct.pass.cpp
@@ -137,7 +137,7 @@ void test()
 
   {
     const size_t initial_pool_size = 42;
-    cuda::mr::cuda_memory_pool pool{initial_pool_size};
+    cuda::mr::cuda_memory_pool pool{current_device, initial_pool_size};
     async_resource from_initial_pool_size{pool};
 
     ::cudaMemPool_t pool_handle = from_initial_pool_size.pool_handle();
@@ -156,7 +156,7 @@ void test()
   {
     const size_t initial_pool_size = 42;
     const size_t release_threshold = 20;
-    cuda::mr::cuda_memory_pool pool{initial_pool_size, release_threshold};
+    cuda::mr::cuda_memory_pool pool{current_device, initial_pool_size, release_threshold};
     async_resource with_threshold{pool};
 
     ::cudaMemPool_t pool_handle = with_threshold.pool_handle();
@@ -179,7 +179,7 @@ void test()
     const size_t release_threshold = 20;
     const cuda::mr::cudaMemAllocationHandleType allocation_handle{
       cuda::mr::cudaMemAllocationHandleType::cudaMemHandleTypePosixFileDescriptor};
-    cuda::mr::cuda_memory_pool pool{initial_pool_size, release_threshold, allocation_handle};
+    cuda::mr::cuda_memory_pool pool{current_device, initial_pool_size, release_threshold, allocation_handle};
     async_resource with_allocation_handle{pool};
 
     ::cudaMemPool_t pool_handle = with_allocation_handle.pool_handle();
diff --git a/libcudacxx/test/libcudacxx/cuda/memory_resource/cuda_memory_pool/construct.pass.cpp b/libcudacxx/test/libcudacxx/cuda/memory_resource/cuda_memory_pool/construct.pass.cpp
index bbfc0579e72..d70fda28222 100644
--- a/libcudacxx/test/libcudacxx/cuda/memory_resource/cuda_memory_pool/construct.pass.cpp
+++ b/libcudacxx/test/libcudacxx/cuda/memory_resource/cuda_memory_pool/construct.pass.cpp
@@ -78,13 +78,13 @@ void test()
   }
 
   {
-    cuda::mr::cuda_memory_pool default_constructed{};
-    assert(default_constructed.pool_handle() != current_default_pool);
+    cuda::mr::cuda_memory_pool from_device{current_device};
+    assert(from_device.pool_handle() != current_default_pool);
   }
 
   {
     const size_t initial_pool_size = 42;
-    cuda::mr::cuda_memory_pool from_initial_pool_size{initial_pool_size};
+    cuda::mr::cuda_memory_pool from_initial_pool_size{current_device, initial_pool_size};
 
     ::cudaMemPool_t pool_handle = from_initial_pool_size.pool_handle();
     assert(pool_handle != current_default_pool);
@@ -102,7 +102,7 @@ void test()
   {
     const size_t initial_pool_size = 42;
     const size_t release_threshold = 20;
-    cuda::mr::cuda_memory_pool with_threshold{initial_pool_size, release_threshold};
+    cuda::mr::cuda_memory_pool with_threshold{current_device, initial_pool_size, release_threshold};
 
     ::cudaMemPool_t pool_handle = with_threshold.pool_handle();
     assert(pool_handle != current_default_pool);
@@ -124,7 +124,8 @@ void test()
     const size_t release_threshold = 20;
     const cuda::mr::cudaMemAllocationHandleType allocation_handle{
       cuda::mr::cudaMemAllocationHandleType::cudaMemHandleTypePosixFileDescriptor};
-    cuda::mr::cuda_memory_pool with_allocation_handle{initial_pool_size, release_threshold, allocation_handle};
+    cuda::mr::cuda_memory_pool with_allocation_handle{
+      current_device, initial_pool_size, release_threshold, allocation_handle};
 
     ::cudaMemPool_t pool_handle = with_allocation_handle.pool_handle();
     assert(pool_handle != current_default_pool);
diff --git a/libcudacxx/test/libcudacxx/cuda/memory_resource/cuda_memory_pool/equality.pass.cpp b/libcudacxx/test/libcudacxx/cuda/memory_resource/cuda_memory_pool/equality.pass.cpp
index 203ae07b8ab..cdda2b434b5 100644
--- a/libcudacxx/test/libcudacxx/cuda/memory_resource/cuda_memory_pool/equality.pass.cpp
+++ b/libcudacxx/test/libcudacxx/cuda/memory_resource/cuda_memory_pool/equality.pass.cpp
@@ -39,9 +39,9 @@ void test()
       current_device);
   }
 
-  cuda::mr::cuda_memory_pool first{};
+  cuda::mr::cuda_memory_pool first{current_device};
   { // comparison against a plain cuda_memory_pool
-    cuda::mr::cuda_memory_pool second{};
+    cuda::mr::cuda_memory_pool second{current_device};
     assert(first == first);
     assert(first != second);
   }
diff --git a/libcudacxx/test/libcudacxx/cuda/memory_resource/cuda_memory_pool/traits.pass.cpp b/libcudacxx/test/libcudacxx/cuda/memory_resource/cuda_memory_pool/traits.pass.cpp
index fe9a87da219..7e30076f807 100644
--- a/libcudacxx/test/libcudacxx/cuda/memory_resource/cuda_memory_pool/traits.pass.cpp
+++ b/libcudacxx/test/libcudacxx/cuda/memory_resource/cuda_memory_pool/traits.pass.cpp
@@ -20,7 +20,7 @@
 using pool = cuda::mr::cuda_memory_pool;
 static_assert(!cuda::std::is_trivial<pool>::value, "");
 static_assert(!cuda::std::is_trivially_default_constructible<pool>::value, "");
-static_assert(cuda::std::is_default_constructible<pool>::value, "");
+static_assert(!cuda::std::is_default_constructible<pool>::value, "");
 static_assert(!cuda::std::is_copy_constructible<pool>::value, "");
 static_assert(!cuda::std::is_move_constructible<pool>::value, "");
 static_assert(!cuda::std::is_copy_assignable<pool>::value, "");
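
Taken together, the patch removes the default constructor of cuda::mr::cuda_memory_pool (note the flipped is_default_constructible assertion), threads an explicit device id into __create_cuda_mempool, and relaxes the documented deallocate_async requirement to any stream that is stream-ordered after the allocating stream. The snippet below is a minimal usage sketch of the changed constructor, not part of the patch: the umbrella header <cuda/memory_resource>, the allocate_async(bytes, alignment, stream) overload, and the cuda::mr namespace for cuda_async_memory_resource are assumed rather than taken from the diff.

    #include <cuda/memory_resource> // assumed umbrella header exposing cuda::mr
    #include <cuda/stream_ref>
    #include <cuda_runtime_api.h>

    #include <cstdio>

    int main()
    {
      // The pool can no longer be default-constructed; the target device is now explicit.
      const int device_id = 0;
      cudaSetDevice(device_id);

      // Pool on device_id with a 1 MiB initial size and release threshold;
      // the allocation handle type keeps its default (cudaMemHandleTypeNone).
      cuda::mr::cuda_memory_pool pool{device_id, 1 << 20, 1 << 20};

      // The async resource borrows the pool's cudaMemPool_t via pool_handle();
      // the cuda::mr namespace for this class is assumed here.
      cuda::mr::cuda_async_memory_resource resource{pool};

      cudaStream_t raw_stream{};
      cudaStreamCreate(&raw_stream);
      cuda::stream_ref stream{raw_stream};

      // allocate_async overload assumed from the documented deallocate_async parameters.
      void* ptr = resource.allocate_async(256, 16, stream);

      // Per the updated documentation, deallocation only needs a stream ordered after
      // the allocating stream; reusing the same stream trivially satisfies that.
      resource.deallocate_async(ptr, 256, 16, stream);

      cudaStreamSynchronize(raw_stream);
      cudaStreamDestroy(raw_stream);
      std::printf("pool-backed async allocation round-tripped\n");
      return 0;
    }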