Skip to content

Commit

Permalink
SHMEM/MCA/SSHMEM/UCX: Fixing DEVICE_NIC_MEM support to use RDMA memory
Browse files Browse the repository at this point in the history
type

Signed-off-by: Roie Danino <[email protected]>
  • Loading branch information
roiedanino committed Aug 21, 2023
1 parent 385c791 commit 946a131
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 109 deletions.
23 changes: 0 additions & 23 deletions oshmem/mca/sshmem/ucx/configure.m4
Original file line number Diff line number Diff line change
Expand Up @@ -34,29 +34,6 @@ AC_DEFUN([MCA_oshmem_sshmem_ucx_CONFIG],[
LDFLAGS+=" $sshmem_ucx_LDFLAGS $alloc_dm_LDFLAGS"
LIBS+=" $sshmem_ucx_LIBS $alloc_dm_LIBS"
AC_LANG_PUSH([C])
AC_LINK_IFELSE([AC_LANG_PROGRAM(
[[
#include <ucp/core/ucp_resource.h>
#include <uct/ib/base/ib_alloc.h>
]],
[[
uct_md_h md = ucp_context_find_tl_md((ucp_context_h)NULL, "");
(void)uct_ib_md_alloc_device_mem(md, NULL, NULL, 0, "", NULL);
uct_ib_md_release_device_mem(NULL);
]])],
[
AC_MSG_NOTICE([UCX device memory allocation is supported])
AC_DEFINE([HAVE_UCX_DEVICE_MEM], [1], [Support for device memory allocation])
sshmem_ucx_LIBS+=" $alloc_dm_LIBS"
sshmem_ucx_LDFLAGS+=" $alloc_dm_LDFLAGS"
],
[
AC_MSG_NOTICE([UCX device memory allocation is not supported])
AC_DEFINE([HAVE_UCX_DEVICE_MEM], [0], [Support for device memory allocation])
])
AC_LANG_POP([C])
CPPFLAGS="$save_CPPFLAGS"
LDFLAGS="$save_LDFLAGS"
LIBS="$save_LIBS"
Expand Down
2 changes: 1 addition & 1 deletion oshmem/mca/sshmem/ucx/sshmem_ucx.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include "oshmem/mca/sshmem/sshmem.h"

#include <ucp/api/ucp.h>
#include <uct/api/uct.h>

BEGIN_C_DECLS

Expand All @@ -35,7 +36,6 @@ OSHMEM_DECLSPEC extern mca_sshmem_ucx_component_t
mca_sshmem_ucx_component;

/*
 * Private state that the UCX sshmem component keeps for a segment.
 * NOTE(review): presumably stored in map_segment_t::context - confirm
 * against segment_create_internal()/segment_unlink().
 */
typedef struct mca_sshmem_ucx_segment_context {
/* Device-memory handle handed to segment_create_internal(); when non-NULL
 * it is released with uct_ib_md_release_device_mem() in segment_unlink(). */
void *dev_mem;
/* NOTE(review): usage is not visible in this view - presumably the
 * allocator state backing hinted segments; confirm. */
sshmem_ucx_shadow_allocator_t *shadow_allocator;
/* Memory handle produced by ucp_mem_map(); passed to ucp_mem_unmap() in
 * segment_unlink(). */
ucp_mem_h ucp_memh;
} mca_sshmem_ucx_segment_context_t;
Expand Down
96 changes: 11 additions & 85 deletions oshmem/mca/sshmem/ucx/sshmem_ucx_module.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,6 @@

#include "sshmem_ucx.h"

//#include <ucs/sys/math.h>

#if HAVE_UCX_DEVICE_MEM
#include <ucp/core/ucp_resource.h>
#include <uct/ib/base/ib_alloc.h>
#endif

#define ALLOC_ELEM_SIZE sizeof(uint64_t)
#define min(a,b) ((a) < (b) ? (a) : (b))
#define max(a,b) ((a) > (b) ? (a) : (b))
Expand Down Expand Up @@ -104,7 +97,7 @@ static segment_allocator_t sshmem_ucx_allocator = {

static int
segment_create_internal(map_segment_t *ds_buf, void *address, size_t size,
unsigned flags, long hint, void *dev_mem)
unsigned flags, long hint)
{
mca_sshmem_ucx_segment_context_t *ctx;
int rc = OSHMEM_SUCCESS;
Expand All @@ -120,11 +113,14 @@ segment_create_internal(map_segment_t *ds_buf, void *address, size_t size,

mem_map_params.field_mask = UCP_MEM_MAP_PARAM_FIELD_ADDRESS |
UCP_MEM_MAP_PARAM_FIELD_LENGTH |
UCP_MEM_MAP_PARAM_FIELD_FLAGS;
UCP_MEM_MAP_PARAM_FIELD_FLAGS |
UCP_MEM_MAP_PARAM_FIELD_MEMORY_TYPE;

mem_map_params.address = address;
mem_map_params.length = size;
mem_map_params.flags = flags;
mem_map_params.address = address;
mem_map_params.length = size;
mem_map_params.flags = flags;
mem_map_params.memory_type = (hint & SHMEM_HINT_DEVICE_NIC_MEM) ?
UCS_MEMORY_TYPE_RDMA : UCS_MEMORY_TYPE_HOST;

status = ucp_mem_map(spml->ucp_context, &mem_map_params, &mem_h);
if (UCS_OK != status) {
Expand Down Expand Up @@ -163,7 +159,6 @@ segment_create_internal(map_segment_t *ds_buf, void *address, size_t size,
ds_buf->type = MAP_SEGMENT_ALLOC_UCX;
ds_buf->alloc_hints = hint;
ctx->ucp_memh = mem_h;
ctx->dev_mem = dev_mem;
if (hint) {
ds_buf->allocator = &sshmem_ucx_allocator;
}
Expand All @@ -181,81 +176,18 @@ segment_create_internal(map_segment_t *ds_buf, void *address, size_t size,
return rc;
}

#if HAVE_UCX_DEVICE_MEM
/*
 * Allocate NIC device memory for a symmetric segment.
 *
 * Looks up the "mlx5" memory domain in the SPML's UCP context and requests
 * `size` bytes of device memory named "sshmem_seg" with
 * UCT_MD_MEM_ACCESS_ALL access.
 *
 * @param spml       SPML UCX module whose ucp_context is searched.
 * @param size       Requested allocation size in bytes.
 * @param address_p  Output: receives the address filled in by the allocation
 *                   call. Written only on success.
 *
 * @return The device memory handle on success; NULL if no "mlx5" memory
 *         domain was found or the allocation failed.
 */
static uct_ib_device_mem_h alloc_device_mem(mca_spml_ucx_t *spml, size_t size,
void **address_p)
{
uct_ib_device_mem_h dev_mem = NULL;
ucs_status_t status;
uct_md_h uct_md;
void *address;
size_t length;

uct_md = ucp_context_find_tl_md(spml->ucp_context, "mlx5");
if (uct_md == NULL) {
SSHMEM_VERBOSE(1, "ucp_context_find_tl_md() returned NULL\n");
return NULL;
}

/* A matching memory domain was found; allocate device memory on it.
 * length is passed by pointer, but whatever the call writes back is not
 * read afterwards (presumably the allocator may adjust it - confirm). */
length = size;
address = NULL;
status = uct_ib_md_alloc_device_mem(uct_md, &length, &address,
UCT_MD_MEM_ACCESS_ALL, "sshmem_seg",
&dev_mem);
if (status != UCS_OK) {
/* If device memory could not be allocated, fall back to mmap (some PEs
 * in the job may succeed while others fail).
 * NOTE(review): the message below names uct_ib_md_alloc_dm(), but the
 * function actually called is uct_ib_md_alloc_device_mem(). */
SSHMEM_VERBOSE(1, "uct_ib_md_alloc_dm() failed: %s\n",
ucs_status_string(status));
return NULL;
}

SSHMEM_VERBOSE(3, "uct_ib_md_alloc_dm() returned address %p\n", address);
*address_p = address;
return dev_mem;
}
#endif

static int
segment_create(map_segment_t *ds_buf,
const char *file_name,
size_t size, long hint)
{
mca_spml_ucx_t *spml = (mca_spml_ucx_t*)mca_spml.self;
unsigned flags;

#if HAVE_UCX_DEVICE_MEM
int ret = OSHMEM_ERROR;
if (hint & SHMEM_HINT_DEVICE_NIC_MEM) {
if (size > UINT_MAX) {
return OSHMEM_ERR_BAD_PARAM;
}

void *dev_mem_address;
uct_ib_device_mem_h dev_mem = alloc_device_mem(spml, size,
&dev_mem_address);
if (dev_mem != NULL) {
int ret;
ret = segment_create_internal(ds_buf, dev_mem_address, size, 0,
hint, dev_mem);
if (ret == OSHMEM_SUCCESS) {
return OSHMEM_SUCCESS;
} else if (dev_mem != NULL) {
uct_ib_md_release_device_mem(dev_mem);
/* fallback to regular allocation */
}
}
}
#endif

flags = UCP_MEM_MAP_ALLOCATE | (spml->heap_reg_nb ? UCP_MEM_MAP_NONBLOCK : 0);
unsigned flags = UCP_MEM_MAP_ALLOCATE | (spml->heap_reg_nb ? UCP_MEM_MAP_NONBLOCK : 0);
if (hint) {
return segment_create_internal(ds_buf, NULL, size, flags, hint, NULL);
return segment_create_internal(ds_buf, NULL, size, flags, hint);
} else {
return segment_create_internal(ds_buf, mca_sshmem_base_start_address,
size, flags | UCP_MEM_MAP_FIXED, hint,
NULL);
size, flags | UCP_MEM_MAP_FIXED, hint);
}
}

Expand Down Expand Up @@ -303,12 +235,6 @@ segment_unlink(map_segment_t *ds_buf)

ucp_mem_unmap(spml->ucp_context, ctx->ucp_memh);

#if HAVE_UCX_DEVICE_MEM
if (ctx->dev_mem) {
uct_ib_md_release_device_mem(ctx->dev_mem);
}
#endif

ds_buf->context = NULL;
free(ctx);

Expand Down

0 comments on commit 946a131

Please sign in to comment.