Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

OSHMEM/MCA/SSHMEM/UCX: DEVICE_NIC_MEM hint - implementation should use RDMA memory type #11866

Merged
merged 1 commit into from
Oct 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion config/ompi_check_ucx.m4
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,8 @@ AC_DEFUN([OMPI_CHECK_UCX],[
UCP_ATOMIC_FETCH_OP_FXOR,
UCP_PARAM_FIELD_ESTIMATED_NUM_PPN,
UCP_WORKER_FLAG_IGNORE_REQUEST_LEAK,
UCP_OP_ATTR_FLAG_MULTI_SEND],
UCP_OP_ATTR_FLAG_MULTI_SEND,
UCS_MEMORY_TYPE_RDMA],
[], [],
[#include <ucp/api/ucp.h>])
AC_CHECK_DECLS([UCP_WORKER_ATTR_FIELD_ADDRESS_FLAGS],
Expand Down
30 changes: 2 additions & 28 deletions oshmem/mca/sshmem/ucx/configure.m4
Original file line number Diff line number Diff line change
Expand Up @@ -28,34 +28,9 @@ AC_DEFUN([MCA_oshmem_sshmem_ucx_CONFIG],[
save_LIBS="$LIBS"
save_CPPFLAGS="$CPPFLAGS"

alloc_dm_LDFLAGS=" -L$ompi_check_ucx_libdir/ucx"
alloc_dm_LIBS=" -luct_ib"
yosefe marked this conversation as resolved.
Show resolved Hide resolved
CPPFLAGS+=" $sshmem_ucx_CPPFLAGS"
LDFLAGS+=" $sshmem_ucx_LDFLAGS $alloc_dm_LDFLAGS"
LIBS+=" $sshmem_ucx_LIBS $alloc_dm_LIBS"

AC_LANG_PUSH([C])
AC_LINK_IFELSE([AC_LANG_PROGRAM(
[[
#include <ucp/core/ucp_resource.h>
#include <uct/ib/base/ib_alloc.h>
]],
[[
uct_md_h md = ucp_context_find_tl_md((ucp_context_h)NULL, "");
(void)uct_ib_md_alloc_device_mem(md, NULL, NULL, 0, "", NULL);
uct_ib_md_release_device_mem(NULL);
]])],
[
AC_MSG_NOTICE([UCX device memory allocation is supported])
AC_DEFINE([HAVE_UCX_DEVICE_MEM], [1], [Support for device memory allocation])
sshmem_ucx_LIBS+=" $alloc_dm_LIBS"
sshmem_ucx_LDFLAGS+=" $alloc_dm_LDFLAGS"
],
[
AC_MSG_NOTICE([UCX device memory allocation is not supported])
AC_DEFINE([HAVE_UCX_DEVICE_MEM], [0], [Support for device memory allocation])
])
AC_LANG_POP([C])
LDFLAGS+=" $sshmem_ucx_LDFLAGS"
LIBS+=" $sshmem_ucx_LIBS"

CPPFLAGS="$save_CPPFLAGS"
LDFLAGS="$save_LDFLAGS"
Expand All @@ -66,4 +41,3 @@ AC_DEFUN([MCA_oshmem_sshmem_ucx_CONFIG],[
AC_SUBST([sshmem_ucx_LDFLAGS])
AC_SUBST([sshmem_ucx_LIBS])
])dnl

1 change: 0 additions & 1 deletion oshmem/mca/sshmem/ucx/sshmem_ucx.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ OSHMEM_DECLSPEC extern mca_sshmem_ucx_component_t
mca_sshmem_ucx_component;

/*
 * Private context of a UCX-backed map segment; it is stored in the
 * segment's `context` field when the segment is created and freed when the
 * segment is unlinked.
 */
typedef struct mca_sshmem_ucx_segment_context {
/* Device memory handle handed to segment creation; when non-NULL it is
 * released with uct_ib_md_release_device_mem() on unlink (only when built
 * with HAVE_UCX_DEVICE_MEM). */
void *dev_mem;
/* NOTE(review): presumably the allocator used for hinted segments; its use
 * is not visible in this part of the file - confirm. */
sshmem_ucx_shadow_allocator_t *shadow_allocator;
/* Memory handle returned by ucp_mem_map(); passed to ucp_mem_unmap() on
 * unlink. */
ucp_mem_h ucp_memh;
} mca_sshmem_ucx_segment_context_t;
Expand Down
117 changes: 29 additions & 88 deletions oshmem/mca/sshmem/ucx/sshmem_ucx_module.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,6 @@

#include "sshmem_ucx.h"

//#include <ucs/sys/math.h>

#if HAVE_UCX_DEVICE_MEM
#include <ucp/core/ucp_resource.h>
#include <uct/ib/base/ib_alloc.h>
#endif

#define ALLOC_ELEM_SIZE sizeof(uint64_t)
#define min(a,b) ((a) < (b) ? (a) : (b))
#define max(a,b) ((a) > (b) ? (a) : (b))
Expand Down Expand Up @@ -104,7 +97,7 @@ static segment_allocator_t sshmem_ucx_allocator = {

static int
segment_create_internal(map_segment_t *ds_buf, void *address, size_t size,
unsigned flags, long hint, void *dev_mem)
unsigned flags, ucs_memory_type_t mem_type, int err_level)
{
mca_sshmem_ucx_segment_context_t *ctx;
int rc = OSHMEM_SUCCESS;
Expand All @@ -120,15 +113,19 @@ segment_create_internal(map_segment_t *ds_buf, void *address, size_t size,

mem_map_params.field_mask = UCP_MEM_MAP_PARAM_FIELD_ADDRESS |
UCP_MEM_MAP_PARAM_FIELD_LENGTH |
UCP_MEM_MAP_PARAM_FIELD_FLAGS;
UCP_MEM_MAP_PARAM_FIELD_FLAGS |
UCP_MEM_MAP_PARAM_FIELD_MEMORY_TYPE;

mem_map_params.address = address;
mem_map_params.length = size;
mem_map_params.flags = flags;
mem_map_params.address = address;
mem_map_params.length = size;
mem_map_params.flags = flags;
mem_map_params.memory_type = mem_type;
yosefe marked this conversation as resolved.
Show resolved Hide resolved

status = ucp_mem_map(spml->ucp_context, &mem_map_params, &mem_h);
if (UCS_OK != status) {
SSHMEM_ERROR("ucp_mem_map() failed: %s\n", ucs_status_string(status));
SSHMEM_VERBOSE(err_level, "ucp_mem_map(memory_type=%s) failed: %s\n",
ucs_memory_type_names[mem_type],
ucs_status_string(status));
rc = OSHMEM_ERROR;
goto out;
}
Expand Down Expand Up @@ -161,12 +158,7 @@ segment_create_internal(map_segment_t *ds_buf, void *address, size_t size,
ds_buf->super.va_end = (void*)((uintptr_t)ds_buf->super.va_base + ds_buf->seg_size);
ds_buf->context = ctx;
ds_buf->type = MAP_SEGMENT_ALLOC_UCX;
ds_buf->alloc_hints = hint;
ctx->ucp_memh = mem_h;
ctx->dev_mem = dev_mem;
if (hint) {
ds_buf->allocator = &sshmem_ucx_allocator;
}

out:
OPAL_OUTPUT_VERBOSE(
Expand All @@ -181,82 +173,37 @@ segment_create_internal(map_segment_t *ds_buf, void *address, size_t size,
return rc;
}

#if HAVE_UCX_DEVICE_MEM
/*
 * Try to allocate `size` bytes of device memory from the UCT memory domain
 * looked up in the SPML's UCP context under the name "mlx5".
 *
 * spml      - SPML instance whose ucp_context is searched
 * size      - requested length of the allocation, in bytes
 * address_p - out: receives the address reported by the allocator; written
 *             only on success
 *
 * Returns the device memory handle on success, or NULL when no memory
 * domain was found or the allocation failed (both cases are logged at
 * verbosity level 1).
 */
static uct_ib_device_mem_h alloc_device_mem(mca_spml_ucx_t *spml, size_t size,
void **address_p)
{
uct_ib_device_mem_h dev_mem = NULL;
ucs_status_t status;
uct_md_h uct_md;
void *address;
size_t length;

uct_md = ucp_context_find_tl_md(spml->ucp_context, "mlx5");
if (uct_md == NULL) {
SSHMEM_VERBOSE(1, "ucp_context_find_tl_md() returned NULL\n");
return NULL;
}

/* A matching memory domain was found; allocate device memory on it */
length = size;
address = NULL;
status = uct_ib_md_alloc_device_mem(uct_md, &length, &address,
UCT_MD_MEM_ACCESS_ALL, "sshmem_seg",
&dev_mem);
/* NOTE(review): the log messages below name uct_ib_md_alloc_dm(), but the
 * function actually called here is uct_ib_md_alloc_device_mem(). */
if (status != UCS_OK) {
/* Device memory could not be allocated: report failure so that the
 * caller falls back to mmap (some PEs in the job may succeed while
 * others fail) */
SSHMEM_VERBOSE(1, "uct_ib_md_alloc_dm() failed: %s\n",
ucs_status_string(status));
return NULL;
}

SSHMEM_VERBOSE(3, "uct_ib_md_alloc_dm() returned address %p\n", address);
*address_p = address;
return dev_mem;
}
#endif

static int
segment_create(map_segment_t *ds_buf,
const char *file_name,
size_t size, long hint)
{
mca_spml_ucx_t *spml = (mca_spml_ucx_t*)mca_spml.self;
unsigned flags;
unsigned flags = UCP_MEM_MAP_ALLOCATE;
int status;

#if HAVE_UCX_DEVICE_MEM
int ret = OSHMEM_ERROR;
if (hint & SHMEM_HINT_DEVICE_NIC_MEM) {
if (size > UINT_MAX) {
return OSHMEM_ERR_BAD_PARAM;
#if HAVE_DECL_UCS_MEMORY_TYPE_RDMA
status = segment_create_internal(ds_buf, NULL, size, flags,
yosefe marked this conversation as resolved.
Show resolved Hide resolved
UCS_MEMORY_TYPE_RDMA, 3);
if (status == OSHMEM_SUCCESS) {
ds_buf->alloc_hints = hint;
ds_buf->allocator = &sshmem_ucx_allocator;
return OSHMEM_SUCCESS;
}

void *dev_mem_address;
uct_ib_device_mem_h dev_mem = alloc_device_mem(spml, size,
&dev_mem_address);
if (dev_mem != NULL) {
int ret;
ret = segment_create_internal(ds_buf, dev_mem_address, size, 0,
hint, dev_mem);
if (ret == OSHMEM_SUCCESS) {
return OSHMEM_SUCCESS;
} else if (dev_mem != NULL) {
uct_ib_md_release_device_mem(dev_mem);
/* fallback to regular allocation */
}
}
}
#else
SSHMEM_VERBOSE(3, "DEVICE_NIC_MEM hint ignored since UCX does not "
"support MEMORY_TYPE_RDMA");
#endif
return OSHMEM_ERR_NOT_IMPLEMENTED;
}

flags = UCP_MEM_MAP_ALLOCATE | (spml->heap_reg_nb ? UCP_MEM_MAP_NONBLOCK : 0);
if (hint) {
return segment_create_internal(ds_buf, NULL, size, flags, hint, NULL);
} else {
return segment_create_internal(ds_buf, mca_sshmem_base_start_address,
size, flags | UCP_MEM_MAP_FIXED, hint,
NULL);
flags |= UCP_MEM_MAP_FIXED;
if (spml->heap_reg_nb) {
flags |= UCP_MEM_MAP_NONBLOCK;
}
return segment_create_internal(ds_buf, mca_sshmem_base_start_address, size,
flags, UCS_MEMORY_TYPE_HOST, 0);
}

static void *
Expand Down Expand Up @@ -303,12 +250,6 @@ segment_unlink(map_segment_t *ds_buf)

ucp_mem_unmap(spml->ucp_context, ctx->ucp_memh);

#if HAVE_UCX_DEVICE_MEM
if (ctx->dev_mem) {
uct_ib_md_release_device_mem(ctx->dev_mem);
}
#endif

ds_buf->context = NULL;
free(ctx);

Expand Down