diff --git a/src/mpi/errhan/errnames.txt b/src/mpi/errhan/errnames.txt index 06238483e23..bdeff8b5c5e 100644 --- a/src/mpi/errhan/errnames.txt +++ b/src/mpi/errhan/errnames.txt @@ -956,6 +956,7 @@ is too big (> MPIU_SHMW_GHND_SZ) **xpmem_release: xpmem_release failed **xpmem_remove: xpmem_remove failed **xpmem_segtree_init: xpmem_segtree_init failed +**xpmem_segtree_finalize: xpmem_segtree_finalize failed ## GPU related error messages **gpu_query_ptr: gpu_query_pointer_attr failed diff --git a/src/mpid/ch4/shm/ipc/src/ipc_p2p.h b/src/mpid/ch4/shm/ipc/src/ipc_p2p.h index 9cacfd2baa6..8b57c0db208 100644 --- a/src/mpid/ch4/shm/ipc/src/ipc_p2p.h +++ b/src/mpid/ch4/shm/ipc/src/ipc_p2p.h @@ -240,12 +240,14 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_IPCI_handle_lmt_recv(MPIDI_IPC_hdr * ipc_hdr, { void *src_buf = NULL; /* map */ - mpi_errno = MPIDI_XPMEM_ipc_handle_map(ipc_hdr->ipc_handle.xpmem, &src_buf); + mpi_errno = MPIDI_XPMEM_ipc_handle_map(&ipc_hdr->ipc_handle.xpmem, &src_buf); MPIR_ERR_CHECK(mpi_errno); /* copy */ mpi_errno = MPIDI_IPCI_copy_data(ipc_hdr, rreq, src_buf, src_data_sz); MPIR_ERR_CHECK(mpi_errno); - /* skip unmap */ + /* unmap */ + mpi_errno = MPIDI_XPMEM_ipc_handle_unmap(&ipc_hdr->ipc_handle.xpmem); + MPIR_ERR_CHECK(mpi_errno); } break; #endif diff --git a/src/mpid/ch4/shm/ipc/src/ipc_win.c b/src/mpid/ch4/shm/ipc/src/ipc_win.c index e7f5d2b9ea2..e771383f7a5 100644 --- a/src/mpid/ch4/shm/ipc/src/ipc_win.c +++ b/src/mpid/ch4/shm/ipc/src/ipc_win.c @@ -195,7 +195,7 @@ int MPIDI_IPC_mpi_win_create_hook(MPIR_Win * win) #ifdef MPIDI_CH4_SHM_ENABLE_XPMEM case MPIDI_IPCI_TYPE__XPMEM: mpi_errno = - MPIDI_XPMEM_ipc_handle_map(ipc_shared_table[i].ipc_handle.xpmem, + MPIDI_XPMEM_ipc_handle_map(&ipc_shared_table[i].ipc_handle.xpmem, &shared_table[i].shm_base_addr); MPIR_ERR_CHECK(mpi_errno); shared_table[i].mapped_type = 2; diff --git a/src/mpid/ch4/shm/ipc/xpmem/Makefile.mk b/src/mpid/ch4/shm/ipc/xpmem/Makefile.mk index 33978458e8a..b110d785bbe 100644 --- 
a/src/mpid/ch4/shm/ipc/xpmem/Makefile.mk +++ b/src/mpid/ch4/shm/ipc/xpmem/Makefile.mk @@ -7,11 +7,9 @@ noinst_HEADERS += src/mpid/ch4/shm/ipc/xpmem/xpmem_pre.h \ src/mpid/ch4/shm/ipc/xpmem/xpmem_post.h if BUILD_SHM_IPC_XPMEM -noinst_HEADERS += src/mpid/ch4/shm/ipc/xpmem/xpmem_seg.h \ - src/mpid/ch4/shm/ipc/xpmem/xpmem_types.h +noinst_HEADERS += src/mpid/ch4/shm/ipc/xpmem/xpmem_types.h mpi_core_sources += src/mpid/ch4/shm/ipc/xpmem/globals.c \ src/mpid/ch4/shm/ipc/xpmem/xpmem_init.c \ - src/mpid/ch4/shm/ipc/xpmem/xpmem_mem.c \ - src/mpid/ch4/shm/ipc/xpmem/xpmem_seg.c + src/mpid/ch4/shm/ipc/xpmem/xpmem_mem.c endif diff --git a/src/mpid/ch4/shm/ipc/xpmem/xpmem_init.c b/src/mpid/ch4/shm/ipc/xpmem/xpmem_init.c index b4c0120e44a..7409b45b3ff 100644 --- a/src/mpid/ch4/shm/ipc/xpmem/xpmem_init.c +++ b/src/mpid/ch4/shm/ipc/xpmem/xpmem_init.c @@ -6,9 +6,24 @@ #include "mpidimpl.h" #include "xpmem_post.h" #include "mpidu_init_shm.h" -#include "xpmem_seg.h" -static int xpmem_initialized = 0; +/* +=== BEGIN_MPI_T_CVAR_INFO_BLOCK === + +cvars: + - name : MPIR_CVAR_CH4_XPMEM_ENABLE + category : CH4 + type : int + default : 1 + class : none + verbosity : MPI_T_VERBOSITY_USER_BASIC + scope : MPI_T_SCOPE_ALL_EQ + description : >- + To manually disable XPMEM set to 0. The environment variable is valid only when the XPMEM + submodule is enabled. + +=== END_MPI_T_CVAR_INFO_BLOCK === +*/ int MPIDI_XPMEM_init_local(void) { @@ -75,7 +90,7 @@ int MPIDI_XPMEM_init_world(void) } MPIDU_Init_shm_barrier(); - xpmem_initialized = 1; + MPIDI_XPMEMI_global.initialized = true; fn_exit: MPIR_FUNC_EXIT; @@ -89,7 +104,7 @@ int MPIDI_XPMEM_init_world(void) * kernel module to be loaded at runtime. If XPMEM is not available, disable its use via the * special CVAR value. */ XPMEM_TRACE("init: xpmem_make failed. 
Disabling XPMEM support"); - MPIR_CVAR_CH4_XPMEM_ENABLE = 0; + MPIDI_XPMEMI_global.initialized = false; MPIR_CHKPMEM_REAP(); goto fn_exit; @@ -101,7 +116,7 @@ int MPIDI_XPMEM_mpi_finalize_hook(void) int i, ret = 0; MPIR_FUNC_ENTER; - if (MPIDI_XPMEMI_global.segid == -1 || !xpmem_initialized) { + if (MPIDI_XPMEMI_global.segid == -1 || !MPIDI_XPMEMI_global.initialized) { /* if XPMEM was disabled at runtime, return */ goto fn_exit; } @@ -109,7 +124,7 @@ int MPIDI_XPMEM_mpi_finalize_hook(void) for (i = 0; i < MPIR_Process.local_size; i++) { /* should be called before xpmem_release * MPIDI_XPMEMI_segtree_delete_all will call xpmem_detach */ - MPL_gavl_tree_destroy(MPIDI_XPMEMI_global.segmaps[i].segcache_ubuf); + MPIDI_XPMEMI_segtree_finalize(MPIDI_XPMEMI_global.segmaps[i].segcache_ubuf); if (MPIDI_XPMEMI_global.segmaps[i].apid != -1) { XPMEM_TRACE("finalize: release apid: node_rank %d, 0x%lx\n", i, (uint64_t) MPIDI_XPMEMI_global.segmaps[i].apid); @@ -126,7 +141,7 @@ int MPIDI_XPMEM_mpi_finalize_hook(void) /* success(0) or failure(-1) */ MPIR_ERR_CHKANDJUMP(ret == -1, mpi_errno, MPI_ERR_OTHER, "**xpmem_remove"); - xpmem_initialized = 0; + MPIDI_XPMEMI_global.initialized = false; fn_exit: MPIR_FUNC_EXIT; diff --git a/src/mpid/ch4/shm/ipc/xpmem/xpmem_mem.c b/src/mpid/ch4/shm/ipc/xpmem/xpmem_mem.c index 9acfdf95178..0a8bc91e990 100644 --- a/src/mpid/ch4/shm/ipc/xpmem/xpmem_mem.c +++ b/src/mpid/ch4/shm/ipc/xpmem/xpmem_mem.c @@ -2,29 +2,206 @@ * Copyright (C) by Argonne National Laboratory * See COPYRIGHT in top-level directory */ -#include "xpmem_seg.h" + +#include "mpidimpl.h" #include "xpmem_post.h" -int MPIDI_XPMEM_ipc_handle_map(MPIDI_XPMEM_ipc_handle_t handle, void **vaddr) +/* +=== BEGIN_MPI_T_CVAR_INFO_BLOCK === + +cvars: + - name : MPIR_CVAR_CH4_XPMEM_SEG_CACHE_ENABLE + category : CH4 + type : boolean + default : true + class : none + verbosity : MPI_T_VERBOSITY_USER_BASIC + scope : MPI_T_SCOPE_ALL_EQ + description : >- + Enable mapped segment cache on receiver side 
to avoid mapping overhead + per operation. + +=== END_MPI_T_CVAR_INFO_BLOCK === +*/ + +static MPIDI_XPMEMI_seg_t *seg_search(MPL_gavl_tree_t segcache, void *addr, uintptr_t size); +static void seg_insert(MPL_gavl_tree_t segcache, uintptr_t seg_low, uintptr_t seg_size, + void *att_vaddr); +static void seg_free(void *seg); + +/* Maps the region described by handle into the local process memory. A cached + * segment is reused if available; otherwise a new one is attached and inserted into the cache. + * It internally rounds down the low address and rounds up the size to + * ensure the attached segment is page aligned. The user-buffer segment cache + * of the source process (segmaps[handle->src_lrank].segcache_ubuf) is always + * used. If the segment cache is disabled, every call attaches anew and + * nothing is cached. + * + * Input parameters: + * - handle: handle for region to be mapped; handle->att_vaddr is set to the page-aligned attached address + * Output parameters: + * - vaddr: corresponding start address of the remote buffer in local + * virtual address space. */ +int MPIDI_XPMEM_ipc_handle_map(MPIDI_XPMEM_ipc_handle_t * handle, void **vaddr) { int mpi_errno = MPI_SUCCESS; - MPIR_FUNC_ENTER; - /* map the true data range, assuming no data outside true_lb/true_ub */ - void *addr = MPIR_get_contig_ptr(handle.addr, handle.true_lb); + void *addr = MPIR_get_contig_ptr(handle->addr, handle->true_lb); void *addr_out; - mpi_errno = - MPIDI_XPMEMI_seg_regist(handle.src_lrank, handle.range, addr, &addr_out, - MPIDI_XPMEMI_global.segmaps[handle.src_lrank].segcache_ubuf); + int node_rank = handle->src_lrank; + uintptr_t size = handle->range; + void *remote_vaddr = MPIR_get_contig_ptr(handle->addr, handle->true_lb); + MPIDI_XPMEMI_segmap_t *segmap = &MPIDI_XPMEMI_global.segmaps[node_rank]; + MPL_gavl_tree_t segcache = segmap->segcache_ubuf; + MPIDI_XPMEMI_seg_t *seg = NULL; + uintptr_t seg_low; + uintptr_t seg_size; + void *att_vaddr; + + MPIR_FUNC_ENTER; + + /* Get apid if it is the first time registered on the local process. 
*/ + if (segmap->apid == -1) { + segmap->apid = xpmem_get(segmap->remote_segid, XPMEM_RDWR, XPMEM_PERMIT_MODE, + MPIDI_XPMEMI_PERMIT_VALUE); + /* 64-bit access permit ID or failure(-1) */ + MPIR_ERR_CHKANDJUMP(segmap->apid == -1, mpi_errno, MPI_ERR_OTHER, "**xpmem_get"); + XPMEM_TRACE("seg: register apid 0x%lx for node_rank %d, segid 0x%lx\n", + (uint64_t) segmap->apid, node_rank, (uint64_t) segmap->remote_segid); + } + + /* Search a cached segment or create a new one. Both low and size must be page aligned. */ + seg_low = MPL_ROUND_DOWN_ALIGN((uint64_t) remote_vaddr, + (uint64_t) MPIDI_XPMEMI_global.sys_page_sz); + seg_size = + MPL_ROUND_UP_ALIGN(size + ((uintptr_t) remote_vaddr - seg_low), + MPIDI_XPMEMI_global.sys_page_sz); + + seg = seg_search(segcache, remote_vaddr, size); + if (seg == NULL) { + struct xpmem_addr xpmem_addr; + xpmem_addr.apid = segmap->apid; + xpmem_addr.offset = seg_low; + att_vaddr = xpmem_attach(xpmem_addr, seg_size, NULL); + MPIR_ERR_CHKANDJUMP2(att_vaddr == (void *) -1, mpi_errno, MPI_ERR_OTHER, "**xpmem_attach", + "**xpmem_attach %p %d", remote_vaddr, (int) size); + seg_insert(segcache, seg_low, seg_size, att_vaddr); + } else { + seg_low = seg->remote_align_addr; + att_vaddr = (void *) seg->att_vaddr; + } + handle->att_vaddr = att_vaddr; - if (handle.is_contig) { + /* return mapped vaddr without round down */ + addr_out = (void *) ((uintptr_t) remote_vaddr - seg_low + att_vaddr); + XPMEM_TRACE("seg: mappped segment for node_rank %d, apid 0x%lx, " + "size 0x%lx->0x%lx, seg->low %p->0x%lx, attached_vaddr %p, vaddr %p\n", + node_rank, (uint64_t) segmap->apid, size, seg_size, + remote_vaddr, seg_low, (void *) att_vaddr, addr_out); + + if (handle->is_contig) { /* We'll do MPIR_Typerep_unpack */ *vaddr = addr_out; } else { /* We'll do MPIR_Localcopy */ - *vaddr = MPIR_get_contig_ptr(addr_out, -handle.true_lb); + *vaddr = MPIR_get_contig_ptr(addr_out, -handle->true_lb); + } + + fn_fail: + MPIR_FUNC_EXIT; + return mpi_errno; +} + +int 
MPIDI_XPMEM_ipc_handle_unmap(MPIDI_XPMEM_ipc_handle_t * handle) +{ + int mpi_errno = MPI_SUCCESS; + int ret; + + MPIR_FUNC_ENTER; + + /* skip unmap if cache enabled */ + if (MPIR_CVAR_CH4_XPMEM_SEG_CACHE_ENABLE) { + goto fn_exit; + } + + ret = xpmem_detach((void *) handle->att_vaddr); + MPIR_ERR_CHKANDJUMP(ret != 0, mpi_errno, MPI_ERR_OTHER, "**xpmem_detach"); + + fn_exit: + MPIR_FUNC_EXIT; + return mpi_errno; + fn_fail: + goto fn_exit; +} + +/*** segment cache routines ***/ + +/* Initialize an empty tree for segment cache. + * It should be called only once per AVL tree at MPI init; no tree is created when the segment cache is disabled. */ +int MPIDI_XPMEMI_segtree_init(MPL_gavl_tree_t * tree) +{ + int mpi_errno = MPI_SUCCESS, ret; + MPIR_FUNC_ENTER; + + if (MPIR_CVAR_CH4_XPMEM_SEG_CACHE_ENABLE) { + ret = MPL_gavl_tree_create(seg_free, tree); + MPIR_ERR_CHKANDJUMP(ret != MPL_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**xpmem_segtree_init"); + } + + fn_exit: + MPIR_FUNC_EXIT; + return mpi_errno; + fn_fail: + goto fn_exit; +} + +int MPIDI_XPMEMI_segtree_finalize(MPL_gavl_tree_t tree) +{ + int mpi_errno = MPI_SUCCESS, ret; + MPIR_FUNC_ENTER; + + if (MPIR_CVAR_CH4_XPMEM_SEG_CACHE_ENABLE) { + ret = MPL_gavl_tree_destroy(tree); + MPIR_ERR_CHKANDJUMP(ret != MPL_SUCCESS, mpi_errno, MPI_ERR_OTHER, + "**xpmem_segtree_finalize"); } + fn_exit: MPIR_FUNC_EXIT; return mpi_errno; + fn_fail: + goto fn_exit; +} + +static MPIDI_XPMEMI_seg_t *seg_search(MPL_gavl_tree_t segcache, void *addr, uintptr_t size) +{ + if (MPIR_CVAR_CH4_XPMEM_SEG_CACHE_ENABLE) { + return MPL_gavl_tree_search(segcache, addr, size); + } + + return NULL; +} + +static void seg_insert(MPL_gavl_tree_t segcache, uintptr_t seg_low, uintptr_t seg_size, + void *att_vaddr) +{ + if (MPIR_CVAR_CH4_XPMEM_SEG_CACHE_ENABLE) { + MPIDI_XPMEMI_seg_t *seg = MPL_malloc(sizeof(MPIDI_XPMEMI_seg_t), MPL_MEM_OTHER); + MPIR_Assert(seg != NULL); + seg->remote_align_addr = seg_low; + seg->att_vaddr = (uintptr_t) att_vaddr; + MPL_gavl_tree_insert(segcache, (void *) seg_low, seg_size, (void *) 
seg); + } +} + +static void seg_free(void *seg) +{ + MPIDI_XPMEMI_seg_t *seg_ptr = (MPIDI_XPMEMI_seg_t *) seg; + MPIR_FUNC_ENTER; + + xpmem_detach((void *) seg_ptr->att_vaddr); + MPL_free(seg); + + MPIR_FUNC_EXIT; + return; } diff --git a/src/mpid/ch4/shm/ipc/xpmem/xpmem_post.h b/src/mpid/ch4/shm/ipc/xpmem/xpmem_post.h index 3f9230913a3..f133b776e94 100644 --- a/src/mpid/ch4/shm/ipc/xpmem/xpmem_post.h +++ b/src/mpid/ch4/shm/ipc/xpmem/xpmem_post.h @@ -5,24 +5,15 @@ #ifndef XPMEM_POST_H_INCLUDED #define XPMEM_POST_H_INCLUDED +#ifdef MPIDI_CH4_SHM_ENABLE_XPMEM #include "ch4_impl.h" #include "ipc_types.h" +#include "xpmem_types.h" /* === BEGIN_MPI_T_CVAR_INFO_BLOCK === cvars: - - name : MPIR_CVAR_CH4_XPMEM_ENABLE - category : CH4 - type : int - default : 1 - class : none - verbosity : MPI_T_VERBOSITY_USER_BASIC - scope : MPI_T_SCOPE_ALL_EQ - description : >- - To manually disable XPMEM set to 0. The environment variable is valid only when the XPMEM - submodule is enabled. - - name : MPIR_CVAR_CH4_IPC_XPMEM_P2P_THRESHOLD category : CH4 type : int @@ -38,7 +29,6 @@ === END_MPI_T_CVAR_INFO_BLOCK === */ -#ifdef MPIDI_CH4_SHM_ENABLE_XPMEM MPL_STATIC_INLINE_PREFIX int MPIDI_XPMEM_get_ipc_attr(const void *buf, MPI_Aint count, MPI_Datatype datatype, MPIDI_IPCI_ipc_attr_t * ipc_attr) @@ -52,7 +42,7 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_XPMEM_get_ipc_attr(const void *buf, MPI_Aint int dt_contig; MPIDI_Datatype_get_info(count, datatype, dt_contig, data_sz, dt_ptr, true_lb); - if (!MPIR_CVAR_CH4_XPMEM_ENABLE || buf == MPI_BOTTOM || + if (!MPIDI_XPMEMI_global.initialized || buf == MPI_BOTTOM || data_sz < MPIR_CVAR_CH4_IPC_XPMEM_P2P_THRESHOLD) { goto fn_exit; } else { @@ -99,7 +89,8 @@ MPL_STATIC_INLINE_PREFIX void MPIDI_XPMEM_fill_ipc_handle(MPIDI_IPCI_ipc_attr_t int MPIDI_XPMEM_init_local(void); int MPIDI_XPMEM_init_world(void); int MPIDI_XPMEM_mpi_finalize_hook(void); -int MPIDI_XPMEM_ipc_handle_map(MPIDI_XPMEM_ipc_handle_t mem_handle, void **vaddr); +int 
MPIDI_XPMEM_ipc_handle_map(MPIDI_XPMEM_ipc_handle_t * mem_handle, void **vaddr); +int MPIDI_XPMEM_ipc_handle_unmap(MPIDI_XPMEM_ipc_handle_t * handle); #endif #endif /* XPMEM_POST_H_INCLUDED */ diff --git a/src/mpid/ch4/shm/ipc/xpmem/xpmem_pre.h b/src/mpid/ch4/shm/ipc/xpmem/xpmem_pre.h index b30b2081d71..44ca6af0548 100644 --- a/src/mpid/ch4/shm/ipc/xpmem/xpmem_pre.h +++ b/src/mpid/ch4/shm/ipc/xpmem/xpmem_pre.h @@ -12,6 +12,7 @@ typedef struct { int is_contig; const void *addr; MPI_Aint true_lb, range; + const void *att_vaddr; } MPIDI_XPMEM_ipc_handle_t; /* local struct used for query and preparing memory handle. diff --git a/src/mpid/ch4/shm/ipc/xpmem/xpmem_seg.c b/src/mpid/ch4/shm/ipc/xpmem/xpmem_seg.c deleted file mode 100644 index f688b335ea6..00000000000 --- a/src/mpid/ch4/shm/ipc/xpmem/xpmem_seg.c +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Copyright (C) by Argonne National Laboratory - * See COPYRIGHT in top-level directory - */ - -#include "mpidimpl.h" -#include "xpmem_pre.h" -#include "xpmem_seg.h" - -/****************************************/ -/* Segment cache public routines */ -/****************************************/ -/* Initialize an empty tree for segment cache. - * It should be called only once for a AVL tree at MPI init.*/ -int MPIDI_XPMEMI_segtree_init(MPL_gavl_tree_t * tree) -{ - int mpi_errno = MPI_SUCCESS, ret; - MPIR_FUNC_ENTER; - - ret = MPL_gavl_tree_create(MPIDI_XPMEM_seg_free, tree); - MPIR_ERR_CHKANDJUMP(ret != MPL_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**xpmem_segtree_init"); - - fn_exit: - MPIR_FUNC_EXIT; - return mpi_errno; - fn_fail: - goto fn_exit; -} - -/* Registers a segment into cache for the specified remote buffer. - * It internally rounds down the low address and rounds up the size to - * ensure the cached segment is page aligned. Specific tree is given to - * differentiate different cache tree (e.g. 
user buffer tree used to cache - * user buffer, and XPMEM cooperative counter tree used to cache counter - * obj) - * - * Input parameters: - * - node_rank: rank of remote process on local node. - * - size: size in bytes of the remote buffer. - * - remote_vaddr: start virtual address of the remote buffer - * - segcache: specific tree we want to insert segment into - * Output parameters: - * - seg_ptr: registered segment. It can be a matched existing segment - * or a newly created one. - * - vaddr: corresponding start address of the remote buffer in local - * virtual address space. */ -int MPIDI_XPMEMI_seg_regist(int node_rank, uintptr_t size, - void *remote_vaddr, void **vaddr, MPL_gavl_tree_t segcache) -{ - int mpi_errno = MPI_SUCCESS, mpl_err; - MPIDI_XPMEMI_segmap_t *segmap = &MPIDI_XPMEMI_global.segmaps[node_rank]; - MPIDI_XPMEMI_seg_t *seg = NULL; - uintptr_t seg_low; - uintptr_t seg_size; - MPIR_FUNC_ENTER; - /* Get apid if it is the first time registered on the local process. */ - if (segmap->apid == -1) { - segmap->apid = xpmem_get(segmap->remote_segid, XPMEM_RDWR, XPMEM_PERMIT_MODE, - MPIDI_XPMEMI_PERMIT_VALUE); - /* 64-bit access permit ID or failure(-1) */ - MPIR_ERR_CHKANDJUMP(segmap->apid == -1, mpi_errno, MPI_ERR_OTHER, "**xpmem_get"); - XPMEM_TRACE("seg: register apid 0x%lx for node_rank %d, segid 0x%lx\n", - (uint64_t) segmap->apid, node_rank, (uint64_t) segmap->remote_segid); - } - - /* Search a cached segment or create a new one. Both low and size must be page aligned. 
*/ - seg_low = MPL_ROUND_DOWN_ALIGN((uint64_t) remote_vaddr, - (uint64_t) MPIDI_XPMEMI_global.sys_page_sz); - seg_size = - MPL_ROUND_UP_ALIGN(size + ((uintptr_t) remote_vaddr - seg_low), - MPIDI_XPMEMI_global.sys_page_sz); - - seg = MPL_gavl_tree_search(segcache, remote_vaddr, size); - if (seg == NULL) { - struct xpmem_addr xpmem_addr; - void *att_vaddr; - - seg = (MPIDI_XPMEMI_seg_t *) MPL_malloc(sizeof(MPIDI_XPMEMI_seg_t), MPL_MEM_OTHER); - MPIR_Assert(seg != NULL); - - xpmem_addr.apid = segmap->apid; - xpmem_addr.offset = seg_low; - att_vaddr = xpmem_attach(xpmem_addr, seg_size, NULL); - MPIR_ERR_CHKANDJUMP2(att_vaddr == (void *) -1, mpi_errno, MPI_ERR_OTHER, "**xpmem_attach", - "**xpmem_attach %p %d", remote_vaddr, (int) size); - seg->remote_align_addr = seg_low; - seg->att_vaddr = (uintptr_t) att_vaddr; - MPL_gavl_tree_insert(segcache, (void *) seg_low, seg_size, (void *) seg); - } - - /* return mapped vaddr without round down */ - *vaddr = (void *) ((uintptr_t) remote_vaddr - seg->remote_align_addr + seg->att_vaddr); - XPMEM_TRACE("seg: register segment %p for node_rank %d, apid 0x%lx, " - "size 0x%lx->0x%lx, seg->low %p->0x%lx, attached_vaddr %p, vaddr %p\n", seg, - node_rank, (uint64_t) segmap->apid, size, seg_size, - remote_vaddr, seg_low, (void *) seg->att_vaddr, *vaddr); - fn_exit: - MPIR_FUNC_EXIT; - return mpi_errno; - fn_fail: - goto fn_exit; -} - -void MPIDI_XPMEM_seg_free(void *seg) -{ - MPIDI_XPMEMI_seg_t *seg_ptr = (MPIDI_XPMEMI_seg_t *) seg; - MPIR_FUNC_ENTER; - - xpmem_detach((void *) seg_ptr->att_vaddr); - MPL_free(seg); - - MPIR_FUNC_EXIT; - return; -} diff --git a/src/mpid/ch4/shm/ipc/xpmem/xpmem_seg.h b/src/mpid/ch4/shm/ipc/xpmem/xpmem_seg.h deleted file mode 100644 index 52c1b33593a..00000000000 --- a/src/mpid/ch4/shm/ipc/xpmem/xpmem_seg.h +++ /dev/null @@ -1,16 +0,0 @@ -/* - * Copyright (C) by Argonne National Laboratory - * See COPYRIGHT in top-level directory - */ - -#ifndef XPMEM_SEG_H_INCLUDED -#define XPMEM_SEG_H_INCLUDED - -#include 
"xpmem_types.h" - -int MPIDI_XPMEMI_segtree_init(MPL_gavl_tree_t * tree); -int MPIDI_XPMEMI_seg_regist(int node_rank, uintptr_t size, - void *remote_vaddr, void **vaddr, MPL_gavl_tree_t segcache); -void MPIDI_XPMEM_seg_free(void *seg); - -#endif /* XPMEM_SEG_H_INCLUDED */ diff --git a/src/mpid/ch4/shm/ipc/xpmem/xpmem_types.h b/src/mpid/ch4/shm/ipc/xpmem/xpmem_types.h index af8231f2254..2d8a424012f 100644 --- a/src/mpid/ch4/shm/ipc/xpmem/xpmem_types.h +++ b/src/mpid/ch4/shm/ipc/xpmem/xpmem_types.h @@ -26,6 +26,7 @@ typedef struct { xpmem_segid_t segid; /* my local segid associated with entire address space */ MPIDI_XPMEMI_segmap_t *segmaps; /* remote seg info for every local processes. */ size_t sys_page_sz; + bool initialized; } MPIDI_XPMEMI_global_t; extern MPIDI_XPMEMI_global_t MPIDI_XPMEMI_global; @@ -36,4 +37,7 @@ extern MPL_dbg_class MPIDI_XPMEMI_DBG_GENERAL; #define XPMEM_TRACE(...) \ MPL_DBG_MSG_FMT(MPIDI_XPMEMI_DBG_GENERAL,VERBOSE,(MPL_DBG_FDEST, "XPMEM "__VA_ARGS__)) +int MPIDI_XPMEMI_segtree_init(MPL_gavl_tree_t * tree); +int MPIDI_XPMEMI_segtree_finalize(MPL_gavl_tree_t tree); + #endif /* XPMEM_TYPES_H_INCLUDED */