Skip to content

Commit 48df263

Browse files
authored
DAOS-16257 mercury: Add flush before ep close. (#126)
Signed-off-by: Joseph Moore <joseph.moore@hpe.com>
1 parent d00d04f commit 48df263

File tree

3 files changed

+74
-1
lines changed

3 files changed

+74
-1
lines changed

debian/changelog

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
mercury (2.4.0-3) unstable; urgency=medium
2+
[ Joseph Moore ]
3+
* Add patch to na_ucx.c to flush endpoint prior to close
4+
5+
-- Joseph Moore <joseph.moore@hpe.com> Wed, 15 Jan 2025 10:00:00 -0600
16

27
mercury (2.4.0-2) unstable; urgency=medium
38
[ Joseph Moore ]

mercury.spec

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Name: mercury
22
Version: 2.4.0
3-
Release: 2%{?dist}
3+
Release: 3%{?dist}
44

55
# --without ucx build switch
66
%bcond_without ucx
@@ -16,6 +16,7 @@ Group: Development/Libraries
1616
URL: http://mercury-hpc.github.io/
1717
Source0: https://github.com/mercury-hpc/%{name}/releases/download/v%{dl_version}/%{name}-%{dl_version}.tar.bz2
1818
Patch0: na_ucx.patch
19+
Patch1: na_ucx_ep_flush.patch
1920

2021
BuildRequires: libfabric-devel >= 1.20
2122
BuildRequires: cmake
@@ -131,6 +132,9 @@ Mercury plugin to support the UCX transport.
131132
%{_libdir}/cmake/
132133

133134
%changelog
135+
* Wed Jan 15 2025 Joseph Moore <joseph.moore@hpe.com> - 2.4.0-3
136+
- Add patch to na_ucx.c to flush endpoint prior to close.
137+
134138
* Tue Jan 07 2025 Joseph Moore <joseph.moore@hpe.com> - 2.4.0-2
135139
- Enable debug RPMs for Leap sub-packages.
136140

na_ucx_ep_flush.patch

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
diff --git a/src/na/na_ucx.c b/src/na/na_ucx.c
2+
index 6e9c3b0..2f157da 100644
3+
--- a/src/na/na_ucx.c
4+
+++ b/src/na/na_ucx.c
5+
@@ -441,6 +441,12 @@ na_ucp_ep_create(ucp_worker_h worker, ucp_ep_params_t *ep_params,
6+
static void
7+
na_ucp_ep_error_cb(void *arg, ucp_ep_h ep, ucs_status_t status);
8+
9+
+/**
10+
+ * Flush endpoint.
11+
+ */
12+
+static ucs_status_ptr_t
13+
+na_ucp_ep_flush(ucp_ep_h ep);
14+
+
15+
/**
16+
* Close endpoint.
17+
*/
18+
@@ -1940,6 +1946,21 @@ na_ucp_ep_error_cb(
19+
na_ucx_addr_ref_decr(na_ucx_addr);
20+
}
21+
22+
+/*---------------------------------------------------------------------------*/
23+
+static ucs_status_ptr_t
24+
+na_ucp_ep_flush(ucp_ep_h ep)
25+
+{
26+
+ const ucp_request_param_t flush_params = {
27+
+ .op_attr_mask = 0};
28+
+ ucs_status_ptr_t status_ptr = ucp_ep_flush_nbx(ep, &flush_params);
29+
+
30+
+ NA_CHECK_SUBSYS_ERROR_DONE(addr,
31+
+ status_ptr != NULL && UCS_PTR_IS_ERR(status_ptr),
32+
+ "ucp_ep_flush_nb() failed (%s)",
33+
+ ucs_status_string(UCS_PTR_STATUS(status_ptr)));
34+
+ return status_ptr;
35+
+}
36+
+
37+
/*---------------------------------------------------------------------------*/
38+
static void
39+
na_ucp_ep_close(ucp_ep_h ep)
40+
@@ -2859,8 +2880,23 @@ na_ucx_addr_release(struct na_ucx_addr *na_ucx_addr)
41+
if (na_ucx_addr->ucp_ep != NULL) {
42+
/* NB. for deserialized addresses that are not "connected" addresses, do
43+
* not close the EP */
44+
- if (na_ucx_addr->worker_addr == NULL)
45+
+ if (na_ucx_addr->worker_addr == NULL) {
46+
+ if (!na_ucx_addr->na_ucx_class->ucp_listener) {
47+
+ ucs_status_ptr_t status_ptr = na_ucp_ep_flush(na_ucx_addr->ucp_ep);
48+
+
49+
+ if (UCS_PTR_IS_PTR(status_ptr)) {
50+
+ ucs_status_t status;
51+
+
52+
+ do {
53+
+ ucp_worker_progress(na_ucx_addr->na_ucx_class->ucp_worker);
54+
+ status = ucp_request_check_status(status_ptr);
55+
+ } while (status == UCS_INPROGRESS);
56+
+ ucp_request_free(status_ptr);
57+
+ }
58+
+ }
59+
+
60+
na_ucp_ep_close(na_ucx_addr->ucp_ep);
61+
+ }
62+
na_ucx_addr->ucp_ep = NULL;
63+
}
64+

0 commit comments

Comments
 (0)