From 55fc7b173034a357b3d6ac49ccd7540255048fe8 Mon Sep 17 00:00:00 2001 From: Jan Michalski Date: Fri, 7 Nov 2025 13:57:57 +0000 Subject: [PATCH 001/253] DAOS-17914 dlck: check pools' metadata (#16999) DLCK is a standalone utility so the user will have daos_server stopped (or crashed) and would like to check the pool for errors. Possibly because of already observed malfunctions. Hence, DLCK works in an environment in which any operation can crash it without warning. But its implemented in such a way to make sure it won't crash no matter what. The PR introduces the first DLCK command: check. It's purpose is to validate pool's metadata down to the container's tree. It reports the progress of the sub-checks into dedicated log files and the final results or critical errors back to the user by printing it on stdout. Signed-off-by: Jan Michalski --- src/bio/bio_context.c | 5 + src/bio/bio_recovery.c | 4 + src/common/btree.c | 319 +++++++++++++++--- src/common/mem.c | 5 + src/gurt/fault_inject.c | 3 + src/include/daos/btree.h | 9 + src/include/daos/common.h | 21 ++ src/include/daos_srv/checker.h | 260 ++++++++++++++ src/include/daos_srv/dlck.h | 12 - src/include/daos_srv/vos.h | 3 +- src/pool/srv_target.c | 2 +- src/utils/dlck/SConscript | 3 + src/utils/dlck/cmds/dlck_cmd_check.c | 221 ++++++++++++ src/utils/dlck/dlck_abt.c | 35 -- src/utils/dlck/dlck_args.h | 67 ++-- src/utils/dlck/dlck_args_common.c | 60 ++-- src/utils/dlck/dlck_args_files.c | 14 +- src/utils/dlck/dlck_args_parse.c | 26 +- src/utils/dlck/dlck_bitmap.h | 55 +++ src/utils/dlck/dlck_checker.c | 242 +++++++++++++ src/utils/dlck/dlck_checker.h | 115 +++++++ src/utils/dlck/dlck_cmds.h | 28 +- src/utils/dlck/dlck_engine.c | 250 +++++++++----- src/utils/dlck/dlck_engine.h | 131 ++++--- src/utils/dlck/dlck_main.c | 64 +++- src/utils/dlck/dlck_pool.c | 103 +++++- src/utils/dlck/dlck_pool.h | 34 +- src/utils/dlck/dlck_report.c | 61 ++++ src/utils/dlck/dlck_report.h | 25 ++ src/utils/dlck/tests/dlck_test_helper.c | 
50 ++- .../dlck/tests/fault_injection_dlck.yaml | 33 ++ src/vos/vos_gc.c | 79 ++++- src/vos/vos_internal.h | 2 +- src/vos/vos_layout.h | 4 +- src/vos/vos_pool.c | 82 +++-- 35 files changed, 2061 insertions(+), 366 deletions(-) create mode 100644 src/include/daos_srv/checker.h delete mode 100644 src/include/daos_srv/dlck.h create mode 100644 src/utils/dlck/cmds/dlck_cmd_check.c create mode 100644 src/utils/dlck/dlck_bitmap.h create mode 100644 src/utils/dlck/dlck_checker.c create mode 100644 src/utils/dlck/dlck_checker.h create mode 100644 src/utils/dlck/dlck_report.c create mode 100644 src/utils/dlck/dlck_report.h create mode 100644 src/utils/dlck/tests/fault_injection_dlck.yaml diff --git a/src/bio/bio_context.c b/src/bio/bio_context.c index be015df2959..989e20e9501 100644 --- a/src/bio/bio_context.c +++ b/src/bio/bio_context.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2018-2025 Intel Corporation. + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -904,6 +905,10 @@ int bio_mc_open(struct bio_xs_context *xs_ctxt, uuid_t pool_id, D_ASSERT(xs_ctxt != NULL); + if (DAOS_FAIL_CHECK(DAOS_FAULT_POOL_OPEN_BIO)) { /** fault injection */ + return daos_errno2der(daos_fail_value_get()); + } + *mc = NULL; if (!bio_nvme_configured(SMD_DEV_TYPE_META)) { /* No data blob for RDB */ diff --git a/src/bio/bio_recovery.c b/src/bio/bio_recovery.c index bcdc24830ec..4c2506aee23 100644 --- a/src/bio/bio_recovery.c +++ b/src/bio/bio_recovery.c @@ -514,6 +514,10 @@ bio_xsctxt_health_check(struct bio_xs_context *xs_ctxt, bool log_err, bool updat if (xs_ctxt == NULL) return 0; + if (DAOS_FAIL_CHECK(DAOS_FAULT_POOL_NVME_HEALTH)) { /** fault injection */ + return daos_errno2der(daos_fail_value_get()); + } + for (st = SMD_DEV_TYPE_DATA; st < SMD_DEV_TYPE_MAX; st++) { bxb = xs_ctxt->bxc_xs_blobstores[st]; diff --git a/src/common/btree.c b/src/common/btree.c index 128fd0e2092..7ef46cb1d39 100644 --- a/src/common/btree.c +++ 
b/src/common/btree.c @@ -167,11 +167,10 @@ struct btr_context { /** size of print buffer */ #define BTR_PRINT_BUF 128 -static int btr_class_init(umem_off_t root_off, - struct btr_root *root, unsigned int tree_class, - uint64_t *tree_feats, struct umem_attr *uma, - daos_handle_t coh, void *priv, - struct btr_instance *tins); +static int +btr_class_init(umem_off_t root_off, struct btr_root *root, unsigned int tree_class, + uint64_t *tree_feats, struct umem_attr *uma, daos_handle_t coh, void *priv, + btr_report_fn_t report_fn, void *report_arg, struct btr_instance *tins); static struct btr_record *btr_node_rec_at(struct btr_context *tcx, umem_off_t nd_off, unsigned int at); @@ -319,6 +318,11 @@ btr_ops(struct btr_context *tcx) return tcx->tc_tins.ti_ops; } +static inline void +report_fn_nop(void *arg, enum btr_report_type type, const char *fmt, ...) +{ +} + /** * Create a btree context (in volatile memory). * @@ -347,8 +351,8 @@ btr_context_create(umem_off_t root_off, struct btr_root *root, return -DER_NOMEM; tcx->tc_ref = 1; /* for the caller */ - rc = btr_class_init(root_off, root, tree_class, &tree_feats, uma, - coh, priv, &tcx->tc_tins); + rc = btr_class_init(root_off, root, tree_class, &tree_feats, uma, coh, priv, report_fn_nop, + NULL, &tcx->tc_tins); if (rc != 0) { D_ERROR("Failed to setup mem class %d: "DF_RC"\n", uma->uma_id, DP_RC(rc)); @@ -4446,49 +4450,22 @@ dbtree_iterate(daos_handle_t toh, uint32_t intent, bool backward, static struct btr_class btr_class_registered[BTR_TYPE_MAX]; /** - * Initialize a tree instance from a registered tree class. + * Calculate tree's features. + * + * \param[in] tree_class Tree's class identified. + * \param[in,out] tree_feats Tree's features. + * \param[in] tc Tree's class. 
+ * + * \retval -DER_PROTO Unsupported features + * \retval DER_SUCCESS Success */ static int -btr_class_init(umem_off_t root_off, struct btr_root *root, - unsigned int tree_class, uint64_t *tree_feats, - struct umem_attr *uma, daos_handle_t coh, void *priv, - struct btr_instance *tins) +btr_class_feats_init(unsigned int tree_class, uint64_t *tree_feats, struct btr_class *tc) { - struct btr_class *tc; - uint64_t special_feat; - int rc; + uint64_t special_feat; - memset(tins, 0, sizeof(*tins)); - rc = umem_class_init(uma, &tins->ti_umm); - if (rc != 0) - return rc; - - tins->ti_priv = priv; - tins->ti_coh = coh; - tins->ti_root_off = UMOFF_NULL; - - if (!UMOFF_IS_NULL(root_off)) { - tins->ti_root_off = root_off; - if (root == NULL) - root = umem_off2ptr(&tins->ti_umm, root_off); - } - tins->ti_root = root; - - if (root != NULL && root->tr_class != 0) { - tree_class = root->tr_class; - *tree_feats = root->tr_feats; - } - - /* XXX should be multi-thread safe */ - if (tree_class >= BTR_TYPE_MAX) { - D_DEBUG(DB_TRACE, "Invalid class id: %d\n", tree_class); - return -DER_INVAL; - } - - tc = &btr_class_registered[tree_class]; - if (tc->tc_ops == NULL) { - D_DEBUG(DB_TRACE, "Unregistered class id %d\n", tree_class); - return -DER_NONEXIST; + if (DAOS_FAIL_CHECK(DAOS_FAULT_BTREE_FEATURES)) { /** fault injection */ + return -DER_PROTO; } /* If no hkey callbacks are supplied, only special key types are @@ -4522,11 +4499,77 @@ btr_class_init(umem_off_t root_off, struct btr_root *root, /** Only check btree managed bits that can be set in tr_class */ if ((*tree_feats & tc->tc_feats) != (*tree_feats & BTR_EXT_FEAT_MASK)) { - D_ERROR("Unsupported features "DF_X64"/"DF_X64"\n", - *tree_feats, tc->tc_feats); return -DER_PROTO; } + return DER_SUCCESS; +} + +#define TREE_CLASS_STR "Tree class... " +#define TREE_FEATURES_STR "Tree features... 
" +#define INVALID_CLASS_FMT "Invalid class id: %d\n" +#define UNREGISTERED_CLASS_FMT "Unregistered class id %d\n" +#define UNSUPPORTED_FEATURES_FMT "Unsupported features " DF_X64 "/" DF_X64 "\n" +#define OK_STR "ok.\n" + +/** + * Initialize a tree instance from a registered tree class. + */ +static int +btr_class_init(umem_off_t root_off, struct btr_root *root, unsigned int tree_class, + uint64_t *tree_feats, struct umem_attr *uma, daos_handle_t coh, void *priv, + btr_report_fn_t report_fn, void *report_arg, struct btr_instance *tins) +{ + struct btr_class *tc; + int rc; + + memset(tins, 0, sizeof(*tins)); + rc = umem_class_init(uma, &tins->ti_umm); + if (rc != 0) + return rc; + + tins->ti_priv = priv; + tins->ti_coh = coh; + tins->ti_root_off = UMOFF_NULL; + + if (!UMOFF_IS_NULL(root_off)) { + tins->ti_root_off = root_off; + if (root == NULL) + root = umem_off2ptr(&tins->ti_umm, root_off); + } + tins->ti_root = root; + + if (root != NULL && root->tr_class != 0) { + tree_class = root->tr_class; + *tree_feats = root->tr_feats; + } + + /* XXX should be multi-thread safe */ + if (tree_class >= BTR_TYPE_MAX || DAOS_FAIL_CHECK(DAOS_FAULT_BTREE_OPEN_INV_CLASS)) { + report_fn(report_arg, BTR_REPORT_ERROR, TREE_CLASS_STR INVALID_CLASS_FMT, + tree_class); + D_DEBUG(DB_TRACE, INVALID_CLASS_FMT, tree_class); + return -DER_INVAL; + } + + tc = &btr_class_registered[tree_class]; + if (tc->tc_ops == NULL || DAOS_FAIL_CHECK(DAOS_FAULT_BTREE_OPEN_UNREG_CLASS)) { + report_fn(report_arg, BTR_REPORT_ERROR, TREE_CLASS_STR UNREGISTERED_CLASS_FMT, + tree_class); + D_DEBUG(DB_TRACE, UNREGISTERED_CLASS_FMT, tree_class); + return -DER_NONEXIST; + } + report_fn(report_arg, BTR_REPORT_MSG, TREE_CLASS_STR OK_STR); + + rc = btr_class_feats_init(tree_class, tree_feats, tc); + if (rc != DER_SUCCESS) { + report_fn(report_arg, BTR_REPORT_ERROR, TREE_FEATURES_STR UNSUPPORTED_FEATURES_FMT, + *tree_feats, tc->tc_feats); + D_ERROR(UNSUPPORTED_FEATURES_FMT, *tree_feats, tc->tc_feats); + return rc; + } 
+ report_fn(report_arg, BTR_REPORT_MSG, TREE_FEATURES_STR OK_STR); + tins->ti_ops = tc->tc_ops; return rc; } @@ -4640,3 +4683,183 @@ dbtree_overhead_get(int alloc_overhead, unsigned int tclass, uint64_t otype, return 0; } +#define CK_BTREE_NODE_FMT "Node (off=%#lx)... " +#define CK_BTREE_NODE_MALFORMED_STR "malformed - " +#define CK_BTREE_NON_ZERO_PADDING_FMT CK_BTREE_NODE_MALFORMED_STR "tn_pad_32 != 0 (%#" PRIx32 ")" +#define CK_BTREE_NON_ZERO_GEN_FMT CK_BTREE_NODE_MALFORMED_STR "tn_gen != 0 (%#" PRIx32 ")" + +/** + * Validate the integrity of the btree node. + * + * \param[in] nd Node to check. + * \param[in] nd_off Node's offset. + * \param[in] ck Checker. + * + * \retval DER_SUCCESS The node is correct. + * \retval -DER_NOTYPE The node is malformed. + */ +static int +btr_node_check(struct btr_node *nd, umem_off_t nd_off, btr_report_fn_t report_fn, void *report_arg, + bool error_on_non_zero_padding) +{ + uint16_t unknown_flags; + + D_ASSERT(report_fn != NULL); + + unknown_flags = nd->tn_flags & ~(BTR_NODE_LEAF | BTR_NODE_ROOT); + if (unknown_flags != 0) { + report_fn(report_arg, BTR_REPORT_ERROR, + CK_BTREE_NODE_MALFORMED_STR "unknown flags (%#" PRIx16 ")", + unknown_flags); + return -DER_NOTYPE; + } + + if (nd->tn_pad_32 != 0) { + if (error_on_non_zero_padding) { + report_fn(report_arg, BTR_REPORT_ERROR, + CK_BTREE_NODE_FMT CK_BTREE_NON_ZERO_PADDING_FMT, nd_off, + nd->tn_pad_32); + return -DER_NOTYPE; + } else { + report_fn(report_arg, BTR_REPORT_WARNING, + CK_BTREE_NODE_FMT CK_BTREE_NON_ZERO_PADDING_FMT, nd_off, + nd->tn_pad_32); + } + } + + if (nd->tn_gen != 0) { + if (error_on_non_zero_padding) { + report_fn(report_arg, BTR_REPORT_ERROR, + CK_BTREE_NODE_FMT CK_BTREE_NON_ZERO_GEN_FMT, nd_off, nd->tn_gen); + return -DER_NOTYPE; + } else { + report_fn(report_arg, BTR_REPORT_WARNING, + CK_BTREE_NODE_FMT CK_BTREE_NON_ZERO_GEN_FMT, nd_off, nd->tn_gen); + } + } + + report_fn(report_arg, BTR_REPORT_MSG, CK_BTREE_NODE_FMT OK_STR, nd_off); + + return DER_SUCCESS; +} + 
+/** + * \struct node_info + * + * List of node offsets. + */ +struct node_info { + d_list_t link; + umem_off_t nd_off; +}; + +/** + * Validate the integrity of a btree. + * + * \param[in] tcx Btree context. + * \param[in] ck Checker. + * + * \retval DER_SUCCESS The tree is correct. + * \retval -DER_NOTYPE The tree is malformed. + * \retval -DER_NONEXIST The tree is malformed. + * \retval -DER_* Possibly other errors. + */ +static int +btr_nodes_check(struct btr_context *tcx, btr_report_fn_t report_fn, void *report_arg, + bool error_on_non_zero_padding) +{ + D_LIST_HEAD(node_list); + struct node_info *ni; + struct node_info *ni_tmp; + umem_off_t nd_off; + struct btr_node *nd; + int rc = DER_SUCCESS; + + D_ASSERT(report_fn != NULL); + + if (btr_root_empty(tcx)) { + report_fn(report_arg, BTR_REPORT_MSG, "Empty tree\n"); + return DER_SUCCESS; + } + + D_ASSERT(!btr_has_embedded_value(tcx)); + + /** add the root node to the node list */ + D_ALLOC_PTR(ni); + ni->nd_off = tcx->tc_tins.ti_root->tr_node; + d_list_add_tail(&ni->link, &node_list); + + /** process the node list */ + while (!d_list_empty(&node_list)) { + ni = d_list_pop_entry(&node_list, struct node_info, link); + nd_off = ni->nd_off; + nd = btr_off2ptr(tcx, nd_off); + + /** check the node */ + rc = btr_node_check(nd, nd_off, report_fn, report_arg, error_on_non_zero_padding); + if (rc != DER_SUCCESS) { + break; + } + + /** a leaf has no child nodes */ + if (btr_node_is_leaf(tcx, nd_off)) { + continue; + } + + /** + * append the node's children to the front of the nodes' list + * + * Note: This makes the traversal depth-first. Given the limited depth of a typical + * DAOS tree, this approach should help reduce resource usage. 
+ */ + for (int at = 0; at < nd->tn_keyn; ++at) { + D_ALLOC_PTR(ni); + ni->nd_off = btr_node_child_at(tcx, nd_off, at); + d_list_add(&ni->link, &node_list); + } + } + + /** free the list - in case we exit with an error and the list of nodes is not empty */ + d_list_for_each_entry_safe(ni, ni_tmp, &node_list, link) { + /** remove the node from the list */ + d_list_del(&ni->link); + D_FREE(ni); + } + + return rc; +} + +/** + * Check a btree. + * + * \param[in] root Address of the tree root. + * \param[in] uma Memory class attributes. + * \param[in] ck Checker. + */ +int +dbtree_check_inplace(struct btr_root *root, struct umem_attr *uma, btr_report_fn_t report_fn, + void *report_arg, bool error_on_non_zero_padding) +{ + struct btr_context tcx = {0}; + uint64_t tree_feats = -1; + int rc; + + D_ASSERT(root != NULL); + D_ASSERT(uma != NULL); + D_ASSERT(report_fn != NULL); + + rc = btr_class_init(UMOFF_NULL, root, -1, &tree_feats, uma, DAOS_HDL_INVAL, NULL, report_fn, + report_arg, &tcx.tc_tins); + if (rc != DER_SUCCESS) { + return rc; + } + + tcx.tc_feats = root->tr_feats; + tcx.tc_order = root->tr_order; + + rc = btr_nodes_check(&tcx, report_fn, report_arg, error_on_non_zero_padding); + + /** no need to free tcx */ + + return rc; +} diff --git a/src/common/mem.c b/src/common/mem.c index 73b0399baea..4ace7e96bd3 100644 --- a/src/common/mem.c +++ b/src/common/mem.c @@ -415,6 +415,11 @@ umempobj_open(const char *path, const char *layout_name, int flags, struct umem_ int enabled = 1; int rc; + if (DAOS_FAIL_CHECK(DAOS_FAULT_POOL_OPEN_UMEM)) { /** fault injection */ + errno = daos_fail_value_get(); + return NULL; + } + D_ALLOC(umm_pool, sizeof(*umm_pool) + sizeof(umm_pool->up_slabs[0]) * UMM_SLABS_CNT); if (umm_pool == NULL) return NULL; diff --git a/src/gurt/fault_inject.c b/src/gurt/fault_inject.c index 95376044b5d..386e24d0172 100644 --- a/src/gurt/fault_inject.c +++ b/src/gurt/fault_inject.c @@ -1,5 +1,6 @@ /* * (C) Copyright 2018-2024 Intel Corporation. 
+ * (C) Copyright 2025 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -417,6 +418,8 @@ fault_attr_parse(yaml_parser_t *parser) yaml_event_delete(&event); if (event_type == YAML_SEQUENCE_END_EVENT) break; + if (event_type == YAML_DOCUMENT_END_EVENT) /** in case the list is actually empty */ + break; if (rc != DER_SUCCESS) break; } while (1); diff --git a/src/include/daos/btree.h b/src/include/daos/btree.h index ad0066f111d..9b9b243d9bb 100644 --- a/src/include/daos/btree.h +++ b/src/include/daos/btree.h @@ -543,6 +543,15 @@ int dbtree_open_inplace(struct btr_root *root, struct umem_attr *uma, daos_handle_t *toh); int dbtree_open_inplace_ex(struct btr_root *root, struct umem_attr *uma, daos_handle_t coh, void *priv, daos_handle_t *toh); +enum btr_report_type { + BTR_REPORT_ERROR, + BTR_REPORT_WARNING, + BTR_REPORT_MSG, +}; +typedef void (*btr_report_fn_t)(void *arg, enum btr_report_type type, const char *fmt, ...); +int + dbtree_check_inplace(struct btr_root *root, struct umem_attr *uma, btr_report_fn_t report_fn, + void *report_arg, bool error_on_non_zero_padding); int dbtree_close(daos_handle_t toh); int dbtree_destroy(daos_handle_t toh, void *args); int dbtree_drain(daos_handle_t toh, int *credits, void *args, bool *destroyed); diff --git a/src/include/daos/common.h b/src/include/daos/common.h index 649a8f7bfc6..da4e41e24ab 100644 --- a/src/include/daos/common.h +++ b/src/include/daos/common.h @@ -933,6 +933,27 @@ enum { #define DAOS_WAL_FAIL_REPLAY (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0x101) #define DAOS_MEM_FAIL_CHECKPOINT (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0x102) +/** DLCK fault injection */ +#define DLCK_FAULT_CREATE_LOG_DIR (DAOS_FAIL_SYS_TEST_GROUP_LOC | 0x100) +#define DLCK_FAULT_CREATE_POOL_DIR (DAOS_FAIL_SYS_TEST_GROUP_LOC | 0x101) +#define DLCK_FAULT_ENGINE_START (DAOS_FAIL_SYS_TEST_GROUP_LOC | 0x102) +#define DLCK_FAULT_ENGINE_EXEC (DAOS_FAIL_SYS_TEST_GROUP_LOC | 0x103) +#define DLCK_FAULT_ENGINE_JOIN 
(DAOS_FAIL_SYS_TEST_GROUP_LOC | 0x104) +#define DLCK_FAULT_ENGINE_STOP (DAOS_FAIL_SYS_TEST_GROUP_LOC | 0x105) + +/** Pool open fault injection */ +#define DAOS_FAULT_POOL_NVME_HEALTH (DAOS_FAIL_SYS_TEST_GROUP_LOC | 0x200) +#define DAOS_FAULT_POOL_OPEN_BIO (DAOS_FAIL_SYS_TEST_GROUP_LOC | 0x201) +#define DAOS_FAULT_POOL_OPEN_UMEM (DAOS_FAIL_SYS_TEST_GROUP_LOC | 0x202) +#define DAOS_FAULT_POOL_OPEN_MAGIC (DAOS_FAIL_SYS_TEST_GROUP_LOC | 0x203) +#define DAOS_FAULT_POOL_OPEN_VERSION (DAOS_FAIL_SYS_TEST_GROUP_LOC | 0x204) +#define DAOS_FAULT_POOL_OPEN_UUID (DAOS_FAIL_SYS_TEST_GROUP_LOC | 0x205) +#define DAOS_FAULT_BTREE_OPEN_INV_CLASS (DAOS_FAIL_SYS_TEST_GROUP_LOC | 0x206) +#define DAOS_FAULT_BTREE_OPEN_UNREG_CLASS (DAOS_FAIL_SYS_TEST_GROUP_LOC | 0x207) +#define DAOS_FAULT_BTREE_FEATURES (DAOS_FAIL_SYS_TEST_GROUP_LOC | 0x208) +#define DAOS_FAULT_POOL_EXT_PADDING (DAOS_FAIL_SYS_TEST_GROUP_LOC | 0x209) +#define DAOS_FAULT_POOL_EXT_RESERVED (DAOS_FAIL_SYS_TEST_GROUP_LOC | 0x20a) + #define DAOS_DTX_SKIP_PREPARE DAOS_DTX_SPEC_LEADER #define DAOS_FAIL_CHECK(id) daos_fail_check(id) diff --git a/src/include/daos_srv/checker.h b/src/include/daos_srv/checker.h new file mode 100644 index 00000000000..c6e03de3726 --- /dev/null +++ b/src/include/daos_srv/checker.h @@ -0,0 +1,260 @@ +/** + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ + +#ifndef __DAOS_CHECKER_H__ +#define __DAOS_CHECKER_H__ + +#include +#include +#include +#include + +#define CHECKER_INDENT_MAX 10 + +/** + * @enum checker_event + * + * Checker event types. + */ +enum checker_event { + CHECKER_EVENT_INVALID = -1, + CHECKER_EVENT_ERROR = 0, + CHECKER_EVENT_WARNING, +}; + +/** + * @struct checker_options + * + * Checker control options. + */ +struct checker_options { + enum checker_event cko_non_zero_padding; +}; + +/** + * @struct checker + * + * Checker state. 
+ */ +struct checker { + /** input */ + void *ck_private; + struct checker_options ck_options; + /** state */ + int ck_level; + char *ck_prefix; + int (*ck_indent_set)(struct checker *ck); + /** output */ + int (*ck_vprintf)(struct checker *ck, const char *fmt, va_list ap); + unsigned ck_warnings_num; +}; + +#define CHECKER_ERROR_INFIX "error: " +#define CHECKER_WARNING_INFIX "warning: " +#define CHECKER_OK_INFIX "ok" + +/** helpers */ + +/** + * Simple argument translation ... -> va_list + * + * \param[in] ck Checker to call. + * \param[in] fmt Format. + * \param[in] ... Format's arguments. + * + * \retval DER_SUCCESS Success. + * \retval -DER_* Error. + */ +static inline int +ck_common_printf(struct checker *ck, const char *fmt, ...) +{ + va_list args; + int rc; + + va_start(args, fmt); + rc = ck->ck_vprintf(ck, fmt, args); + va_end(args); + + return rc; +} + +/** + * Print a btree report as a checker message. + * + * \param[in] arg Checker. + * \param[in] type Btree report type. + * \param[in] fmt Format. + * \param[in] ... Format's arguments. + */ +static inline void +ck_report(void *arg, enum btr_report_type type, const char *fmt, ...) +{ + struct checker *ck = arg; + va_list args; + + va_start(args, fmt); + + switch (type) { + case BTR_REPORT_ERROR: + ck_common_printf(ck, "%s%s", ck->ck_prefix, CHECKER_ERROR_INFIX); + ck->ck_vprintf(ck, fmt, args); + break; + case BTR_REPORT_WARNING: + ck_common_printf(ck, "%s%s", ck->ck_prefix, CHECKER_WARNING_INFIX); + ck_common_printf(ck, fmt, args); + ck->ck_warnings_num++; + break; + case BTR_REPORT_MSG: + ck_common_printf(ck, "%s", ck->ck_prefix); + ck_common_printf(ck, fmt, args); + break; + default: + D_ASSERTF(0, "Unknown report type: %x\n", type); + } + + va_end(args); +} + +/** basic helpers */ + +#define IS_CHECKER(ck) (unlikely((ck) != NULL)) + +#define IS_NOT_CHECKER(dp) (likely((ck) == NULL)) + +#define YES_NO_STR(cond) ((cond) ? 
"yes" : "no") + +/** direct print(f) macros with and without prefix */ + +#define CK_PRINT(ck, msg) \ + do { \ + if (IS_CHECKER(ck)) { \ + (void)ck_common_printf(ck, "%s" msg, (ck)->ck_prefix); \ + } \ + } while (0) + +#define CK_PRINTF(ck, fmt, ...) \ + do { \ + if (IS_CHECKER(ck)) { \ + (void)ck_common_printf(ck, "%s" fmt, (ck)->ck_prefix, __VA_ARGS__); \ + } \ + } while (0) + +#define CK_PRINT_WO_PREFIX(ck, msg) \ + do { \ + if (IS_CHECKER(ck)) { \ + (void)ck_common_printf(ck, msg); \ + } \ + } while (0) + +#define CK_PRINTF_WO_PREFIX(ck, fmt, ...) \ + do { \ + if (IS_CHECKER(ck)) { \ + (void)ck_common_printf(ck, fmt, __VA_ARGS__); \ + } \ + } while (0) + +/** append + new line shortcuts */ + +#define CK_APPENDL_OK(ck) CK_PRINT_WO_PREFIX(ck, CHECKER_OK_INFIX ".\n") + +#define CK_APPENDL_RC(ck, rc) \ + do { \ + if (rc == DER_SUCCESS) { \ + CK_APPENDL_OK(ck); \ + } else { \ + CK_PRINTF_WO_PREFIX(ck, CHECKER_ERROR_INFIX DF_RC "\n", DP_RC(rc)); \ + } \ + } while (0) + +#define CK_APPENDFL_ERR(ck, fmt, ...) \ + CK_PRINTF_WO_PREFIX(ck, CHECKER_ERROR_INFIX fmt "\n", __VA_ARGS__) + +#define CK_APPENDFL_WARN(ck, fmt, ...) \ + do { \ + CK_PRINTF_WO_PREFIX(ck, CHECKER_WARNING_INFIX fmt "\n", __VA_ARGS__); \ + ++(ck)->ck_warnings_num; \ + } while (0) + +/** print(f) + return code + new line shortcuts */ + +#define CK_PRINTL_RC(ck, rc, msg) \ + do { \ + if (rc == DER_SUCCESS) { \ + CK_PRINT(ck, msg ": " CHECKER_OK_INFIX ".\n"); \ + } else { \ + CK_PRINTF(ck, CHECKER_ERROR_INFIX msg ": " DF_RC "\n", DP_RC(rc)); \ + } \ + } while (0) + +#define CK_PRINTFL_RC(ck, rc, fmt, ...) \ + do { \ + if (rc == DER_SUCCESS) { \ + CK_PRINTF(ck, fmt ": " CHECKER_OK_INFIX ".\n", __VA_ARGS__); \ + } else { \ + CK_PRINTF(ck, CHECKER_ERROR_INFIX fmt ": " DF_RC "\n", __VA_ARGS__, \ + DP_RC(rc)); \ + } \ + } while (0) + +/** + * An assert while run without a checker. A checker message otherwise. + * + * \param[in] ck Checker's state. + * \param[in] msg Message to print. 
+ * \param[in] cond Condition to assert (without a checker) or condition to check (with a + * checker). + */ +#define CK_ASSERT(ck, msg, cond) \ + do { \ + if (IS_CHECKER(ck)) { \ + CK_PRINTF(ck, msg "%s\n", YES_NO_STR(cond)); \ + } else { \ + D_ASSERT(cond); \ + } \ + } while (0) + +/** manage the checker print's indentation */ + +static inline void +checker_print_indent_inc(struct checker *ck) +{ + if (IS_NOT_CHECKER(ck)) { + return; + } + + if (ck->ck_level == CHECKER_INDENT_MAX) { + CK_PRINT(ck, "Max indent reached.\n"); + return; + } + + ck->ck_level++; + ck->ck_indent_set(ck); +} + +static inline void +checker_print_indent_dec(struct checker *ck) +{ + if (IS_NOT_CHECKER(ck)) { + return; + } + + if (ck->ck_level == 0) { + CK_PRINT(ck, "Min indent reached.\n"); + return; + } + + ck->ck_level--; + ck->ck_indent_set(ck); +} + +#define CK_INDENT(ck, exp) \ + do { \ + checker_print_indent_inc(ck); \ + exp; \ + checker_print_indent_dec(ck); \ + } while (0) + +#endif /** __DAOS_CHECKER_H__ */ diff --git a/src/include/daos_srv/dlck.h b/src/include/daos_srv/dlck.h deleted file mode 100644 index 85745a9bccd..00000000000 --- a/src/include/daos_srv/dlck.h +++ /dev/null @@ -1,12 +0,0 @@ -/** - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP - * - * SPDX-License-Identifier: BSD-2-Clause-Patent - */ - -#ifndef __DAOS_DLCK_H__ -#define __DAOS_DLCK_H__ - -/** placeholder for the DLCK-dedicated APIs */ - -#endif /* __DAOS_DLCK_H__ */ diff --git a/src/include/daos_srv/vos.h b/src/include/daos_srv/vos.h index 07530961cc4..3eef3f4702d 100644 --- a/src/include/daos_srv/vos.h +++ b/src/include/daos_srv/vos.h @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -491,7 +492,7 @@ vos_pool_upgrade(daos_handle_t poh, uint32_t version); */ int vos_pool_open_metrics(const char *path, uuid_t uuid, unsigned int flags, void *metrics, - daos_handle_t *poh); + struct checker *ck, daos_handle_t *poh); /** * Close a VOSP, all opened containers sharing this 
pool handle diff --git a/src/pool/srv_target.c b/src/pool/srv_target.c index 756df65e7aa..07e587a3b85 100644 --- a/src/pool/srv_target.c +++ b/src/pool/srv_target.c @@ -539,7 +539,7 @@ pool_child_start(struct ds_pool_child *child, bool recreate) D_ASSERT(child->spc_metrics[DAOS_VOS_MODULE] != NULL); rc = vos_pool_open_metrics(path, child->spc_uuid, VOS_POF_EXCL | VOS_POF_EXTERNAL_FLUSH | VOS_POF_EXTERNAL_CHKPT, - child->spc_metrics[DAOS_VOS_MODULE], &child->spc_hdl); + child->spc_metrics[DAOS_VOS_MODULE], NULL, &child->spc_hdl); D_FREE(path); diff --git a/src/utils/dlck/SConscript b/src/utils/dlck/SConscript index 33a4194fce7..34ebafd52ce 100644 --- a/src/utils/dlck/SConscript +++ b/src/utils/dlck/SConscript @@ -11,15 +11,18 @@ def scons(): libs = ['vos_srv', 'daos_common_pmem', 'bio', 'uuid', 'gurt', 'pmemobj', 'abt', 'pthread'] src = [ + 'cmds/dlck_cmd_check.c', 'dlck_abt.c', 'dlck_args_common.c', 'dlck_args_engine.c', 'dlck_args_files.c', 'dlck_args_parse.c', 'dlck_args.c', + 'dlck_checker.c', 'dlck_engine.c', 'dlck_main.c', 'dlck_pool.c', + 'dlck_report.c', 'dlck_sched.c', 'dlck_tls.c', '../../engine/util.c', diff --git a/src/utils/dlck/cmds/dlck_cmd_check.c b/src/utils/dlck/cmds/dlck_cmd_check.c new file mode 100644 index 00000000000..a64ce81d2a4 --- /dev/null +++ b/src/utils/dlck/cmds/dlck_cmd_check.c @@ -0,0 +1,221 @@ +/** + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ + +#include +#include +#include + +#include "../dlck_args.h" +#include "../dlck_bitmap.h" +#include "../dlck_checker.h" +#include "../dlck_engine.h" +#include "../dlck_pool.h" +#include "../dlck_report.h" + +/** + * Target thread (worker). Check a single pool. + * + * \param[in] xa Target's arguments. + * \param[in] file File to process. + * \param[in] ck Checker. + * + * \retval DER_SUCCESS Success. + * \retval -DER_NOMEM Out of memory. + * \retval -DER_* Other errors. 
+ */ +static int +pool_process(struct xstream_arg *xa, struct dlck_file *file, struct checker *ck) +{ + char *path; + daos_handle_t poh; + int rc; + + /** generate a VOS file path */ + rc = ds_mgmt_file(xa->ctrl->engine.storage_path, file->po_uuid, VOS_FILE, &xa->xs->tgt_id, + &path); + if (rc != DER_SUCCESS) { + CK_PRINTL_RC(ck, xa->rc, "VOS file path allocation failed"); + return rc; + } + + rc = vos_pool_open_metrics(path, file->po_uuid, DLCK_POOL_OPEN_FLAGS, NULL, ck, &poh); + if (rc == DER_SUCCESS) { + (void)vos_pool_close(poh); + } + D_FREE(path); + + /** check */ + if (rc != DER_SUCCESS) { + /** ignore a possible error from the unlock */ + return rc; + } + + return DER_SUCCESS; +} + +#define DLCK_POOL_CHECK_RESULT_PREFIX_FMT "[%d] pool " DF_UUIDF " check result" +#define DLCK_WARNINGS_NUM_FMT " (%u warning(s))" + +/** + * Target thread (worker). + */ +static void +exec_one(void *arg) +{ + struct xstream_arg *xa = arg; + struct dlck_file *file; + struct checker *main_ck = &xa->ctrl->checker; + struct checker ck; + int rc; + + /** initialize the daos_io_* thread */ + rc = dlck_engine_xstream_init(xa->xs); + if (rc != DER_SUCCESS) { + xa->rc = rc; + xa->progress = DLCK_XSTREAM_PROGRESS_END; + return; + } + + d_list_for_each_entry(file, &xa->ctrl->files.list, link) { + /** do not process the given file if the target is not requested */ + if (dlck_bitmap_isclr32(file->targets_bitmap, xa->xs->tgt_id)) { + /** report the progress to the main thread */ + ++xa->progress; + continue; + } + + /** initialize the logfile and its print utility */ + rc = dlck_checker_worker_init(&xa->ctrl->common.options, xa->ctrl->log_dir, + file->po_uuid, xa->xs->tgt_id, main_ck, &ck); + if (rc != DER_SUCCESS) { + /** There is no point continuing without a logfile. 
*/ + dlck_xstream_set_rc(xa, rc); + xa->progress = DLCK_XSTREAM_PROGRESS_END; + break; + } + + /** check the pool */ + rc = pool_process(xa, file, &ck); + /** report the result */ + if (rc == DER_SUCCESS && ck.ck_warnings_num > 0) { + CK_PRINTF( + main_ck, + DLCK_POOL_CHECK_RESULT_PREFIX_FMT CHECKER_OK_INFIX DLCK_WARNINGS_NUM_FMT + ".\n", + xa->xs->tgt_id, DP_UUID(file->po_uuid), ck.ck_warnings_num); + } else { + CK_PRINTFL_RC(main_ck, rc, DLCK_POOL_CHECK_RESULT_PREFIX_FMT, + xa->xs->tgt_id, DP_UUID(file->po_uuid)); + } + dlck_xstream_set_rc(xa, rc); + dlck_uadd_no_overflow(xa->warnings_num, ck.ck_warnings_num, &xa->warnings_num); + /** Continue to the next pool regardless of the result. */ + + /** close the logfile */ + dlck_checker_worker_fini(&ck); + + /** report the progress to the main thread */ + ++xa->progress; + } + + if (xa->rc != DER_SUCCESS) { + (void)dlck_engine_xstream_fini(xa->xs); + return; + } + + rc = dlck_engine_xstream_fini(xa->xs); + dlck_xstream_set_rc(xa, rc); +} + +/** + * The main thread spawns and waits for other threads to complete their tasks. + */ +int +dlck_cmd_check(struct dlck_control *ctrl) +{ + D_ASSERT(ctrl != NULL); + + struct checker *ck = &ctrl->checker; + char log_dir_template[] = "/tmp/dlck_check_XXXXXX"; + struct dlck_engine *engine = NULL; + int *rcs; + int rc; + + /** create a log directory */ + if (DAOS_FAIL_CHECK(DLCK_FAULT_CREATE_LOG_DIR)) { /** fault injection */ + ctrl->log_dir = NULL; + errno = daos_fail_value_get(); + } else { + ctrl->log_dir = mkdtemp(log_dir_template); + } + if (ctrl->log_dir == NULL) { + rc = daos_errno2der(errno); + CK_PRINTL_RC(ck, rc, "Cannot create log directory"); + return rc; + } + CK_PRINTF(ck, "Log directory: %s\n", ctrl->log_dir); + + CK_PRINT(ck, "Start the engine... 
"); + rc = dlck_engine_start(&ctrl->engine, &engine); + CK_APPENDL_RC(ck, rc); + if (rc != DER_SUCCESS) { + return rc; + } + + if (d_list_empty(&ctrl->files.list)) { + /** no files specified means all files are requested */ + CK_PRINT(ck, "Read the list of pools... "); + rc = dlck_pool_list(&ctrl->files.list); + CK_APPENDL_RC(ck, rc); + if (rc != DER_SUCCESS) { + goto err_stop_engine; + } + /** no files exist */ + if (d_list_empty(&ctrl->files.list)) { + CK_PRINT(ck, "No pools exist. Exiting...\n"); + goto err_stop_engine; + } + } + + CK_PRINT(ck, "Create pools directories... "); + rc = dlck_pool_mkdir_all(ctrl->engine.storage_path, &ctrl->files.list, ck); + CK_APPENDL_RC(ck, rc); + if (rc != DER_SUCCESS) { + goto err_stop_engine; + } + + /** allocate an array of return codes for targets */ + D_ALLOC_ARRAY(rcs, ctrl->engine.targets); + if (rcs == NULL) { + rc = -DER_NOMEM; + CK_PRINTL_RC(ck, rc, ""); + goto err_stop_engine; + } + + rc = dlck_engine_exec_all(engine, exec_one, dlck_engine_xstream_arg_alloc, ctrl, + dlck_engine_xstream_arg_free, ck); + if (rc != DER_SUCCESS) { + goto err_free_rcs; + } + + CK_PRINT(ck, "Stop the engine... "); + rc = dlck_engine_stop(engine); + CK_APPENDL_RC(ck, rc); + + /** Ignore an error for now to print the collected results. */ + dlck_report_results(rcs, ctrl->engine.targets, ctrl->warnings_num, ck); + D_FREE(rcs); + + /** Return the first encountered error. 
*/ + return rc; + +err_free_rcs: + D_FREE(rcs); +err_stop_engine: + (void)dlck_engine_stop(engine); + + return rc; +} diff --git a/src/utils/dlck/dlck_abt.c b/src/utils/dlck/dlck_abt.c index 5f1ba266d27..314f47e9d96 100644 --- a/src/utils/dlck/dlck_abt.c +++ b/src/utils/dlck/dlck_abt.c @@ -49,41 +49,6 @@ dlck_abt_attr_free(ABT_thread_attr *attr) return dss_abterr2der(ABT_thread_attr_free(attr)); } -int -dlck_abt_init(struct dlck_engine *engine) -{ - int rc; - - rc = ABT_init(0, NULL); - if (rc != ABT_SUCCESS) { - return dss_abterr2der(rc); - } - - rc = ABT_mutex_create(&engine->open_mtx); - if (rc != ABT_SUCCESS) { - (void)ABT_finalize(); - return dss_abterr2der(rc); - } - - return DER_SUCCESS; -} - -int -dlck_abt_fini(struct dlck_engine *engine) -{ - int rc; - - rc = ABT_mutex_free(&engine->open_mtx); - if (rc != ABT_SUCCESS) { - (void)ABT_finalize(); - return dss_abterr2der(rc); - } - - rc = ABT_finalize(); - - return dss_abterr2der(rc); -} - int dlck_xstream_create(struct dlck_xstream *xs) { diff --git a/src/utils/dlck/dlck_args.h b/src/utils/dlck/dlck_args.h index 2b719d87ecd..4ea3a6319c1 100644 --- a/src/utils/dlck/dlck_args.h +++ b/src/utils/dlck/dlck_args.h @@ -11,10 +11,9 @@ #include #include #include +#include #include -#include "dlck_cmds.h" - #define _STRINGIFY(x) #x #define STRINGIFY(x) _STRINGIFY(x) @@ -25,8 +24,7 @@ #define GROUP_AUTOMAGIC (-1) /** yes, -1 is the last group */ /** all short options */ -#define KEY_COMMON_CMD 'c' -#define KEY_COMMON_CO_UUID 'q' +#define KEY_COMMON_OPTIONS 'o' #define KEY_COMMON_WRITE_MODE 'w' #define KEY_FILES 'f' /** the options below follow the daos_engine options */ @@ -44,9 +42,10 @@ #define DLCK_TARGET_MAX 31 +#define MISSING_ARG_FMT "Missing argument for the '%s' option" + struct dlck_args_common { - enum dlck_cmd cmd; - uuid_t co_uuid; /** Container UUID. 
*/ + struct checker_options options; bool write_mode; /** false by default (dry run) */ }; @@ -59,7 +58,6 @@ struct dlck_file { d_list_t link; uuid_t po_uuid; /** Pool UUID. */ uint32_t targets_bitmap; /** Bitmap of targets involved. */ - const char *desc; /** Argument provided by the user. */ }; /** @@ -85,17 +83,41 @@ struct dlck_args_files { d_list_t list; }; -struct dlck_print { - int (*dp_printf)(const char *fmt, ...); -}; +/** + * Count the number of files in the list. + * + * \param[in] files The list of files to count. + * + * \return The number of files on the list \p files. + */ +static inline unsigned +dlck_args_files_num(struct dlck_args_files *files) +{ + struct dlck_file *file; + unsigned num = 0; + + d_list_for_each_entry(file, &files->list, link) { + ++num; + } + return num; +} + +/** + * @struct dlck_control + * + * Bundle of input, output, and control arguments. + */ struct dlck_control { /** in */ struct dlck_args_common common; struct dlck_args_files files; struct dlck_args_engine engine; - /** print */ - struct dlck_print print; + /** checker */ + struct checker checker; + /** out */ + char *log_dir; + unsigned warnings_num; }; /** helper definitions */ @@ -113,7 +135,7 @@ struct dlck_control { #define FAIL(STATE, RC, ERRNUM, ...) \ do { \ argp_failure(STATE, ERRNUM, ERRNUM, __VA_ARGS__); \ - RC = ERRNUM; \ + (RC) = ERRNUM; \ } while (0) #define RETURN_FAIL(STATE, ERRNUM, ...) \ @@ -122,10 +144,6 @@ struct dlck_control { return ERRNUM; \ } while (0) -#define DLCK_PRINT(ctrl, fmt) (void)ctrl->print.dp_printf(fmt) - -#define DLCK_PRINTF(ctrl, fmt, ...) (void)ctrl->print.dp_printf(fmt, __VA_ARGS__) - /** dlck_args_parse.c */ /** @@ -160,15 +178,18 @@ int parse_file(const char *arg, struct argp_state *state, struct dlck_file **file_ptr); /** - * Extract a command from \p arg. + * Extract an event from \p arg. * - * \param[in] arg String value. + * \param[in] option Name of the option. + * \param[in] value String value. 
+ * \param[out] state State of the parser. + * \param[out] rc Return code. * - * \retval DLCK_CMD_UNKNOWN The provided command is unknown. - * \retval DLCK_CMD_* DLCK command. + * \retval CHECKER_EVENT_INVALID The provided event is invalid. + * \retval CHECKER_EVENT_* DLCK event. */ -enum dlck_cmd -parse_command(const char *arg); +enum checker_event +parse_event(const char *option, const char *value, struct argp_state *state, int *rc); /** dlck_args_files.c */ diff --git a/src/utils/dlck/dlck_args_common.c b/src/utils/dlck/dlck_args_common.c index 0ce3ce62bba..bc65af4c15a 100644 --- a/src/utils/dlck/dlck_args_common.c +++ b/src/utils/dlck/dlck_args_common.c @@ -10,34 +10,58 @@ #include "dlck_args.h" +#define DLCK_OPT_NON_ZERO_PADDING_STR "non_zero_padding" + static struct argp_option args_common_options[] = { OPT_HEADER("Options:", GROUP_OPTIONS), /** entries below inherits the group number of the header entry */ {"write_mode", KEY_COMMON_WRITE_MODE, 0, 0, "Make changes persistent."}, - {"cmd", KEY_COMMON_CMD, "CMD", 0, "Command (Required). Please see available commands below."}, - {"co_uuid", KEY_COMMON_CO_UUID, "UUID", 0, - "UUID of a container to process. If not provided all containers are processed."}, - OPT_HEADER("Available commands:", GROUP_AVAILABLE_CMDS), + {"options", KEY_COMMON_OPTIONS, "OPTIONS", 0, + "Set options. Options are comma-separated and may include arguments using the equals sign " + "('='). Please see available options below."}, + OPT_HEADER("Available options:", GROUP_AVAILABLE_CMDS), /** entries below inherits the group number of the header entry */ - LIST_ENTRY("WIP", "No commands implemented yet."), + LIST_ENTRY(DLCK_OPT_NON_ZERO_PADDING_STR "=EVENT", + "Action to take when non-zero padding or reserved fields are detected. EVENT can be " + "either 'error' or 'warning'. 
It is 'error' by default."), {0}}; +enum dlck_options_values { DLCK_OPT_NON_ZERO_PADDING }; + +static char *options_tokens[] = { + [DLCK_OPT_NON_ZERO_PADDING] = DLCK_OPT_NON_ZERO_PADDING_STR, +}; + static void args_common_init(struct dlck_args_common *args) { memset(args, 0, sizeof(*args)); /** set defaults */ args->write_mode = false; /** dry run */ - args->cmd = DLCK_CMD_NOT_SET; - uuid_clear(args->co_uuid); + args->options.cko_non_zero_padding = CHECKER_EVENT_WARNING; } static int -args_common_check(struct argp_state *state, struct dlck_args_common *args) +args_common_options_parse(char *options_str, struct checker_options *opts, struct argp_state *state) { - if (args->cmd == DLCK_CMD_NOT_SET) { - RETURN_FAIL(state, EINVAL, "Command not set"); + char *value; + enum checker_event tmp_event; + int rc; + + while (*options_str != '\0') { + switch (getsubopt(&options_str, options_tokens, &value)) { + case DLCK_OPT_NON_ZERO_PADDING: + tmp_event = parse_event(DLCK_OPT_NON_ZERO_PADDING_STR, value, state, &rc); + if (tmp_event == CHECKER_EVENT_INVALID) { + return rc; + } + opts->cko_non_zero_padding = tmp_event; + break; + default: + RETURN_FAIL(state, EINVAL, "Unknown option: '%s'", value); + } } + return 0; } @@ -45,7 +69,6 @@ static error_t args_common_parser(int key, char *arg, struct argp_state *state) { struct dlck_args_common *args = state->input; - uuid_t tmp_uuid; int rc = 0; /** state changes */ @@ -54,7 +77,6 @@ args_common_parser(int key, char *arg, struct argp_state *state) args_common_init(args); return 0; case ARGP_KEY_END: - return args_common_check(state, args); case ARGP_KEY_SUCCESS: case ARGP_KEY_FINI: return 0; @@ -65,18 +87,8 @@ args_common_parser(int key, char *arg, struct argp_state *state) case KEY_COMMON_WRITE_MODE: args->write_mode = true; break; - case KEY_COMMON_CMD: - args->cmd = parse_command(arg); - if (args->cmd == DLCK_CMD_UNKNOWN) { - RETURN_FAIL(state, EINVAL, "Unknown command: %s", arg); - } - break; - case KEY_COMMON_CO_UUID: - rc = 
uuid_parse(arg, tmp_uuid); - if (rc != 0) { - RETURN_FAIL(state, EINVAL, "Malformed uuid: %s", arg); - } - uuid_copy(args->co_uuid, tmp_uuid); + case KEY_COMMON_OPTIONS: + rc = args_common_options_parse(arg, &args->options, state); break; default: return ARGP_ERR_UNKNOWN; diff --git a/src/utils/dlck/dlck_args_files.c b/src/utils/dlck/dlck_args_files.c index 3bfe9455b0e..5e98e7a954e 100644 --- a/src/utils/dlck/dlck_args_files.c +++ b/src/utils/dlck/dlck_args_files.c @@ -14,9 +14,9 @@ #include "dlck_args.h" static struct argp_option args_files_options[] = { - {"file", KEY_FILES, "UUID,TARGET", 0, + {"file", KEY_FILES, "UUID[,TARGET]", 0, "Pool UUID and a set of targets. If no TARGET is provided, all targets are used. This option " - "can be specified multiple times.", + "can be specified multiple times. By default all pools are used.", GROUP_OPTIONS}, {0}}; @@ -28,15 +28,6 @@ args_files_init(struct dlck_args_files *args) D_INIT_LIST_HEAD(&args->list); } -static int -args_files_check(struct argp_state *state, struct dlck_args_files *args) -{ - if (d_list_empty(&args->list)) { - RETURN_FAIL(state, EINVAL, "No file chosen"); - } - return 0; -} - static error_t args_files_parser(int key, char *arg, struct argp_state *state) { @@ -50,7 +41,6 @@ args_files_parser(int key, char *arg, struct argp_state *state) args_files_init(args); return 0; case ARGP_KEY_END: - return args_files_check(state, args); case ARGP_KEY_SUCCESS: case ARGP_KEY_FINI: return 0; diff --git a/src/utils/dlck/dlck_args_parse.c b/src/utils/dlck/dlck_args_parse.c index 581ff9af106..17aa2ab9e2d 100644 --- a/src/utils/dlck/dlck_args_parse.c +++ b/src/utils/dlck/dlck_args_parse.c @@ -9,6 +9,7 @@ #include #include "dlck_args.h" +#include "dlck_bitmap.h" int parse_unsigned(const char *arg, unsigned *value, struct argp_state *state) @@ -49,8 +50,6 @@ parse_file(const char *arg, struct argp_state *state, struct dlck_file **file_pt RETURN_FAIL(state, ENOMEM, "Out of memory"); } - file->desc = arg; - 
D_STRNDUP(arg_copy, arg, FILE_STR_MAX); if (arg_copy == NULL) { FAIL(state, rc, ENOMEM, "Out of memory"); @@ -77,7 +76,7 @@ parse_file(const char *arg, struct argp_state *state, struct dlck_file **file_pt FAIL(state, rc, EINVAL, "Chosen target is too big: %" PRIu32 ">%" PRIu32, target, DLCK_TARGET_MAX); } - file->targets_bitmap |= (1 << target); + dlck_bitmap_setbit32(&file->targets_bitmap, target); } /** No target means all targets. */ @@ -99,10 +98,23 @@ parse_file(const char *arg, struct argp_state *state, struct dlck_file **file_pt return rc; } -enum dlck_cmd -parse_command(const char *arg) +#define DLCK_EVENT_ERROR_STR "error" +#define DLCK_EVENT_WARNING_STR "warning" + +enum checker_event +parse_event(const char *option, const char *value, struct argp_state *state, int *rc) { - /** placeholder for future commands */ + if (value != NULL) { + if (strcmp(value, DLCK_EVENT_ERROR_STR) == 0) { + return CHECKER_EVENT_ERROR; + } else if (strcmp(value, DLCK_EVENT_WARNING_STR) == 0) { + return CHECKER_EVENT_WARNING; + } + + FAIL(state, *rc, EINVAL, "Invalid event '%s' for the '%s' option", value, option); + } else { + FAIL(state, *rc, EINVAL, MISSING_ARG_FMT, option); + } - return DLCK_CMD_UNKNOWN; + return CHECKER_EVENT_INVALID; } diff --git a/src/utils/dlck/dlck_bitmap.h b/src/utils/dlck/dlck_bitmap.h new file mode 100644 index 00000000000..9a204113c14 --- /dev/null +++ b/src/utils/dlck/dlck_bitmap.h @@ -0,0 +1,55 @@ +/** + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ + +#ifndef __DLCK_BITMAP__ +#define __DLCK_BITMAP__ + +#include +#include + +/** + * Typed variant of the setbit() macro. For 32-bit values. + * + * \param[in,out] bitmap Bitmap to modify. + * \param[in] bit Bit to set. + */ +static inline void +dlck_bitmap_setbit32(uint32_t *bitmap, int bit) +{ + setbit((uint8_t *)bitmap, bit); +} + +/** + * Typed variant of the isset() macro. For 32-bit values. 
+ * + * \param[in] bitmap Bitmap to check. + * \param[in] bit Bit to check. + * + * \retval true if \p bit is set. + * \retval false otherwise. + */ +static inline bool +dlck_bitmap_isset32(uint32_t bitmap, int bit) +{ + return isset((uint8_t *)&bitmap, bit); +} + +/** + * Typed variant of the isclr() macro. For 32-bit values. + * + * \param[in] bitmap Bitmap to check. + * \param[in] bit Bit to check. + * + * \retval true if \p bit is NOT set. + * \retval false otherwise. + */ +static inline bool +dlck_bitmap_isclr32(uint32_t bitmap, int bit) +{ + return isclr((uint8_t *)&bitmap, bit); +} + +#endif /** __DLCK_BITMAP__ */ diff --git a/src/utils/dlck/dlck_checker.c b/src/utils/dlck/dlck_checker.c new file mode 100644 index 00000000000..986cd852755 --- /dev/null +++ b/src/utils/dlck/dlck_checker.c @@ -0,0 +1,242 @@ +/** + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ +#define D_LOGFAC DD_FAC(dlck) + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "dlck_checker.h" + +/** + * Flush output immediately in case DLCK crashes unexpectedly. + * Intended to ensure no useful diagnostic information is lost due to not flushed buffers. + */ +static int +dlck_vprintf_internal(FILE *stream, const char *fmt, va_list args) +{ + int rc; + + rc = vfprintf(stream, fmt, args); + if (rc < 0) { + rc = daos_errno2der(errno); + D_ERROR("vfprintf() failed: " DF_RC "\n", DP_RC(rc)); + return rc; + } + + rc = fflush(stream); + if (rc == EOF) { + rc = daos_errno2der(errno); + D_ERROR("fflush() failed: " DF_RC "\n", DP_RC(rc)); + return rc; + } + + return rc; +} + +/** + * Wrap printing in a lock/unlock block to guarantee thread-safe output. 
+ */ +static int +dlck_checker_main_vprintf(struct checker *ck, const char *fmt, va_list args) +{ + struct dlck_checker_main *dcm = dlck_checker_main_get_custom(ck); + int rc_abt; + int rc; + + rc_abt = ABT_mutex_lock(dcm->stream_mutex); + if (rc_abt != ABT_SUCCESS) { + rc = dss_abterr2der(rc_abt); + D_ERROR(DLCK_PRINT_MAIN_LOCK_FAIL_FMT, DP_RC(rc)); + return rc; + } + + rc = dlck_vprintf_internal(dcm->core.stream, fmt, args); + if (rc != DER_SUCCESS) { + (void)ABT_mutex_unlock(dcm->stream_mutex); + return rc; + } + + rc_abt = ABT_mutex_unlock(dcm->stream_mutex); + if (rc_abt != ABT_SUCCESS) { + rc = dss_abterr2der(rc_abt); + D_ERROR(DLCK_PRINT_MAIN_UNLOCK_FAIL_FMT, DP_RC(rc)); + return rc; + } + + return rc; +} + +static int +dlck_checker_core_indent_set(struct dlck_checker_worker *dwc, int level) +{ + memset(dwc->prefix, DLCK_PRINT_INDENT, CHECKER_INDENT_MAX); + if (level > 0) { + dwc->prefix[level] = ' '; + dwc->prefix[level + 1] = '\0'; + } else { + dwc->prefix[0] = '\0'; + } + + return DER_SUCCESS; +} + +static int +dlck_checker_main_indent_set(struct checker *ck) +{ + struct dlck_checker_main *dcm = dlck_checker_main_get_custom(ck); + return dlck_checker_core_indent_set(&dcm->core, ck->ck_level); +} + +int +dlck_checker_main_init(struct checker *ck) +{ + struct dlck_checker_main *dcm; + int rc_abt; + int rc; + + D_ALLOC_PTR(dcm); + if (dcm == NULL) { + return -DER_NOMEM; + } + + dcm->core.magic = DLCK_CHECKER_MAIN_MAGIC; + dcm->core.stream = stdout; + + rc_abt = ABT_mutex_create(&dcm->stream_mutex); + if (rc_abt != ABT_SUCCESS) { + rc = dss_abterr2der(rc_abt); + D_ERROR("Cannot create a stream synchronization mutex: " DF_RC "\n", DP_RC(rc)); + D_FREE(dcm); + return rc; + } + + ck->ck_private = dcm; + ck->ck_vprintf = dlck_checker_main_vprintf; + ck->ck_indent_set = dlck_checker_main_indent_set; + ck->ck_prefix = dcm->core.prefix; + + return DER_SUCCESS; +} + +int +dlck_checker_main_fini(struct checker *ck) +{ + struct dlck_checker_main *dcm = 
dlck_checker_main_get_custom(ck); + int rc_abt; + int rc = DER_SUCCESS; + + rc_abt = ABT_mutex_free(&dcm->stream_mutex); + if (rc_abt != ABT_SUCCESS) { + rc = dss_abterr2der(rc_abt); + D_ERROR("Failed to free the stream synchronization mutex: " DF_RC "\n", DP_RC(rc)); + } + + D_FREE(dcm); + memset(ck, 0, sizeof(*ck)); + + return rc; +} + +/** + * Get the custom payload from the worker's checker. + * + * \param[in] ck Print utility (only the worker one will work). + * + * \return The custom payload. + */ +static inline struct dlck_checker_worker * +dlck_checker_worker_get_custom(struct checker *ck) +{ + struct dlck_checker_worker *dcw = ck->ck_private; + D_ASSERT(dcw->magic == DLCK_CHECKER_WORKER_MAGIC); + return dcw; +} + +static int +dlck_checker_worker_indent_set(struct checker *ck) +{ + struct dlck_checker_worker *dcw = dlck_checker_worker_get_custom(ck); + return dlck_checker_core_indent_set(dcw, ck->ck_level); +} + +/** + * Just print. + */ +static int +dlck_checker_worker_vprintf(struct checker *ck, const char *fmt, va_list args) +{ + struct dlck_checker_worker *dcw = dlck_checker_worker_get_custom(ck); + FILE *stream = dcw->stream; + + return dlck_vprintf_internal(stream, fmt, args); +} + +int +dlck_checker_worker_init(struct checker_options *options, const char *log_dir, uuid_t po_uuid, + int tgt_id, struct checker *main_ck, struct checker *ck) +{ + struct dlck_checker_worker *dcw; + char *log_file; + FILE *stream; + int rc; + + D_ALLOC_PTR(dcw); + if (dcw == NULL) { + return -DER_NOMEM; + } + + /** open the logfile */ + D_ASPRINTF(log_file, "%s/" DF_UUIDF "_%s%d", log_dir, DP_UUID(po_uuid), VOS_FILE, tgt_id); + if (log_file == NULL) { + rc = -DER_NOMEM; + CK_PRINTFL_RC(main_ck, rc, "[%d] Log file path allocation failed", tgt_id); + /** + * It is very unlikely we can continue work without an ability to allocate more + * memory. 
+ */ + D_FREE(dcw); + return rc; + } + + stream = fopen(log_file, "w"); + if (stream == NULL) { + rc = daos_errno2der(errno); + CK_PRINTFL_RC(main_ck, rc, "[%d] Log file open failed: %s", tgt_id, log_file); + D_FREE(log_file); + D_FREE(dcw); + return rc; + } + D_FREE(log_file); + + dcw->magic = DLCK_CHECKER_WORKER_MAGIC; + dcw->stream = stream; + + memset(ck, 0, sizeof(*ck)); + memcpy(&ck->ck_options, options, sizeof(*options)); + ck->ck_vprintf = dlck_checker_worker_vprintf; + ck->ck_indent_set = dlck_checker_worker_indent_set; + ck->ck_private = dcw; + ck->ck_prefix = dcw->prefix; + + return DER_SUCCESS; +} + +void +dlck_checker_worker_fini(struct checker *ck) +{ + struct dlck_checker_worker *dcw = dlck_checker_worker_get_custom(ck); + + (void)fclose(dcw->stream); + D_FREE(dcw); + memset(ck, 0, sizeof(*ck)); +} diff --git a/src/utils/dlck/dlck_checker.h b/src/utils/dlck/dlck_checker.h new file mode 100644 index 00000000000..1fe663266e2 --- /dev/null +++ b/src/utils/dlck/dlck_checker.h @@ -0,0 +1,115 @@ +/** + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ + +#ifndef __DLCK_CHECKER__ +#define __DLCK_CHECKER__ + +#include + +#include + +#define DLCK_PRINT_INDENT '-' +#define DLCK_CHECKER_MAIN_MAGIC 0x17A28DC5626110A5 +#define DLCK_CHECKER_WORKER_MAGIC 0xEB4F7DD311060A6D + +/** + * \struct dlck_checker_worker + * + * Custom payload of the worker checker. + */ +struct dlck_checker_worker { + uint64_t magic; + FILE *stream; + char prefix[CHECKER_INDENT_MAX + 2]; /** ' ' and '\0' hence 2 characters */ +}; + +/** + * \struct dlck_checker_main + * + * Custom payload of the main checker. 
+ */ +struct dlck_checker_main { + struct dlck_checker_worker core; + ABT_mutex stream_mutex; +}; + +#define DLCK_PRINT_MAIN_LOCK_FAIL_FMT \ + "Failed to lock the stream's synchronization mutex: " DF_RC "\n" +#define DLCK_PRINT_MAIN_UNLOCK_FAIL_FMT \ + "Failed to unlock the stream's synchronization mutex: " DF_RC "\n" + +/** + * \brief Init the main checker. + * + * Prints to stdout and it is guarded by a mutex. + * + * \param[out] ck Initialized checker. + * + * \retval DER_SUCCESS Success. + * \retval -DER_NOMEM Out of memory. + */ +int +dlck_checker_main_init(struct checker *ck); + +/** + * Finalize the main checker. + * + * \param[in] ck Checker to finalize. + * + * \retval DER_SUCCESS Success. + * \retval -DER_* An error. + */ +int +dlck_checker_main_fini(struct checker *ck); + +/** + * Get the custom payload from the main checker. + * + * \note Only for advanced use-cases. Please see DLCK_PRINT*() macros first. + * + * \param[in] ck Checker (only the main one will work). + * + * \return The custom payload. + */ +static inline struct dlck_checker_main * +dlck_checker_main_get_custom(struct checker *ck) +{ + struct dlck_checker_main *dcm = ck->ck_private; + D_ASSERT(dcm->core.magic == DLCK_CHECKER_MAIN_MAGIC); + return dcm; +} + +/** + * \brief Init a worker's checker. + * + * Creates and opens a logfile. The created checker will direct log into the created file. + * + * \param[in] options Control options. + * \param[in] log_dir Directory where a logfile will be created. + * \param[in] po_uuid Pool's UUID. + * \param[in] tgt_id Target's ID. + * \param[in] main_ck Main checker. To report errors when they occur. + * \param[out] ck Created checker. + * + * \retval DER_SUCCESS Success. + * \retval -DER_NOMEM Out of memory. + * \retval -DER_* Other error. 
+ */ +int +dlck_checker_worker_init(struct checker_options *options, const char *log_dir, uuid_t po_uuid, + int tgt_id, struct checker *main_ck, struct checker *ck); + +/** + * \brief Finalize the worker's checker. + * + * \note The worker output stream will be closed. + * + * \param[in] ck Checker to finalize. + */ +void +dlck_checker_worker_fini(struct checker *ck); + +#endif /** __DLCK_CHECKER__ */ diff --git a/src/utils/dlck/dlck_cmds.h b/src/utils/dlck/dlck_cmds.h index 46f6521e05b..38a034baad6 100644 --- a/src/utils/dlck/dlck_cmds.h +++ b/src/utils/dlck/dlck_cmds.h @@ -7,17 +7,23 @@ #ifndef __DLCK_CMDS__ #define __DLCK_CMDS__ -enum dlck_cmd { - DLCK_CMD_NOT_SET = -2, - DLCK_CMD_UNKNOWN = -1, -}; +#include "dlck_args.h" -struct dlck_control; - -typedef int (*dlck_cmd_func)(struct dlck_control *ctrl); - -#define DLCK_CMDS_FUNCS \ - { \ - } +/** + * \brief Validate the integrity of the pool(s) metadata. + * + * The \p ctrl argument specifies which pool(s) to check and how the output will be printed. + * + * \param[in] ctrl Control bundle. + * + * \retval DER_SUCCESS All checked pools are ok. + * \retval -DER_DF_INVAL Durable format error. + * \retval -DER_DF_INCOMPT Incompatible durable format. + * \retval -DER_ID_MISMATCH Pool UUID mismatch. + * \retval -DER_NOTYPE Unexpected contents. + * \retval -DER_* Other errors. 
+ */ +int +dlck_cmd_check(struct dlck_control *ctrl); #endif /** __DLCK_CMDS__ */ diff --git a/src/utils/dlck/dlck_engine.c b/src/utils/dlck/dlck_engine.c index 639b0598199..46670f3a443 100644 --- a/src/utils/dlck/dlck_engine.c +++ b/src/utils/dlck/dlck_engine.c @@ -419,6 +419,10 @@ dlck_engine_start(struct dlck_args_engine *args, struct dlck_engine **engine_ptr int tag = DAOS_SERVER_TAG - DAOS_TGT_TAG; int rc; + if (DAOS_FAIL_CHECK(DLCK_FAULT_ENGINE_START)) { /** fault injection */ + return daos_errno2der(daos_fail_value_get()); + } + rc = dlck_engine_alloc(args->targets, &engine); if (rc != DER_SUCCESS) { return rc; @@ -429,15 +433,10 @@ dlck_engine_start(struct dlck_args_engine *args, struct dlck_engine **engine_ptr goto fail_engine_free; } - rc = dlck_abt_init(engine); - if (rc != DER_SUCCESS) { - goto fail_engine_free; - } - rc = bio_nvme_init(args->nvme_conf, args->numa_node, args->max_dma_buf_size, args->nvme_hugepage_size, args->targets, bypass_health_chk); if (rc != DER_SUCCESS) { - goto fail_abt_fini; + goto fail_engine_free; } dss_register_key(&daos_srv_modkey); @@ -485,8 +484,6 @@ dlck_engine_start(struct dlck_args_engine *args, struct dlck_engine **engine_ptr dss_unregister_key(&vos_module_key); dss_unregister_key(&daos_srv_modkey); bio_nvme_fini(); -fail_abt_fini: - (void)dlck_abt_fini(engine); fail_engine_free: dlck_engine_free(engine); @@ -498,6 +495,15 @@ dlck_engine_stop(struct dlck_engine *engine) { int rc; + if (DAOS_FAIL_CHECK(DLCK_FAULT_ENGINE_STOP)) { /** fault injection */ + return daos_errno2der(daos_fail_value_get()); + } + + if (engine->join_fail) { + /** Cannot stop the engine in this case. It will probably crash. 
*/ + return -DER_BUSY; + } + rc = xstream_stop_all(engine); if (rc != DER_SUCCESS) { /** not all execution streams were stopped - can't pull out other resources */ @@ -521,148 +527,228 @@ dlck_engine_stop(struct dlck_engine *engine) bio_nvme_fini(); - rc = dlck_abt_fini(engine); - dlck_engine_free(engine); return rc; } -int -dlck_engine_exec_all(struct dlck_engine *engine, dlck_ult_func exec_one, - arg_alloc_fn_t arg_alloc_fn, void *custom, arg_free_fn_t arg_free_fn) -{ +/** + * \struct dlck_exec + * + * Job batch. ULTs + their arguments + the free function to clean it all up. + */ +struct dlck_exec { struct dlck_ult *ults; void **ult_args; - int rc; - int rc2; + void *custom; + arg_free_fn_t arg_free_fn; +}; + +/** + * \brief Join all ULTs but ignore errors. No error returned neither. + * + * \note It is designed as a cleanup procedure in case of an error either while starting or stopping + * ULTs. + * + * \param[in] engine Engine to clean up. + * \param[in,out] de Execution to stop and cleanup after. + */ +static void +dlck_engine_join_all_no_error(struct dlck_engine *engine, struct dlck_exec *de) +{ + int rc; - D_ALLOC_ARRAY(ults, engine->targets); - if (ults == NULL) { + for (int i = 0; i < engine->targets; ++i) { + if (de->ults[i].thread != ABT_THREAD_NULL) { + rc = ABT_thread_join(de->ults[i].thread); + if (rc != ABT_SUCCESS) { + engine->join_fail = true; + /** + * the ULT did not join - can't free the thread nor free the + * arguments + */ + continue; + } + + (void)ABT_thread_free(&de->ults[i].thread); + } + (void)de->arg_free_fn(de->custom, &de->ult_args[i]); + } + + D_FREE(de->ult_args); + D_FREE(de->ults); +} + +/** + * Spawn an ULT on each of the targets execution stream. + * + * \param[in] engine Engine to run the created ULTs on. + * \param[in] exec_one Function to run in the ULTs. + * \param[in] arg_alloc_fn Function to allocate arguments for an ULT. + * \param[in,out] de Execution state to store the created resources. 
+ * + * \retval DER_SUCCESS Success. + * \retval -DER_NOMEM Out of memory. + * \retval -DER_* Other error. + */ +static int +dlck_engine_targets_start(struct dlck_engine *engine, dlck_ult_func exec_one, + arg_alloc_fn_t arg_alloc_fn, struct dlck_exec *de) +{ + int rc = DER_SUCCESS; + + if (DAOS_FAIL_CHECK(DLCK_FAULT_ENGINE_EXEC)) { /** fault injection */ + return daos_errno2der(daos_fail_value_get()); + } + + D_ALLOC_ARRAY(de->ults, engine->targets); + if (de->ults == NULL) { return -DER_NOMEM; } - D_ALLOC_ARRAY(ult_args, engine->targets); - if (ult_args == NULL) { - D_FREE(ults); + D_ALLOC_ARRAY(de->ult_args, engine->targets); + if (de->ult_args == NULL) { + D_FREE(de->ults); return -DER_NOMEM; } for (int i = 0; i < engine->targets; ++i) { /** prepare arguments */ - rc = arg_alloc_fn(engine, i, custom, &ult_args[i]); + rc = arg_alloc_fn(engine, i, de->custom, &de->ult_args[i]); if (rc != DER_SUCCESS) { goto fail_join_and_free; } /** start an ULT */ - rc = dlck_ult_create(engine->xss[i].pool, exec_one, ult_args[i], &ults[i]); + rc = dlck_ult_create(engine->xss[i].pool, exec_one, de->ult_args[i], &de->ults[i]); if (rc != DER_SUCCESS) { goto fail_join_and_free; } } + return rc; + +fail_join_and_free: + dlck_engine_join_all_no_error(engine, de); + + return rc; +} + +/** + * Wait for all the target ULTs to conclude. + * + * \param[in] engine Engine where the ULTs run. + * \param[in] de Execution state to wait for and release. + * + * \retval DER_SUCCESS Success. + * \retval -DER_* Other error. 
+ */ +static int +dlck_engine_targets_stop(struct dlck_engine *engine, struct dlck_exec *de) +{ + int rc = DER_SUCCESS; + + if (DAOS_FAIL_CHECK(DLCK_FAULT_ENGINE_JOIN)) { /** fault injection */ + engine->join_fail = true; + return daos_errno2der(daos_fail_value_get()); + } + for (int i = 0; i < engine->targets; ++i) { - rc = ABT_thread_join(ults[i].thread); + rc = ABT_thread_join(de->ults[i].thread); if (rc != ABT_SUCCESS) { rc = dss_abterr2der(rc); + engine->join_fail = true; goto fail_join_and_free; } - rc = ABT_thread_free(&ults[i].thread); + rc = ABT_thread_free(&de->ults[i].thread); if (rc != ABT_SUCCESS) { rc = dss_abterr2der(rc); goto fail_join_and_free; } - rc = arg_free_fn(custom, &ult_args[i]); + rc = de->arg_free_fn(de->custom, &de->ult_args[i]); if (rc != 0) { goto fail_join_and_free; } } - D_FREE(ult_args); - D_FREE(ults); + D_FREE(de->ult_args); + D_FREE(de->ults); - return DER_SUCCESS; + return rc; fail_join_and_free: - for (int i = 0; i < engine->targets; ++i) { - if (ults[i].thread != ABT_THREAD_NULL) { - rc2 = ABT_thread_join(ults[i].thread); - if (rc2 != ABT_SUCCESS) { - /** - * the ULT did not join - can't free the thread nor free the - * arguments - */ - continue; - } - } - (void)ABT_thread_free(&ults[i].thread); - (void)arg_free_fn(custom, &ult_args[i]); - } - - D_FREE(ult_args); - D_FREE(ults); + dlck_engine_join_all_no_error(engine, de); return rc; } +#define STOP_TGT_STR "Wait for targets to stop" + int -dlck_pool_open_safe(ABT_mutex mtx, const char *storage_path, uuid_t po_uuid, int tgt_id, - daos_handle_t *poh) +dlck_engine_exec_all(struct dlck_engine *engine, dlck_ult_func exec_one, + arg_alloc_fn_t arg_alloc_fn, void *custom, arg_free_fn_t arg_free_fn, + struct checker *ck) { - int rc; - int rc_abt; - - rc_abt = ABT_mutex_lock(mtx); - if (rc_abt != ABT_SUCCESS) { - return dss_abterr2der(rc_abt); - } - - rc = dlck_pool_open(storage_path, po_uuid, tgt_id, poh); + struct dlck_exec de = {0}; + int rc; - /** unlock ASAP */ - rc_abt = 
ABT_mutex_unlock(mtx); + /** initialize batch */ + de.arg_free_fn = arg_free_fn; + de.custom = custom; - /** code returned from the open operation takes precedence */ + CK_PRINT(ck, "Start targets... "); + rc = dlck_engine_targets_start(engine, exec_one, arg_alloc_fn, &de); + CK_APPENDL_OK(ck); if (rc != DER_SUCCESS) { return rc; } - /** unlock error is an error */ - if (rc_abt != ABT_SUCCESS) { - return dss_abterr2der(rc_abt); - } + CK_PRINT(ck, STOP_TGT_STR "...\n"); + rc = dlck_engine_targets_stop(engine, &de); + CK_PRINTL_RC(ck, rc, STOP_TGT_STR); - return DER_SUCCESS; + return rc; } int -dlck_pool_close_safe(ABT_mutex mtx, daos_handle_t poh) +dlck_engine_xstream_arg_alloc(struct dlck_engine *engine, int idx, void *ctrl_ptr, + void **output_arg) { - int rc; - int rc_abt; + struct xstream_arg *xa; - rc_abt = ABT_mutex_lock(mtx); - if (rc_abt != ABT_SUCCESS) { - return dss_abterr2der(rc_abt); + D_ALLOC_PTR(xa); + if (xa == NULL) { + return -DER_NOMEM; } - rc = vos_pool_close(poh); + xa->ctrl = ctrl_ptr; + xa->engine = engine; + xa->xs = &engine->xss[idx]; + xa->rc = DER_SUCCESS; - /** unlock ASAP */ - rc_abt = ABT_mutex_unlock(mtx); + *output_arg = xa; - /** code returned from the close operation takes precedence */ - if (rc != DER_SUCCESS) { - return rc; - } + return DER_SUCCESS; +} - /** unlock error is an error */ - if (rc_abt != ABT_SUCCESS) { - return dss_abterr2der(rc_abt); +int +dlck_engine_xstream_arg_free(void *ctrl_ptr, void **arg) +{ + struct dlck_control *ctrl = ctrl_ptr; + struct xstream_arg *xa = *arg; + int rc; + + if (xa == NULL) { + return DER_SUCCESS; } - return DER_SUCCESS; + rc = xa->rc; + dlck_uadd_no_overflow(ctrl->warnings_num, xa->warnings_num, &ctrl->warnings_num); + + D_FREE(*arg); + *arg = NULL; + + return rc; } diff --git a/src/utils/dlck/dlck_engine.h b/src/utils/dlck/dlck_engine.h index a77820930eb..d619e7b14bb 100644 --- a/src/utils/dlck/dlck_engine.h +++ b/src/utils/dlck/dlck_engine.h @@ -11,6 +11,14 @@ #include "dlck_args.h" +#if 
defined(__x86_64) || defined(_M_X64) || defined(__aarch64__) || defined(__riscv) +#define CACHELINE_SIZE 64ULL +#elif defined(__PPC64__) +#define CACHELINE_SIZE 128ULL +#else +#error unable to recognize architecture at compile time +#endif + struct dlck_ult { ABT_thread thread; }; @@ -29,7 +37,7 @@ struct dlck_xstream { struct dlck_engine { unsigned targets; struct dlck_xstream *xss; - ABT_mutex open_mtx; + bool join_fail; }; typedef void (*dlck_ult_func)(void *arg); @@ -53,6 +61,7 @@ dlck_engine_start(struct dlck_args_engine *args, struct dlck_engine **engine_ptr * \param[in] engine Engine to stop. * * \retval DER_SUCCESS Success. + * \retval -DER_BUSY Joining ULTs failed. Unrecoverable. * \retval -DER_* Errors. */ int @@ -83,28 +92,6 @@ dlck_engine_xstream_fini(struct dlck_xstream *xs); /** dlck_abt.c */ -/** - * Initialize ABT as it is about to be used by the \p engine. - * - * \param[out] engine Engine for which ABT is initialized for. - * - * \retval DER_SUCCESS Success. - * \retval -DER_* Error. - */ -int -dlck_abt_init(struct dlck_engine *engine); - -/** - * Finalize ABT for the \p engine. - * - * \param[in,out] engine Engine for which ABT is finalized for. - * - * \retval DER_SUCCESS Success. - * \retval -DER_* Error. - */ -int -dlck_abt_fini(struct dlck_engine *engine); - /** * Just create an ABT execution stream. * @@ -158,45 +145,95 @@ typedef int (*arg_free_fn_t)(void *custom, void **arg); * \param[in] arg_alloc_fn Function to allocate arguments for an ULT. * \param[in] custom Custom parameters for \p arg_alloc_fn and \p arg_free_fn function. * \param[in] arg_free_fn Function to free arguments. + * \param[in] checker Checker. * * \retval DER_SUCCESS Success. * \retval -DER_* Error. 
*/ int dlck_engine_exec_all(struct dlck_engine *engine, dlck_ult_func exec_one, - arg_alloc_fn_t arg_alloc_fn, void *input_arg, arg_free_fn_t arg_free_fn); + arg_alloc_fn_t arg_alloc_fn, void *input_arg, arg_free_fn_t arg_free_fn, + struct checker *ck); + +#define DLCK_XSTREAM_PROGRESS_END UINT_MAX /** - * Open a pool but lock the \p mtx mutex first and unlock it after. Thread-safe. - * - * \param[in] mtx Mutex. - * \param[in] storage_path Storage path. - * \param[in] po_uuid Pool UUID. - * \param[in] tgt_id Target ID. - * \param[out] poh Pool handle. - * - * \retval DER_SUCCESS Success. - * \retval -DER_NOMEM Out of memory. - * \retval -DER_NO_PERM Permission problem. Please see open(3) and fallocate(2). - * \retval -DER_EXIST The file already exists. Please see open(3). - * \retval -DER_NONEXIST The file does not exist. Please see open(3). - * \retval -DER_NOSPACE There is not enough space left on the device. - * \retval -DER_* Possibly other errors. + * @struct xstream_arg + * + * Arguments passed to the main ULT on each of the execution streams. + */ +struct xstream_arg { + /** in */ + struct dlck_control *ctrl; /** Control state. */ + struct dlck_engine *engine; /** Engine itself. */ + struct dlck_xstream *xs; /** The execution stream the ULT is run in. */ + /** out */ + volatile unsigned progress __attribute__((__aligned__(CACHELINE_SIZE))); + int rc; /** return code */ + unsigned warnings_num; +}; + +static inline void +dlck_xstream_set_rc(struct xstream_arg *xa, int rc) +{ + if (rc == DER_SUCCESS) { + return; + } + + /** do not overwrite the first error found */ + if (xa->rc == DER_SUCCESS) { + xa->rc = rc; + } +} + +static inline void +dlck_uadd_no_overflow(unsigned a, unsigned b, unsigned *result) +{ + /** safeguard against integer overflow */ + if (__builtin_uadd_overflow(a, b, result)) { + *result = UINT_MAX; + } +} + +/** + * Allocate arguments for a ULT. + * + * \param[in] engine Engine the ULT is about to be run in. + * \param[in] idx ULT ID. 
+ * \param[in] ctrl_ptr Control state to be passed to the ULT. + * \param[out] output_arg Allocated argument for the ULT. + * + * \retval DER_SUCCESS Success. + * \retval -DER_NOMEM Out of memory. */ int -dlck_pool_open_safe(ABT_mutex mtx, const char *storage_path, uuid_t po_uuid, int tgt_id, - daos_handle_t *poh); +dlck_engine_xstream_arg_alloc(struct dlck_engine *engine, int idx, void *ctrl_ptr, + void **output_arg); /** - * Close a pool but lock the \p mtx mutex first and unlock it after. Thread-safe. + * Free arguments of a ULT. * - * \param[in] mtx Mutex. - * \param[in] poh Pool handle. + * \param[out] ctrl_ptr Control state to collect stats in. + * \param[in,out] arg ULT arguments to process and free. * - * \retval DER_SUCCESS Success. - * \retval -DER_INVAL Issues with \p mtx. + * \return The return code for the ULT. */ int -dlck_pool_close_safe(ABT_mutex mtx, daos_handle_t poh); +dlck_engine_xstream_arg_free(void *ctrl_ptr, void **arg); + +/** + * Read the progress of the given execution stream \p xa. + * + * \param[in] xa Execution stream. + * \param[out] progress Progress read from \p xa. + * + * \retval DER_SUCCESS Success. + * \retval -DER_INVAL Invalid mutex. 
+ */ +static inline void +dlck_xstream_progress_get(struct xstream_arg *xa, unsigned *progress) +{ + *progress = xa->progress; +} #endif /** __DLCK_ENGINE__ */ diff --git a/src/utils/dlck/dlck_main.c b/src/utils/dlck/dlck_main.c index 7d957021fd8..b75c7981755 100644 --- a/src/utils/dlck/dlck_main.c +++ b/src/utils/dlck/dlck_main.c @@ -3,33 +3,83 @@ * * SPDX-License-Identifier: BSD-2-Clause-Patent */ +#define D_LOGFAC DD_FAC(dlck) #include #include +#include +#include +#include +#include #include -#include "dlck_cmds.h" #include "dlck_args.h" - -static const dlck_cmd_func dlck_cmds[] = DLCK_CMDS_FUNCS; +#include "dlck_checker.h" +#include "dlck_cmds.h" int main(int argc, char *argv[]) { struct dlck_control ctrl = {0}; + int rc_abt; int rc; + rc = d_fault_inject_init(); + if (rc != DER_SUCCESS && rc != -DER_NOSYS) { + return rc; + } + + if (d_fault_inject_is_enabled()) { + /** an errno value the fault injection will trigger */ + daos_fail_value_set(EINVAL); + } + dlck_args_parse(argc, argv, &ctrl); - D_ASSERT(ctrl.common.cmd < ARRAY_SIZE(dlck_cmds)); - D_ASSERT(ctrl.common.cmd >= 0); + rc_abt = ABT_init(0, NULL); + if (rc_abt != ABT_SUCCESS) { + rc = dss_abterr2der(rc_abt); + goto err_args_free; + } + + rc = dlck_checker_main_init(&ctrl.checker); + if (rc != DER_SUCCESS) { + goto err_abt_fini; + } - ctrl.print.dp_printf = printf; + rc = dlck_cmd_check(&ctrl); + if (rc != DER_SUCCESS) { + goto err_print_main_fini; + } - rc = dlck_cmds[ctrl.common.cmd](&ctrl); + rc = dlck_checker_main_fini(&ctrl.checker); + if (rc != DER_SUCCESS) { + goto err_abt_fini; + } + + rc_abt = ABT_finalize(); + if (rc_abt != ABT_SUCCESS) { + rc = dss_abterr2der(rc_abt); + goto err_args_free; + } + + dlck_args_free(&ctrl); + + rc = d_fault_inject_fini(); + if (rc == -DER_NOSYS) { + rc = DER_SUCCESS; + } + + return rc; +err_print_main_fini: + (void)dlck_checker_main_fini(&ctrl.checker); +err_abt_fini: + (void)ABT_finalize(); +err_args_free: dlck_args_free(&ctrl); + (void)d_fault_inject_fini(); 
return rc; } diff --git a/src/utils/dlck/dlck_pool.c b/src/utils/dlck/dlck_pool.c index 937d372ed12..a766d1ed292 100644 --- a/src/utils/dlck/dlck_pool.c +++ b/src/utils/dlck/dlck_pool.c @@ -15,7 +15,7 @@ #include "dlck_pool.h" int -dlck_pool_mkdir(const char *storage_path, uuid_t po_uuid) +dlck_pool_mkdir(const char *storage_path, uuid_t po_uuid, struct checker *ck) { char po_uuid_str[UUID_STR_LEN]; char *path; @@ -28,13 +28,37 @@ dlck_pool_mkdir(const char *storage_path, uuid_t po_uuid) return -DER_NOMEM; } - rc = mkdir(path, 0777); - D_FREE(path); + if (DAOS_FAIL_CHECK(DLCK_FAULT_CREATE_POOL_DIR)) { + errno = daos_fail_value_get(); + rc = -1; + } else { + rc = mkdir(path, 0777); + } if (rc != 0 && errno != EEXIST) { - return daos_errno2der(errno); + rc = daos_errno2der(errno); + CK_PRINTFL_RC(ck, rc, "Cannot create a pool directory: %s", path); } else { - return DER_SUCCESS; + rc = DER_SUCCESS; + } + + D_FREE(path); + return rc; +} + +int +dlck_pool_mkdir_all(const char *storage_path, d_list_t *files, struct checker *ck) +{ + struct dlck_file *file; + int rc; + + d_list_for_each_entry(file, files, link) { + rc = dlck_pool_mkdir(storage_path, file->po_uuid, ck); + if (rc != DER_SUCCESS) { + return rc; + } } + + return DER_SUCCESS; } static int @@ -59,15 +83,11 @@ int dlck_pool_open(const char *storage_path, uuid_t po_uuid, int tgt_id, daos_handle_t *poh) { char *path; - char po_uuid_str[UUID_STR_LEN]; - const unsigned int flags = VOS_POF_EXCL | VOS_POF_FOR_FEATURE_FLAG; int rc; - uuid_unparse(po_uuid, po_uuid_str); - - D_ASPRINTF(path, "%s/%s/" VOS_FILE "%d", storage_path, po_uuid_str, tgt_id); - if (path == NULL) { - return -DER_NOMEM; + rc = ds_mgmt_file(storage_path, po_uuid, VOS_FILE, &tgt_id, &path); + if (rc != DER_SUCCESS) { + return rc; } /** no MD-on-SSD mode means no file preallocation is necessary */ @@ -78,7 +98,7 @@ dlck_pool_open(const char *storage_path, uuid_t po_uuid, int tgt_id, daos_handle } } - rc = vos_pool_open(path, po_uuid, flags, poh); + 
rc = vos_pool_open(path, po_uuid, DLCK_POOL_OPEN_FLAGS, poh); fail: D_FREE(path); @@ -121,3 +141,60 @@ dlck_pool_cont_list(daos_handle_t poh, d_list_t *co_uuids) return vos_iterate(¶m, VOS_ITER_COUUID, false, &anchors, cont_list_append, NULL, co_uuids, NULL); } + +int +dlck_pool_list(d_list_t *file_list) +{ + D_LIST_HEAD(pool_list); + int pool_cnt = 0; + struct smd_pool_info *pool_info = NULL; + struct smd_pool_info *tmp; + struct dlck_file *file = NULL; + struct dlck_file *file_tmp; + int rc; + + D_ASSERT(d_list_empty(file_list)); + + /** get the list of pools */ + rc = smd_pool_list(&pool_list, &pool_cnt); + if (rc != DER_SUCCESS) { + return rc; + } + + d_list_for_each_entry_safe(pool_info, tmp, &pool_list, spi_link) { + /** allocate a new file */ + D_ALLOC_PTR(file); + if (file == NULL) { + rc = -DER_NOMEM; + goto err; + } + + /** populate and append the file */ + uuid_copy(file->po_uuid, pool_info->spi_id); + file->targets_bitmap = -1; /** all targets by default */ + d_list_add(&file->link, file_list); + + /** remove the pool from the list and free it */ + d_list_del(&pool_info->spi_link); + smd_pool_free_info(pool_info); + } + + return DER_SUCCESS; + +err: + /** free the list of files */ + d_list_for_each_entry_safe(file, file_tmp, file_list, link) { + d_list_del(&file->link); + D_FREE(file); + } + D_ASSERT(d_list_empty(file_list)); + + /** free the list of pools */ + d_list_for_each_entry_safe(pool_info, tmp, &pool_list, spi_link) { + d_list_del(&pool_info->spi_link); + smd_pool_free_info(pool_info); + pool_info = NULL; + } + + return rc; +} diff --git a/src/utils/dlck/dlck_pool.h b/src/utils/dlck/dlck_pool.h index 384f29e02ba..882cd3bf0f6 100644 --- a/src/utils/dlck/dlck_pool.h +++ b/src/utils/dlck/dlck_pool.h @@ -11,21 +11,39 @@ #include "dlck_args.h" +#define DLCK_POOL_OPEN_FLAGS (VOS_POF_EXCL | VOS_POF_FOR_FEATURE_FLAG) + /** * Create a directory for the pool. * * \param[in] storage_path Storage path. * \param[in] po_uuid Pool UUID. 
+ * \param[in] ck Checker. * * \retval DER_SUCCESS Success. * \retval -DER_NOMEM Out of memory. * \retval -DER_NO_PERM Permission problem. Please see mkdir(2). - * \retval -DER_EXIST Directory already exists. * \retval -DER_NONEXIST A component of the \p storage_path does not exist. * \retval -DER_* Possibly other errors. */ int -dlck_pool_mkdir(const char *storage_path, uuid_t po_uuid); +dlck_pool_mkdir(const char *storage_path, uuid_t po_uuid, struct checker *ck); + +/** + * Create pool directories for all \p files provided. + * + * \param[in] storage_path Engine the ULT is about to be run in. + * \param[in] files List of files. + * \param[in] ck Checker. + * + * \retval DER_SUCCESS Success. + * \retval -DER_NOMEM Out of memory. + * \retval -DER_NO_PERM Permission problem. Please see mkdir(2). + * \retval -DER_NONEXIST A component of the \p storage_path does not exist. + * \retval -DER_* Possibly other errors but not -DER_EXIST. + */ +int +dlck_pool_mkdir_all(const char *storage_path, d_list_t *files, struct checker *ck); /** * Open a pool. @@ -66,4 +84,16 @@ struct co_uuid_list_elem { int dlck_pool_cont_list(daos_handle_t poh, d_list_t *co_uuids); +/** + * Add all files (pool UUIDs + all targets bitmap) to \p file_list. + * + * \param[out] file_list List of all files belonging to the given DAOS engine. + * + * \retval DER_SUCCESS Success. + * \retval -DER_NOMEM Out of memory. + * \retval -DER_* Possibly other errors. 
+ */ +int +dlck_pool_list(d_list_t *file_list); + #endif /** __DLCK_POOL__ */ diff --git a/src/utils/dlck/dlck_report.c b/src/utils/dlck/dlck_report.c new file mode 100644 index 00000000000..99b82a9dc7c --- /dev/null +++ b/src/utils/dlck/dlck_report.c @@ -0,0 +1,61 @@ +/** + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ +#define D_LOGFAC DD_FAC(dlck) + +#include +#include + +#include +#include + +#include "dlck_checker.h" +#include "dlck_report.h" + +#define DLCK_PROGRESS_LINE_LEN 32 + +/** + * Produce and provide a simple separator: + * + * ======== + */ +static inline char * +get_separator() +{ + static char separator[DLCK_PROGRESS_LINE_LEN] = {0}; + static bool initialized = false; + + if (unlikely(!initialized)) { + memset(separator, '=', DLCK_PROGRESS_LINE_LEN); + initialized = true; + } + + return separator; +} + +#define DLCK_PRINT_SEPARATOR(ck) CK_PRINTF(ck, "%s\n", get_separator()) + +/** + * \note This function is called when no other threads are running in parallel. No locks are + * necessary. + */ +void +dlck_report_results(int *rcs, unsigned targets, unsigned warnings_num, struct checker *ck) +{ + /** print header */ + DLCK_PRINT_SEPARATOR(ck); + CK_PRINT(ck, "Targets:\n"); + DLCK_PRINT_SEPARATOR(ck); + + /** print records */ + for (int i = 0; i < targets; ++i) { + CK_PRINTFL_RC(ck, rcs[i], "[%d] result", i); + } + + /** print footer */ + DLCK_PRINT_SEPARATOR(ck); + CK_PRINTF(ck, "Total: %u warning(s).\n", warnings_num); + DLCK_PRINT_SEPARATOR(ck); +} diff --git a/src/utils/dlck/dlck_report.h b/src/utils/dlck/dlck_report.h new file mode 100644 index 00000000000..c1a1c3da665 --- /dev/null +++ b/src/utils/dlck/dlck_report.h @@ -0,0 +1,25 @@ +/** + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ + +#ifndef __DLCK_REPORT__ +#define __DLCK_REPORT__ + +/** + * Report targets' results. 
+ * + * \param[in] rcs Array of return codes for all targets. + * \param[in] targets Number of targets. + * \param[in] warnings_num Number of warnings. + * \param[in] dp Main print utility. + * + * \retval DER_SUCCESS Success. + * \retval -DER_MISC Printing error. + * \retval -DER_* Other errors. + */ +void +dlck_report_results(int *rcs, unsigned targets, unsigned warnings_num, struct checker *ck); + +#endif /** __DLCK_REPORT__ */ diff --git a/src/utils/dlck/tests/dlck_test_helper.c b/src/utils/dlck/tests/dlck_test_helper.c index 74d83af0b45..cd2dd268b9a 100644 --- a/src/utils/dlck/tests/dlck_test_helper.c +++ b/src/utils/dlck/tests/dlck_test_helper.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include @@ -23,6 +22,7 @@ #include #include "../dlck_args.h" +#include "../dlck_bitmap.h" #include "../dlck_engine.h" #include "../dlck_pool.h" @@ -263,13 +263,13 @@ exec_one(void *arg) } d_list_for_each_entry(file, &xst->args_files->list, link) { - /** do not process the given file if the target is excluded */ - if ((file->targets_bitmap & (1 << xst->xs->tgt_id)) == 0) { + /** do not process the given file if the target is not requested */ + if (dlck_bitmap_isclr32(file->targets_bitmap, xst->xs->tgt_id)) { continue; } - rc = dlck_pool_open_safe(xst->engine->open_mtx, xst->args_engine->storage_path, - file->po_uuid, xst->xs->tgt_id, &xst->poh); + rc = dlck_pool_open(xst->args_engine->storage_path, file->po_uuid, xst->xs->tgt_id, + &xst->poh); if (rc != DER_SUCCESS) { xst->rc = rc; break; @@ -277,7 +277,7 @@ exec_one(void *arg) cont_process(xst, xst->co_uuid); - rc = dlck_pool_close_safe(xst->engine->open_mtx, xst->poh); + rc = vos_pool_close(xst->poh); if (rc != DER_SUCCESS) { xst->rc = rc; break; @@ -374,23 +374,38 @@ static struct argp argp = {NULL, parser, NULL /** usage */, NULL, children}; static int setup(struct dlck_helper_args *args, struct bundle *bundle) { - struct dlck_file *file; struct dlck_engine *engine; unsigned int seed = 
SRAND_SEED; int rc; + int rc_abt; - /** prepare pool storage directories */ - d_list_for_each_entry(file, &args->files.list, link) { - rc = dlck_pool_mkdir(args->engine.storage_path, file->po_uuid); - assert_int_equal(rc, DER_SUCCESS); + rc_abt = ABT_init(0, NULL); + if (rc_abt != ABT_SUCCESS) { + rc = dss_abterr2der(rc_abt); + return rc; } /** start an engine */ rc = dlck_engine_start(&args->engine, &engine); if (rc != DER_SUCCESS) { + (void)ABT_finalize(); return rc; } + if (d_list_empty(&args->files.list)) { + /** no files specified means all files are requested */ + rc = dlck_pool_list(&args->files.list); + if (rc != DER_SUCCESS) { + goto fail_engine_stop; + } + } + + /** prepare pool storage directories */ + rc = dlck_pool_mkdir_all(args->engine.storage_path, &args->files.list, NULL); + if (rc != DER_SUCCESS) { + goto fail_engine_stop; + } + D_ALLOC_ARRAY(bundle->co_uuids, args->engine.targets); if (bundle->co_uuids == NULL) { rc = -DER_NOMEM; @@ -413,12 +428,14 @@ setup(struct dlck_helper_args *args, struct bundle *bundle) fail_engine_stop: (void)dlck_engine_stop(engine); + (void)ABT_finalize(); return rc; } static int teardown(struct bundle *bundle) { + int rc_abt; int rc; dss_unregister_key(dtx_module.sm_key); @@ -426,6 +443,15 @@ teardown(struct bundle *bundle) D_FREE(bundle->co_uuids); rc = dlck_engine_stop(bundle->engine); + if (rc != DER_SUCCESS) { + (void)ABT_finalize(); + return rc; + } + + rc_abt = ABT_finalize(); + if (rc_abt != ABT_SUCCESS) { + rc = dss_abterr2der(rc_abt); + } return rc; } @@ -445,7 +471,7 @@ main(int argc, char **argv) goto fail_args_free; } - rc = dlck_engine_exec_all(bundle.engine, exec_one, arg_alloc, &bundle, arg_free); + rc = dlck_engine_exec_all(bundle.engine, exec_one, arg_alloc, &bundle, arg_free, NULL); if (rc != DER_SUCCESS) { goto fail_teardown; } diff --git a/src/utils/dlck/tests/fault_injection_dlck.yaml b/src/utils/dlck/tests/fault_injection_dlck.yaml new file mode 100644 index 00000000000..36cb8095976 --- /dev/null 
+++ b/src/utils/dlck/tests/fault_injection_dlck.yaml @@ -0,0 +1,33 @@ +# Uncomment a fault you would like to trigger +fault_config: + # - id: 131328 # DLCK_FAULT_CREATE_LOG_DIR + # - id: 131329 # DLCK_FAULT_CREATE_POOL_DIR + # - id: 131330 # DLCK_FAULT_ENGINE_START + # - id: 131331 # DLCK_FAULT_ENGINE_EXEC + # - id: 131332 # DLCK_FAULT_ENGINE_JOIN + # - id: 131333 # DLCK_FAULT_ENGINE_STOP + # - id: 131584 # DAOS_FAULT_POOL_NVME_HEALTH + # interval: 2 # skip sys_db + # - id: 131585 # DAOS_FAULT_POOL_OPEN_BIO + # - id: 131586 # DAOS_FAULT_POOL_OPEN_UMEM + # interval: 2 # skip sys_db + # - id: 131587 # DAOS_FAULT_POOL_OPEN_MAGIC + # interval: 2 # skip sys_db + # - id: 131588 # DAOS_FAULT_POOL_OPEN_VERSION + # interval: 2 # skip sys_db + # - id: 131589 # DAOS_FAULT_POOL_OPEN_UUID + # interval: 2 # skip sys_db + # - id: 131590 # DAOS_FAULT_BTREE_OPEN_INV_CLASS + # interval: 28 # containers tree fine-tuned; note: -t 1 + # interval: 29 # gc tree fine-tuned; note: -t 1 + # max_faults: 1 + # - id: 131591 # DAOS_FAULT_BTREE_OPEN_UNREG_CLASS + # interval: 28 # containers tree fine-tuned; note: -t 1 + # interval: 29 # gc tree fine-tuned; note: -t 1 + # max_faults: 1 + # - id: 131592 # DAOS_FAULT_BTREE_FEATURES + # interval: 28 # containers tree fine-tuned; note: -t 1 + # interval: 29 # gc tree fine-tuned; note: -t 1 + # max_faults: 1 + # - id: 131593 # DAOS_FAULT_POOL_EXT_PADDING + # - id: 131594 # DAOS_FAULT_POOL_EXT_RESERVED diff --git a/src/vos/vos_gc.c b/src/vos/vos_gc.c index dc76f95297a..7726bb05bf1 100644 --- a/src/vos/vos_gc.c +++ b/src/vos/vos_gc.c @@ -1492,14 +1492,31 @@ gc_close_bkt(struct vos_gc_info *gc_info) gc_info->gi_last_pinned = UMEM_DEFAULT_MBKT_ID; } +#define CK_GC_TREE_STR "Garbage collector's tree" + static inline int -gc_open_bkt(struct umem_attr *uma, struct vos_gc_bkt_df *bkt_df, struct vos_gc_info *gc_info) +gc_open_bkt(struct umem_attr *uma, struct vos_gc_bkt_df *bkt_df, struct checker *ck, + struct vos_gc_info *gc_info) { - int rc; + const bool 
error_on_non_zero_padding = + (IS_CHECKER(ck) ? (ck->ck_options.cko_non_zero_padding == CHECKER_EVENT_ERROR) : false); + int rc; + + if (IS_CHECKER(ck)) { + CK_PRINT(ck, CK_GC_TREE_STR "...\n"); + CK_INDENT(ck, rc = dbtree_check_inplace(&bkt_df->gd_bins_root, uma, ck_report, ck, + error_on_non_zero_padding)); + CK_PRINTL_RC(ck, rc, CK_GC_TREE_STR); + if (rc != DER_SUCCESS) { + return rc; + } + } rc = dbtree_open_inplace(&bkt_df->gd_bins_root, uma, &gc_info->gi_bins_btr); - if (rc) + if (rc) { DL_ERROR(rc, "Failed to open GC bin tree."); + } + return rc; } @@ -1509,13 +1526,61 @@ gc_close_pool(struct vos_pool *pool) return gc_close_bkt(&pool->vp_gc_info); } +#define CK_NON_ZERO_PADDING_FMT "non-zero padding[%d] (%#" PRIx64 ")" +#define CK_NON_ZERO_RESERVED_FMT "non-zero reserved space (%#" PRIx64 ")" + +static int +dlck_pd_ext_check(struct vos_pool_ext_df *pd_ext, umem_off_t off, struct checker *ck) +{ + CK_PRINTF(ck, "Pool extension (off=%#lx)... ", off); + + if (pd_ext == NULL) { + CK_APPENDL_OK(ck); + return DER_SUCCESS; + } + + for (int i = 0; i < VOS_POOL_EXT_DF_PADDING_SIZE; ++i) { + if (pd_ext->ped_paddings[i] != 0 || DAOS_FAIL_CHECK(DAOS_FAULT_POOL_EXT_PADDING)) { + if (ck->ck_options.cko_non_zero_padding == CHECKER_EVENT_ERROR) { + CK_APPENDFL_ERR(ck, CK_NON_ZERO_PADDING_FMT, i, + pd_ext->ped_paddings[i]); + return -DER_NOTYPE; + } else { + CK_APPENDFL_WARN(ck, CK_NON_ZERO_PADDING_FMT, i, + pd_ext->ped_paddings[i]); + } + } + } + + if (pd_ext->ped_reserve != 0 || DAOS_FAIL_CHECK(DAOS_FAULT_POOL_EXT_RESERVED)) { + if (ck->ck_options.cko_non_zero_padding == CHECKER_EVENT_ERROR) { + CK_APPENDFL_ERR(ck, CK_NON_ZERO_RESERVED_FMT, pd_ext->ped_reserve); + return -DER_NOTYPE; + } else { + CK_APPENDFL_WARN(ck, CK_NON_ZERO_RESERVED_FMT, pd_ext->ped_reserve); + } + } + + CK_APPENDL_OK(ck); + + return DER_SUCCESS; +} + int -gc_open_pool(struct vos_pool *pool) +gc_open_pool(struct vos_pool *pool, struct checker *ck) { - struct vos_pool_ext_df *pd_ext = 
umem_off2ptr(&pool->vp_umm, pool->vp_pool_df->pd_ext); + struct vos_pool_ext_df *pd_ext = umem_off2ptr(&pool->vp_umm, pool->vp_pool_df->pd_ext); + int rc; + + if (IS_CHECKER(ck)) { + rc = dlck_pd_ext_check(pd_ext, pool->vp_pool_df->pd_ext, ck); + if (rc != DER_SUCCESS) { + return rc; + } + } if (pd_ext != NULL) - return gc_open_bkt(&pool->vp_uma, &pd_ext->ped_gc_bkt, &pool->vp_gc_info); + return gc_open_bkt(&pool->vp_uma, &pd_ext->ped_gc_bkt, ck, &pool->vp_gc_info); return 0; } @@ -1533,7 +1598,7 @@ gc_open_cont(struct vos_container *cont) struct vos_cont_ext_df *cd_ext = umem_off2ptr(&pool->vp_umm, cont->vc_cont_df->cd_ext); if (cd_ext != NULL) - return gc_open_bkt(&pool->vp_uma, &cd_ext->ced_gc_bkt, &cont->vc_gc_info); + return gc_open_bkt(&pool->vp_uma, &cd_ext->ced_gc_bkt, NULL, &cont->vc_gc_info); return 0; } diff --git a/src/vos/vos_internal.h b/src/vos/vos_internal.h index 8c4a3e800be..832624eba6a 100644 --- a/src/vos/vos_internal.h +++ b/src/vos/vos_internal.h @@ -1458,7 +1458,7 @@ vos_gc_pool_tight(daos_handle_t poh, int *credits); void gc_reserve_space(struct vos_pool *pool, daos_size_t *rsrvd); int -gc_open_pool(struct vos_pool *pool); +gc_open_pool(struct vos_pool *pool, struct checker *ck); void gc_close_pool(struct vos_pool *pool); int diff --git a/src/vos/vos_layout.h b/src/vos/vos_layout.h index b13790fe74f..aaae854327b 100644 --- a/src/vos/vos_layout.h +++ b/src/vos/vos_layout.h @@ -116,6 +116,8 @@ struct vos_gc_bkt_df { /** 2.8 features */ #define VOS_POOL_FEAT_2_8 (VOS_POOL_FEAT_GANG_SV) +#define VOS_POOL_EXT_DF_PADDING_SIZE 53 + /* VOS pool durable format extension */ struct vos_pool_ext_df { /* Extension for GC bucket */ @@ -123,7 +125,7 @@ struct vos_pool_ext_df { /* Memory file size for md-on-ssd phase2 pool */ uint64_t ped_mem_sz; /* Paddings for other potential new feature */ - uint64_t ped_paddings[53]; + uint64_t ped_paddings[VOS_POOL_EXT_DF_PADDING_SIZE]; /* Reserved for future extension */ uint64_t ped_reserve; }; diff --git 
a/src/vos/vos_pool.c b/src/vos/vos_pool.c index ad558c62fd8..96d516b32dd 100644 --- a/src/vos/vos_pool.c +++ b/src/vos/vos_pool.c @@ -1018,9 +1018,11 @@ vos_pmemobj_create(const char *path, uuid_t pool_id, const char *layout, return rc; } +#define BIO_META_CLOSE_FAIL_STR "Failed to close BIO meta context" + static int vos_pmemobj_open(const char *path, uuid_t pool_id, const char *layout, unsigned int flags, - void *metrics, struct umem_pool **ph) + void *metrics, struct checker *ck, struct umem_pool **ph) { struct bio_xs_context *xs_ctxt = vos_xsctxt_get(); struct umem_store store = { 0 }; @@ -1047,6 +1049,7 @@ vos_pmemobj_open(const char *path, uuid_t pool_id, const char *layout, unsigned xs_ctxt, DP_UUID(pool_id)); rc = bio_mc_open(xs_ctxt, pool_id, mc_flags, &mc); + CK_PRINTL_RC(ck, rc, "Open BIO meta context"); if (rc) { D_ERROR("Failed to open BIO meta context for xs:%p pool:"DF_UUID", "DF_RC"\n", xs_ctxt, DP_UUID(pool_id), DP_RC(rc)); @@ -1058,17 +1061,21 @@ vos_pmemobj_open(const char *path, uuid_t pool_id, const char *layout, unsigned umem_open: pop = umempobj_open(path, layout, UMEMPOBJ_ENABLE_STATS, &store); + rc = (pop == NULL) ? daos_errno2der(errno) : DER_SUCCESS; + CK_PRINTL_RC(ck, rc, "Open the pool"); if (pop != NULL) { *ph = pop; return 0; } - rc = daos_errno2der(errno); + D_ASSERT(rc != 0); if (store.stor_priv != NULL) { ret = bio_mc_close(store.stor_priv); - if (ret) - D_ERROR("Failed to close BIO meta context. "DF_RC"\n", DP_RC(ret)); + if (ret) { + CK_PRINTL_RC(ck, ret, BIO_META_CLOSE_FAIL_STR); + D_ERROR(BIO_META_CLOSE_FAIL_STR ". 
" DF_RC "\n", DP_RC(ret)); + } } return rc; @@ -1317,7 +1324,7 @@ pool_open_prep(uuid_t uuid, unsigned int flags, struct vos_pool **p_pool); static int pool_open_post(struct umem_pool **p_ph, struct vos_pool_df *pool_df, unsigned int flags, - void *metrics, struct vos_pool *pool, int ret); + void *metrics, struct vos_pool *pool, struct checker *ck, int ret); int vos_pool_create_ex(const char *path, uuid_t uuid, daos_size_t scm_sz, daos_size_t nvme_sz, @@ -1488,7 +1495,7 @@ vos_pool_create_ex(const char *path, uuid_t uuid, daos_size_t scm_sz, daos_size_ post: if (rc == 0 && poh != NULL) { - rc = pool_open_post(&ph, pool_df, flags, NULL, pool, rc); + rc = pool_open_post(&ph, pool_df, flags, NULL, pool, NULL, rc); if (rc == 0) *poh = vos_pool2hdl(pool); } else { @@ -1701,11 +1708,15 @@ pool_open_prep(uuid_t uuid, unsigned int flags, struct vos_pool **p_pool) return rc; } +#define CK_CONT_TREE_STR "Containers tree" + static int pool_open_post(struct umem_pool **p_ph, struct vos_pool_df *pool_df, unsigned int flags, - void *metrics, struct vos_pool *pool, int ret) + void *metrics, struct vos_pool *pool, struct checker *ck, int ret) { struct umem_attr *uma; + const bool error_on_non_zero_padding = + (IS_CHECKER(ck) ? 
(ck->ck_options.cko_non_zero_padding == CHECKER_EVENT_ERROR) : false); daos_handle_t poh; int rc; @@ -1741,6 +1752,16 @@ pool_open_post(struct umem_pool **p_ph, struct vos_pool_df *pool_df, unsigned in goto out; } + if (IS_CHECKER(ck)) { + CK_PRINT(ck, CK_CONT_TREE_STR "...\n"); + CK_INDENT(ck, rc = dbtree_check_inplace(&pool_df->pd_cont_root, &pool->vp_uma, + ck_report, ck, error_on_non_zero_padding)); + CK_PRINTL_RC(ck, rc, CK_CONT_TREE_STR); + if (rc != DER_SUCCESS) { + goto out; + } + } + /* Cache container table btree hdl */ rc = dbtree_open_inplace_ex(&pool_df->pd_cont_root, &pool->vp_uma, DAOS_HDL_INVAL, pool, &pool->vp_cont_th); @@ -1781,7 +1802,7 @@ pool_open_post(struct umem_pool **p_ph, struct vos_pool_df *pool_df, unsigned in if (rc) goto out; - rc = gc_open_pool(pool); + rc = gc_open_pool(pool, ck); if (rc) goto out; @@ -1811,7 +1832,7 @@ pool_open_post(struct umem_pool **p_ph, struct vos_pool_df *pool_df, unsigned in int vos_pool_open_metrics(const char *path, uuid_t uuid, unsigned int flags, void *metrics, - daos_handle_t *poh) + struct checker *ck, daos_handle_t *poh) { struct vos_pool_df *pool_df = NULL; struct vos_pool *pool = NULL; @@ -1830,6 +1851,10 @@ vos_pool_open_metrics(const char *path, uuid_t uuid, unsigned int flags, void *m return -DER_NOTSUPPORTED; } + /** header with parameters */ + CK_PRINTF(ck, "Check pool:\n\tpath: %s\n\tuuid: " DF_UUIDF "\n", path, DP_UUID(uuid)); + checker_print_indent_inc(ck); + D_DEBUG(DB_MGMT, "Pool Path: %s, UUID: "DF_UUID"\n", path, DP_UUID(uuid)); @@ -1840,7 +1865,8 @@ vos_pool_open_metrics(const char *path, uuid_t uuid, unsigned int flags, void *m rc = pool_lookup(&ukey, &pool, true); if (rc == 0) { - D_ASSERT(pool != NULL); + CK_ASSERT(ck, "Pool is not NULL... 
", pool != NULL); + CK_PRINT(ck, "Pool is already opened.\n"); D_DEBUG(DB_MGMT, "Found already opened(%d) pool : %p\n", pool->vp_opened, pool); if (pool->vp_dying) { @@ -1863,12 +1889,13 @@ vos_pool_open_metrics(const char *path, uuid_t uuid, unsigned int flags, void *m return rc; rc = bio_xsctxt_health_check(vos_xsctxt_get(), false, false); + CK_PRINTL_RC(ck, rc, "NVMe devices (if applicable)"); if (rc) { DL_WARN(rc, DF_UUID": Skip pool open due to faulty NVMe.", DP_UUID(uuid)); goto out; } - rc = vos_pmemobj_open(path, uuid, VOS_POOL_LAYOUT, flags, metrics, &ph); + rc = vos_pmemobj_open(path, uuid, VOS_POOL_LAYOUT, flags, metrics, ck, &ph); if (rc) { D_ERROR("Error in opening the pool "DF_UUID". "DF_RC"\n", DP_UUID(uuid), DP_RC(rc)); @@ -1876,14 +1903,19 @@ vos_pool_open_metrics(const char *path, uuid_t uuid, unsigned int flags, void *m } pool_df = vos_pool_pop2df(ph); - if (pool_df->pd_magic != POOL_DF_MAGIC) { + CK_PRINT(ck, "Magic... "); + if (pool_df->pd_magic != POOL_DF_MAGIC || DAOS_FAIL_CHECK(DAOS_FAULT_POOL_OPEN_MAGIC)) { + CK_APPENDFL_ERR(ck, "invalid (%#x)", pool_df->pd_magic); D_CRIT("Unknown DF magic %x\n", pool_df->pd_magic); rc = -DER_DF_INVAL; goto out; } + CK_APPENDL_OK(ck); - if (pool_df->pd_version > POOL_DF_VERSION || - pool_df->pd_version < POOL_DF_VER_1) { + CK_PRINT(ck, "Version... 
"); + if (pool_df->pd_version > POOL_DF_VERSION || pool_df->pd_version < POOL_DF_VER_1 || + DAOS_FAIL_CHECK(DAOS_FAULT_POOL_OPEN_VERSION)) { + CK_APPENDFL_ERR(ck, "unsupported (%#x)", pool_df->pd_version); D_ERROR("Unsupported DF version %x\n", pool_df->pd_version); /** Send a RAS notification */ vos_report_layout_incompat("VOS pool", pool_df->pd_version, @@ -1892,31 +1924,41 @@ vos_pool_open_metrics(const char *path, uuid_t uuid, unsigned int flags, void *m rc = -DER_DF_INCOMPT; goto out; } + CK_APPENDL_OK(ck); - if (uuid_compare(uuid, pool_df->pd_id)) { - D_ERROR("Mismatch uuid, user="DF_UUIDF", pool="DF_UUIDF"\n", - DP_UUID(uuid), DP_UUID(pool_df->pd_id)); + CK_PRINT(ck, "UUID... "); + if (uuid_compare(uuid, pool_df->pd_id) || DAOS_FAIL_CHECK(DAOS_FAULT_POOL_OPEN_UUID)) { + CK_APPENDFL_ERR(ck, "mismatch (requested=" DF_UUIDF ", received=" DF_UUIDF ")", + DP_UUID(uuid), DP_UUID(pool_df->pd_id)); + D_ERROR("Mismatch uuid, user=" DF_UUIDF ", pool=" DF_UUIDF "\n", DP_UUID(uuid), + DP_UUID(pool_df->pd_id)); rc = -DER_ID_MISMATCH; goto out; } + CK_APPENDL_OK(ck); out: - rc = pool_open_post(&ph, pool_df, flags, metrics, pool, rc); - if (rc == 0) + rc = pool_open_post(&ph, pool_df, flags, metrics, pool, ck, rc); + if (rc == 0) { *poh = vos_pool2hdl(pool); + checker_print_indent_dec(ck); + CK_PRINTL_RC(ck, rc, "Check pool"); + } + /* Close this local handle, if it hasn't been consumed nor already * been closed by pool_open upon error. 
*/ if (ph != NULL) vos_pmemobj_close(ph); + return rc; } int vos_pool_open(const char *path, uuid_t uuid, unsigned int flags, daos_handle_t *poh) { - return vos_pool_open_metrics(path, uuid, flags, NULL, poh); + return vos_pool_open_metrics(path, uuid, flags, NULL, NULL, poh); } int From 9c9fd42ef2133c930e6dee18a7d0beda974f763e Mon Sep 17 00:00:00 2001 From: wiliamhuang Date: Mon, 10 Nov 2025 07:44:30 -0600 Subject: [PATCH 002/253] DAOS-18117 client: fix issue in free_fd related to fd duplication (#17039) properly close a fake fd when fd duplication is involved. add utests for dup() and dup3(). Signed-off-by: Lei Huang --- src/client/dfuse/pil4dfs/int_dfs.c | 3 +- src/tests/suite/dfuse_test.c | 65 ++++++++++++++++++++++++++++-- 2 files changed, 62 insertions(+), 6 deletions(-) diff --git a/src/client/dfuse/pil4dfs/int_dfs.c b/src/client/dfuse/pil4dfs/int_dfs.c index 03eeaaef6a7..d55d88128a3 100644 --- a/src/client/dfuse/pil4dfs/int_dfs.c +++ b/src/client/dfuse/pil4dfs/int_dfs.c @@ -1655,7 +1655,6 @@ find_next_available_map(int *idx) return 0; } -/* May need to support duplicated fd as duplicated dirfd too. 
*/ static void free_fd(int idx, bool closing_dup_fd) { @@ -1676,7 +1675,7 @@ free_fd(int idx, bool closing_dup_fd) d_file_list[idx]->ref_count--; if (d_file_list[idx]->ref_count == 0) saved_obj = d_file_list[idx]; - if (dup_ref_count[idx] > 0 || ((d_file_list[idx]->ref_count > 0) && !d_compatible_mode)) { + if ((dup_ref_count[idx] > 0) || (closing_dup_fd && (d_file_list[idx]->ref_count > 0))) { D_MUTEX_UNLOCK(&lock_fd); return; } diff --git a/src/tests/suite/dfuse_test.c b/src/tests/suite/dfuse_test.c index 24c9d38aa1c..9f14659651d 100644 --- a/src/tests/suite/dfuse_test.c +++ b/src/tests/suite/dfuse_test.c @@ -858,8 +858,10 @@ do_fdcallscheck(void **state) char path_old[512]; char path_new[512]; char *env_ldpreload; - bool use_dfuse = true; - bool with_pil4dfs = false; + char *env_compatible; + bool use_dfuse = true; + bool with_pil4dfs = false; + bool compatible_mode = false; /* "/tmp/dfuse-test" is assigned in src/tests/ftest/daos_test/dfuse.py */ char native_mount_dir[] = "/tmp/dfuse-test"; @@ -871,6 +873,11 @@ do_fdcallscheck(void **state) /* libioil cannot pass this test since low fds are only temporarily blocked */ with_pil4dfs = true; + env_compatible = getenv("D_IL_COMPATIBLE"); + if ((env_compatible != NULL) && (strcmp(env_compatible, "1") == 0)) + /* libioil cannot pass this test since low fds are only temporarily blocked */ + compatible_mode = true; + root = open(test_dir, O_PATH | O_DIRECTORY); assert_return_code(root, errno); @@ -984,7 +991,7 @@ do_fdcallscheck(void **state) fd = openat(root, "test_file", O_RDWR | O_CREAT, S_IWUSR | S_IRUSR); assert_return_code(fd, errno); - if (with_pil4dfs && use_dfuse) + if (with_pil4dfs && use_dfuse && !compatible_mode) assert_true(is_fd_large(fd)); fd_new = 10000; @@ -1004,12 +1011,62 @@ do_fdcallscheck(void **state) rc = close(fd); assert_return_code(rc, errno); + rc = close(root); + assert_return_code(rc, errno); + /* end testing dup3() */ + + /* start testing dup3() - closing old fd first */ + root = 
open(test_dir, O_PATH | O_DIRECTORY); + assert_return_code(root, errno); + + fd = openat(root, "test_file", O_RDWR | O_CREAT, S_IWUSR | S_IRUSR); + assert_return_code(fd, errno); + + fd_new = 10000; + flag = O_CLOEXEC; + rc = dup3(fd, fd_new, flag); + assert_true(rc == fd_new); + + rc = close(fd); + assert_return_code(rc, errno); + + rc = close(fd_new); + assert_return_code(rc, errno); + /* end testing dup3() - closing old fd first */ + + /* start testing dup() */ + fd = openat(root, "test_file", O_RDWR | O_CREAT, S_IWUSR | S_IRUSR); + assert_return_code(fd, errno); + + fd_new = dup(fd); + assert_true(fd_new > 0); + + /* close the new fd first */ + rc = close(fd_new); + assert_return_code(rc, errno); + + rc = close(fd); + assert_return_code(rc, errno); + + fd = openat(root, "test_file", O_RDWR | O_CREAT, S_IWUSR | S_IRUSR); + assert_return_code(fd, errno); + + fd_new = dup(fd); + assert_true(fd_new > 0); + + /* close the old fd first */ + rc = close(fd); + assert_return_code(rc, errno); + + rc = close(fd_new); + assert_return_code(rc, errno); + /* end testing dup3() - closing old fd first */ + rc = unlinkat(root, "test_file", 0); assert_return_code(rc, errno); rc = close(root); assert_return_code(rc, errno); - /* end testing dup3() */ } /* From abfa0bc2271ea2b56049929cc294a7bffacdba48 Mon Sep 17 00:00:00 2001 From: Tomasz Gromadzki Date: Mon, 10 Nov 2025 19:52:25 +0100 Subject: [PATCH 003/253] DAOS-18175 packaging: add daos changelog to FPM (#17072) Provide DAOS changelog to fpm. 
Signed-off-by: Tomasz Gromadzki --- docs/dev/development.md | 30 +- utils/rpms/daos.changelog | 822 +++++++++++++++++++++++++++++++++++++ utils/rpms/daos.sh | 2 + utils/rpms/daos.spec | 823 -------------------------------------- 4 files changed, 842 insertions(+), 835 deletions(-) create mode 100644 utils/rpms/daos.changelog diff --git a/docs/dev/development.md b/docs/dev/development.md index db0bcc0dbf7..d267a2ede76 100644 --- a/docs/dev/development.md +++ b/docs/dev/development.md @@ -371,27 +371,33 @@ can be created using the daos admin tool (see next section). For more advanced configurations involving SCM, SSD or a real fabric, please refer to the next section. -## Updating a 3rd party component +## DAOS RPMs build process The DAOS build process now covers building RPMs for both DAOS and dependencies -specified in [`utils/build.config`](../../utils/build.config) (or those that we build regularly with -`--build-deps=yes`). The RPM (and deb) build process uses +specified in [`utils/build.config`](../../utils/build.config) (or those that we +build regularly with `--build-deps=yes`). The complete list of RPMs is defined +in the [`utils/rpms/build_packages.sh`](../../utils/rpms/build_packages.sh) +script. The RPM (and deb) build process uses [FPM](https://fpm.readthedocs.io/en/latest/getting-started.html). Essentially, it creates rpm packages after a DAOS build. Regardless of how that build is done, it will put files in the right places in the final packages. Most of the magic -is in [`utils/rpms/fpm_common.sh`](../../utils/rpms/fpm_common.sh) with component specific code in -`utils/rpms/<component>.sh`. +is in [`utils/rpms/fpm_common.sh`](../../utils/rpms/fpm_common.sh) with +component specific code in `utils/rpms/<component>.sh`. + +### Updating a 3rd party component In order to properly upgrade a 3rd party component, do all of the following: -1. Change the `utils/build.config` to point to the new version or to add a new patch. Patches should - be stored in `deps/patches/`. -1. 
Update [`utils/rpms/daos.spec`](../../utils/rpms/daos.spec) changelog and release iteration. - This is important to document the change. +1. Change the [`utils/build.config`](../../utils/build.config) to point to + the new version or to add a new patch. Patches should be stored in + `deps/patches/`. +1. Update (increase) the `Release` variable in [`utils/rpms/daos.spec`](../../utils/rpms/daos.spec). +1. Update [`utils/rpms/daos.changelog`](../../utils/rpms/daos.changelog) + changelog and release iteration. This is important to document the change. 1. Update the `_release` and/or `_version` in [`utils/rpms/package_info.sh`](../../utils/rpms/package_info.sh) 1. Make any necessary changes to `utils/rpms/<component>.sh` such as adding new files to various packages. -1. Update the `utils/rpms/<component>.changelog` file to document the change and make sure - the changelog file is referenced by the `RPM_CHANGELOG="<component>.changelog"` variable - in `utils/rpms/<component>.sh`. +1. Update the `utils/rpms/<component>.changelog` file to document the change and + make sure the file is referenced by the + `RPM_CHANGELOG="<component>.changelog"` variable in `utils/rpms/<component>.sh`. 
diff --git a/utils/rpms/daos.changelog b/utils/rpms/daos.changelog new file mode 100644 index 00000000000..6384eef6a8a --- /dev/null +++ b/utils/rpms/daos.changelog @@ -0,0 +1,822 @@ +%changelog +* Thu Oct 16 2025 Jeff Olivier 2.7.101-16 +- Make daos-spdk conflict with spdk + +* Thu Sep 12 2025 Jeff Olivier 2.7.101-15 +- Fix leap package name + +* Thu Sep 11 2025 Jeff Olivier 2.7.101-14 +- Fix pmdk package for leap +- Fix daos-spdk package + +* Mon Aug 11 2025 Jeff Olivier 2.7.101-13 +- Switch to fpm build for RPMs + +* Wed Jul 30 2025 Tomasz Gromadzki 2.7.101-12 +- pmemobj errors and warnings reported via DAOS logging system + +* Mon Jun 2 2025 Samirkumar Raval 2.7.101-11 +- Changing the default log location to /var/log/daos from /tmp + +* Mon May 19 2025 Jeff Olivier 2.7.101-10 +- Start to deprecate this file being used to build DAOS but rather only source + RPM + +* Mon May 12 2025 Tomasz Gromadzki 2.7.101-9 +- Bump lua-lmod version to >=8.7.36 +- Bump lmod version to >=8.7.36 +- Bump mpich version to 4.1~a1 +- Bump python3-mpi4py-tests version to >= 3.1.6 +- Add openmpi requiremnent for daos-client-tests on Leap. + +* Fri Mar 21 2025 Cedric Koch-Hofer 2.7.101-8 +- Add support of the libasan + +* Tue Mar 18 2025 Jeff Olivier 2.7.101-7 +- Remove raft as external dependency + +* Mon Mar 10 2025 Jeff Olivier 2.7.101-6 +- Remove server from Ubuntu packaging and fix client only build + +* Wed Jan 22 2025 Jan Michalski 2.7.101-5 +- Add ddb_ut and dtx_ut to the server-tests package + +* Fri Dec 20 2024 Jeff Olivier 2.7.101-4 +- Switch libfuse3 to libfused + +* Thu Dec 19 2024 Phillip Henderson 2.7.101-3 +- Fix protobuf-c requiremnent for daos-client-tests on Leap. + +* Thu Nov 14 2024 Denis Barakhtanov 2.7.101-2 +- Add pydaos.torch module to daos-client rpm. + +* Fri Nov 08 2024 Phillip Henderson 2.7.101-1 +- Bump version to 2.7.100 + +* Tue Nov 5 2024 Michael MacDonald 2.7.100-11 +- Move daos_metrics tool to daos package for use on both clients + and servers. 
+ +* Fri Nov 1 2024 Sherin T George 2.7.100-10 +- The modified DAV allocator with memory bucket support for md_on_ssd + phase-2 is delivered as dav_v2.so. + +* Tue Oct 15 2024 Brian J. Murrell - 2.7.100-9 +- Drop BRs for UCX as they were obsoleted as of e01970d + +* Mon Oct 07 2024 Cedric Koch-Hofer 2.7.100-8 +- Update BR: argobots to 1.2 + +* Tue Oct 01 2024 Tomasz Gromadzki 2.7.100-7 +- Add support of the PMDK package 2.1.0 with NDCTL enabled. + * Increase the default ULT stack size to 20KiB if the engine uses + the DCPM storage class. + * Prevent using the RAM storage class (simulated PMem) when + the shutdown state (SDS) is active. + * Automatically disable SDS for the RAM storage class on engine startup. + * Force explicitly setting the PMEMOBJ_CONF='sds.at_create=0' + environment variable to deactivate SDS for the DAOS tools + (ddb, daos_perf, vos_perf, etc.) when used WITHOUT DCPM. + Otherwise, a user is supposed to be stopped by an error + like: "Unsafe shutdown count is not supported for this source". + +* Mon Sep 23 2024 Kris Jacque 2.7.100-6 +- Bump min supported go version to 1.21 + +* Thu Aug 15 2024 Michael MacDonald 2.7.100-5 +- Add libdaos_self_test.so to client RPM + +* Mon Aug 05 2024 Jerome Soumagne 2.7.100-4 +- Bump mercury version to 2.4.0rc4 + +* Thu Jul 11 2024 Dalton Bohning 2.7.100-3 +- Add pciutils-devel build dep for client-tests package + +* Mon Jun 24 2024 Tom Nabarro 2.7.100-2 +- Add pciutils runtime dep for daos_server lspci call +- Add pciutils-devel build dep for pciutils CGO bindings + +* Mon May 20 2024 Phillip Henderson 2.7.100-1 +- Bump version to 2.7.100 + +* Fri May 03 2024 Lei Huang 2.5.101-5 +- Add libaio as a dependent package + +* Fri Apr 05 2024 Fan Yong 2.5.101-4 +- Catastrophic Recovery + +* Thu Apr 04 2024 Ashley M. 
Pittman 2.5.101-3 +- Update pydaos install process +- Add a dependency from daos-client-tests to daos-devel + +* Mon Mar 18 2024 Jan Michalski 2.5.101-2 +- Add dtx_tests to the server-tests package + +* Fri Mar 15 2024 Phillip Henderson 2.5.101-1 +- Bump version to 2.5.101 + +* Tue Feb 27 2024 Li Wei 2.5.100-16 +- Update raft to 0.11.0-1.416.g12dbc15 + +* Mon Feb 12 2024 Ryon Jensen 2.5.100-15 +- Updated isa-l package name to match EPEL + +* Tue Jan 09 2024 Brian J. Murrell 2.5.100-14 +- Move /etc/ld.so.conf.d/daos.conf to daos-server sub-package + +* Wed Dec 06 2023 Brian J. Murrell 2.5.100-13 +- Update for EL 8.8 and Leap 15.5 +- Update raft to 0.10.1-2.411.gefa15f4 + +* Fri Nov 17 2023 Tomasz Gromadzki 2.5.100-12 +- Update to PMDK 2.0.0 + * Remove libpmemblk from dependencies. + * Start using BUILD_EXAMPLES=n and BUILD_BENCHMARKS=n instead of patches. + * Stop using BUILD_RPMEM=n (removed) and NDCTL_DISABLE=y (invalid). + * Point https://github.com/pmem/pmdk as the main PMDK reference source. + NOTE: PMDK upgrade to 2.0.0 does not affect any API call used by DAOS. + libpmemobj (and libpmem) API stays unchanged. + +* Wed Nov 15 2023 Jerome Soumagne 2.5.100-11 +- Bump mercury min version to 2.3.1 + +* Fri Nov 03 2023 Phillip Henderson 2.5.100-10 +- Move verify_perms.py location + +* Wed Aug 23 2023 Brian J. Murrell 2.5.100-9 +- Update fuse3 requirement to R: /usr/bin/fusermount3 by path + rather than by package name, for portability and future-proofing +- Adding fuse3-devel as a requirement for daos-client-tests subpackage + +* Tue Aug 08 2023 Brian J. Murrell 2.5.100-8 +- Build on EL9 +- Add a client-tests-mpich subpackage for mpich test dependencies. + +* Fri Jul 07 2023 Brian J. 
Murrell 2.5.100-7 +- Fix golang daos-client-tests dependency to be go instead + +* Thu Jun 29 2023 Michael MacDonald 2.5.100-6 +- Install golang >= 1.18 as a daos-client-tests dependency + +* Thu Jun 22 2023 Li Wei 2.5.100-5 +- Update raft to 0.10.1-1.408.g9524cdb + +* Wed Jun 14 2023 Mohamad Chaarawi - 2.5.100-4 +- Add pipeline lib + +* Wed Jun 14 2023 Wang Shilong 2.5.100-3 +- Remove lmdb-devel for MD on SSD + +* Wed Jun 07 2023 Ryon Jensen 2.5.100-2 +- Removed unnecessary test files + +* Tue Jun 06 2023 Jeff Olivier 2.5.100-1 +- Switch version to 2.5.100 for 2.6 test builds + +* Mon Jun 5 2023 Jerome Soumagne 2.3.107-7 +- Remove libfabric pinning and allow for 1.18 builds + +* Fri May 26 2023 Jeff Olivier 2.3.107-6 +- Add lmdb-devel and bio_ut for MD on SSD + +* Tue May 23 2023 Lei Huang 2.3.107-5 +- Add libcapstone-devel to deps of client-tests package + +* Tue May 16 2023 Lei Huang 2.3.107-4 +- Add libcapstone as a new prerequisite package +- Add libpil4dfs.so in daos-client rpm + +* Mon May 15 2023 Jerome Soumagne 2.3.107-3 +- Fix libfabric/libfabric1 dependency mismatch on SuSE + +* Wed May 10 2023 Jerome Soumagne 2.3.107-2 +- Temporarily pin libfabric to < 1.18 + +* Fri May 5 2023 Johann Lombardi 2.3.107-1 +- Bump version to 2.3.107 + +* Fri Mar 17 2023 Tom Nabarro 2.3.106-2 +- Add numactl requires for server package + +* Tue Mar 14 2023 Brian J. Murrell 2.3.106-1 +- Bump version to be higher than TB5 + +* Wed Feb 22 2023 Li Wei 2.3.103-6 +- Update raft to 0.9.2-1.403.g3d20556 + +* Tue Feb 21 2023 Michael MacDonald 2.3.103-5 +- Bump min supported go version to 1.17 + +* Fri Feb 17 2023 Ashley M. Pittman 2.3.103-4 +- Add protobuf-c-devel to deps of client-tests package + +* Mon Feb 13 2023 Brian J. 
Murrell 2.3.103-3 +- Remove explicit R: protobuf-c and let the auto-dependency generator + handle it + +* Wed Feb 8 2023 Michael Hennecke 2.3.103-2 +- Change ipmctl requirement from v2 to v3 + +* Fri Jan 27 2023 Phillip Henderson 2.3.103-1 +- Bump version to 2.3.103 + +* Wed Jan 25 2023 Johann Lombardi 2.3.102-1 +- Bump version to 2.3.102 + +* Tue Jan 24 2023 Phillip Henderson 2.3.101-7 +- Fix daos-tests-internal requirement for daos-tests + +* Fri Jan 6 2023 Brian J. Murrell 2.3.101-6 +- Don't need to O: cart any more +- Add %%doc to all packages +- _datadir -> _datarootdir +- Don't use PREFIX= with scons in %%build +- Fix up some hard-coded paths to use macros instead +- Use some guards to prevent creating empty scriptlets + +* Tue Dec 06 2022 Joseph G. Moore 2.3.101-5 +- Update Mercury to 2.2.0-6 + +* Thu Dec 01 2022 Tom Nabarro 2.3.101-4 +- Update SPDK dependency requirement to greater than or equal to 22.01.2. + +* Tue Oct 18 2022 Brian J. Murrell 2.3.101-3 +- Set flag to build per-subpackage debuginfo packages for Leap 15 + +* Thu Oct 6 2022 Michael MacDonald 2.3.101-2 +- Rename daos_admin -> daos_server_helper + +* Tue Sep 20 2022 Johann Lombardi 2.3.101-1 +- Bump version to 2.3.101 + +* Thu Sep 8 2022 Jeff Olivier 2.3.100-22 +- Move io_conf files from bin to TESTING + +* Tue Aug 16 2022 Jeff Olivier 2.3.100-21 +- Update PMDK to 1.12.1~rc1 to fix DAOS-11151 + +* Thu Aug 11 2022 Wang Shilong 2.3.100-20 +- Add daos_debug_set_params to daos-client-tests rpm for fault injection test. + +* Fri Aug 5 2022 Jerome Soumagne 2.3.100-19 +- Update to mercury 2.2.0 + +* Tue Jul 26 2022 Michael MacDonald 2.3.100-18 +- Bump min supported go version to 1.16 + +* Mon Jul 18 2022 Jerome Soumagne 2.3.100-17 +- Remove now unused openpa dependency + +* Fri Jul 15 2022 Jeff Olivier 2.3.100-16 +- Add pool_scrubbing_tests to test package + +* Wed Jul 13 2022 Tom Nabarro 2.3.100-15 +- Update SPDK dependency requirement to greater than or equal to 22.01.1. 
+ +* Mon Jun 27 2022 Jerome Soumagne 2.3.100-14 +- Update to mercury 2.2.0rc6 + +* Fri Jun 17 2022 Jeff Olivier 2.3.100-13 +- Remove libdts.so, replace with build time static + +* Thu Jun 2 2022 Jeff Olivier 2.3.100-12 +- Make ucx required for build on all platforms + +* Wed Jun 1 2022 Michael MacDonald 2.3.100-11 +- Move dmg to new daos-admin RPM + +* Wed May 18 2022 Lei Huang 2.3.100-10 +- Update to libfabric to v1.15.1-1 to include critical performance patches + +* Tue May 17 2022 Phillip Henderson 2.3.100-9 +- Remove doas-client-tests-openmpi dependency from daos-tests +- Add daos-tests-internal package + +* Mon May 9 2022 Ashley Pittman 2.3.100-8 +- Extend dfusedaosbuild test to run in different configurations. + +* Fri May 6 2022 Ashley Pittman 2.3.100-7 +- Add dfuse unit-test binary to call from ftest. + +* Wed May 4 2022 Joseph Moore 2.3.100-6 +- Update to mercury 2.1.0.rc4-9 to enable non-unified mode in UCX + +* Tue Apr 26 2022 Phillip Henderson 2.3.100-5 +- Move daos_gen_io_conf and daos_run_io_conf to daos-client-tests + +* Wed Apr 20 2022 Lei Huang 2.3.100-4 +- Update to libfabric to v1.15.0rc3-1 to include critical performance patches + +* Tue Apr 12 2022 Li Wei 2.3.100-3 +- Update raft to 0.9.1-1401.gc18bcb8 to fix uninitialized node IDs + +* Wed Apr 6 2022 Jeff Olivier 2.3.100-2 +- Remove direct MPI dependency from most of tests + +* Wed Apr 6 2022 Johann Lombardi 2.3.100-1 +- Switch version to 2.3.100 for 2.4 test builds + +* Wed Apr 6 2022 Joseph Moore 2.1.100-26 +- Add build depends entries for UCX libraries. 
+ +* Sat Apr 2 2022 Joseph Moore 2.1.100-25 +- Update to mercury 2.1.0.rc4-8 to include UCX provider patch + +* Fri Mar 11 2022 Alexander Oganezov 2.1.100-24 +- Update to mercury 2.1.0.rc4-6 to include CXI provider patch + +* Wed Mar 02 2022 Michael Hennecke 2.1.100-23 +- DAOS-6344: Create secondary group daos_daemons for daos_server and daos_agent + +* Tue Feb 22 2022 Alexander Oganezov 2.1.100-22 +- Update mercury to include DAOS-9561 workaround + +* Sun Feb 13 2022 Michael MacDonald 2.1.100-21 +- Update go toolchain requirements + +* Thu Feb 10 2022 Li Wei 2.1.100-20 +- Update raft to 0.9.0-1394.gc81505f to fix membership change bugs + +* Wed Jan 19 2022 Michael MacDonald 2.1.100-19 +- Move libdaos_common.so from daos-client to daos package + +* Mon Jan 17 2022 Johann Lombardi 2.1.100-18 +- Update libfabric to 1.14.0 GA and apply fix for DAOS-9376 + +* Thu Dec 23 2021 Alexander Oganezov 2.1.100-17 +- Update to v2.1.0-rc4-3 to pick fix for DAOS-9325 high cpu usage +- Change mercury pinning to be >= instead of strict = + +* Thu Dec 16 2021 Brian J. Murrell 2.1.100-16 +- Add BR: python-rpm-macros for Leap 15 as python3-base dropped that + as a R: + +* Sat Dec 11 2021 Brian J. Murrell 2.1.100-15 +- Create a shim package to allow daos openmpi packages built with the + distribution openmpi to install on MOFED systems + +* Fri Dec 10 2021 Brian J. Murrell 2.1.100-14 +- Don't make daos-*-tests-openmi a dependency of anything + - If they are wanted, they should be installed explicitly, due to + potential conflicts with other MPI stacks + +* Wed Dec 08 2021 Alexander Oganezov 2.1.100-13 +- Remove DAOS-9173 workaround from mercury. Apply DAOS-9173 to ofi + +* Tue Dec 07 2021 Alexander Oganezov 2.1.100-12 +- Apply DAOS-9173 workaround to mercury + +* Fri Dec 03 2021 Alexander Oganezov 2.1.100-11 +- Update mercury to v2.1.0rc4 + +* Thu Dec 02 2021 Danielle M. 
Sikich 2.1.100-10 +- Fix name of daos serialize package + +* Sun Nov 28 2021 Tom Nabarro 2.1.100-9 +- Set rmem_{max,default} sysctl values on server package install to enable + SPDK pci_event module to operate in unprivileged process (daos_engine). + +* Wed Nov 24 2021 Brian J. Murrell 2.1.100-8 +- Remove invalid "%%else if" syntax +- Fix a few other rpmlint warnings + +* Tue Nov 16 2021 Wang Shilong 2.1.100-7 +- Update for libdaos major version bump +- Fix version of libpemobj1 for SUSE + +* Sat Nov 13 2021 Alexander Oganezov 2.1.100-6 +- Update OFI to v1.14.0rc3 + +* Tue Oct 26 2021 Brian J. Murrell 2.1.100-5 +- Create new daos-{client,server}tests-openmpi and daos-server-tests subpackages +- Rename daos-tests daos-client-tests and make daos-tests require all + other test suites to maintain existing behavior + +* Mon Oct 25 2021 Alexander Oganezov 2.1.100-4 +- Update mercury to v2.1.0rc2 + +* Wed Oct 20 2021 Jeff Olivier 2.1.100-3 +- Explicitly require 1.11.0-3 of PMDK + +* Wed Oct 13 2021 David Quigley 2.1.100-2 +- Add defusedxml as a required dependency for the test package. + +* Wed Oct 13 2021 Johann Lombardi 2.1.100-1 +- Switch version to 2.1.100 for 2.2 test builds + +* Tue Oct 12 2021 Johann Lombardi 1.3.106-1 +- Version bump to 1.3.106 for 2.0 test build 6 + +* Fri Oct 8 2021 Alexander Oganezov 1.13.105-4 +- Update OFI to v1.13.2rc1 + +* Wed Sep 15 2021 Li Wei 1.3.105-3 +- Update raft to fix InstallSnapshot performance as well as to avoid some + incorrect 0.8.0 RPMs + +* Fri Sep 03 2021 Brian J. 
Murrell 1.3.105-2 +- Remove R: hwloc; RPM's auto-requires/provides will take care of this + +* Tue Aug 24 2021 Jeff Olivier 1.3.105-1 +- Version bump to 1.3.105 for 2.0 test build 5 + +* Mon Aug 09 2021 Yawei 1.3.104-5 +- Fix duplicates +- Add vos_perf + +* Thu Aug 05 2021 Christopher Hoffman 1.3.104-4 +- Update conditional statement to include checking for distributions to + determine which unit files to use for daos-server and daos-agent + +* Wed Aug 04 2021 Kris Jacque 1.3.104-3 +- Move daos_metrics tool from tests package to server package + +* Wed Aug 04 2021 Tom Nabarro 1.3.104-2 +- Update to spdk 21.07 and (indirectly) dpdk 21.05 + +* Mon Aug 02 2021 Jeff Olivier 1.3.104-1 +- Version bump to 1.3.104 for 2.0 test build 4 + +* Mon Jul 19 2021 Danielle M. Sikich 1.3.103-5 +- Add DAOS serialization library that requires hdf5 + +* Wed Jul 14 2021 Li Wei 1.3.103-4 +- Update raft to fix slow leader re-elections + +* Tue Jul 13 2021 Maureen Jean 1.3.103-3 +- Add python modules to python3.6 site-packages + +* Mon Jul 12 2021 Alexander Oganezov 1.3.103-2 +- Update to mercury release v2.0.1 + +* Mon Jul 12 2021 Johann Lombardi 1.3.103-1 +- Version bump to 1.3.103 for 2.0 test build 3 + +* Wed Jul 7 2021 Phillip Henderson 1.3.102-6 +- Update daos-devel to always require the same version daos-client + +* Wed Jun 30 2021 Tom Nabarro 1.3.102-5 +- Update to spdk 21.04 and (indirectly) dpdk 21.05 + +* Fri Jun 25 2021 Brian J. 
Murrell - 1.3.102-4 +- Add libuuid-devel back as a requirement of daos-devel + +* Wed Jun 23 2021 Li Wei 1.3.102-3 +- Update raft to pick up Pre-Vote + +* Mon Jun 14 2021 Jeff Olivier 1.3.102-2 +- Update to pmdk 1.11.0-rc1 +- Remove dependence on libpmem since we use libpmemobj directly + +* Fri Jun 11 2021 Johann Lombardi 1.3.102-1 +- Version bump to 1.3.102 for 2.0 test build 2 + +* Wed Jun 02 2021 Johann Lombardi 1.3.101-3 +- Remove libs from devel package + +* Thu May 20 2021 Jeff Olivier 1.3.0-101-2 +- Remove client libs from common package + +* Wed May 19 2021 Johann Lombardi 1.3.101-1 +- Version bump to 1.3.101 for 2.0 test build 1 + +* Fri May 07 2021 Brian J. Murrell 1.3.0-16 +- Enable debuginfo package building on SUSE platforms + +* Thu May 06 2021 Brian J. Murrell 1.3.0-15 +- Update to build on EL8 + +* Wed May 05 2021 Brian J. Murrell 1.3.0-14 +- Package /etc/daos/certs in main/common package so that both server + and client get it created + +* Wed Apr 21 2021 Tom Nabarro - 1.3.0-13 +- Relax ipmctl version requirement on leap15 as we have runtime checks + +* Fri Apr 16 2021 Mohamad Chaarawi - 1.3.0-12 +- remove dfuse_hl + +* Wed Apr 14 2021 Jeff Olivier - 1.3.0-11 +- Remove storage_estimator and io_conf from client packages to remove + any client side dependence on bio and vos (and and PMDK/SPDK) + +* Mon Apr 12 2021 Dalton A. Bohning - 1.3.0-10 +- Add attr to the test dependencies + +* Tue Apr 06 2021 Kris Jacque 1.3.0-9 +- Add package for daos_firmware helper binary + +* Fri Apr 02 2021 Jeff Olivier 1.3.0-8 +- Remove unused readline-devel + +* Thu Apr 01 2021 Brian J. Murrell 1.3.0-7 +- Update argobots to 1.1 + +* Tue Mar 30 2021 Maureen Jean 1.3.0-6 +- Change pydaos_shim_3 to pydaos_shim + +* Mon Mar 29 2021 Brian J. 
Murrell - 1.3.0-5 +- Move libdts.so to the daos-tests subpackage + +* Tue Mar 23 2021 Alexander Oganezov 1.3.0-4 +- Update libfabric to v1.12.0 +- Disable grdcopy/gdrapi linkage in libfabric + + +* Thu Mar 18 2021 Maureen Jean 1.3.0-3 +- Update to python3 + +* Thu Feb 25 2021 Li Wei 1.3.0-2 +- Require raft-devel 0.7.3 that fixes an unstable leadership problem caused by + removed replicas as well as some Coverity issues + +* Wed Feb 24 2021 Brian J. Murrell - 1.3.0-1 +- Version bump up to 1.3.0 + +* Mon Feb 22 2021 Brian J. Murrell 1.1.3-3 +- Remove all *-devel Requires from daos-devel as none of those are + actually necessary to build libdaos clients + +* Tue Feb 16 2021 Alexander Oganezov 1.1.3-2 +- Update libfabric to v1.12.0rc1 + +* Wed Feb 10 2021 Johann Lombardi 1.1.3-1 +- Version bump up to 1.1.3 + +* Tue Feb 9 2021 Vish Venkatesan 1.1.2.1-11 +- Add new pmem specific version of DAOS common library + +* Fri Feb 5 2021 Saurabh Tandan 1.1.2.1-10 +- Added dbench as requirement for test package. + +* Wed Feb 3 2021 Hua Kuang 1.1.2.1-9 +- Changed License to BSD-2-Clause-Patent + +* Wed Feb 03 2021 Brian J. Murrell - 1.1.2-8 +- Update minimum required libfabric to 1.11.1 + +* Thu Jan 28 2021 Phillip Henderson 1.1.2.1-7 +- Change ownership and permissions for the /etc/daos/certs directory. + +* Sat Jan 23 2021 Alexander Oganezov 1.1.2.1-6 +- Update to mercury v2.0.1rc1 + +* Fri Jan 22 2021 Michael MacDonald 1.1.2.1-5 +- Install daos_metrics utility to %%{_bindir} + +* Wed Jan 20 2021 Kenneth Cain 1.1.2.1-4 +- Version update for API major version 1, libdaos.so.1 (1.0.0) + +* Fri Jan 15 2021 Michael Hennecke 1.1.2.1-3 +- Harmonize daos_server and daos_agent groups. + +* Tue Dec 15 2020 Ashley Pittman 1.1.2.1-2 +- Combine the two memcheck suppressions files. 
+ +* Wed Dec 09 2020 Johann Lombardi 1.1.2.1-1 +- Version bump up to 1.1.2.1 + +* Fri Dec 04 2020 Li Wei 1.1.2-3 +- Require raft-devel 0.7.1 that fixes recent Coverity issues + +* Wed Dec 02 2020 Maureen Jean - 1.1.2-2 +- define scons_args to be BUILD_TYPE= +- the scons default is BUILD_TYPE=release +- BUILD_TYPE=release will disable fault injection in build + +* Tue Dec 01 2020 Brian J. Murrell - 1.1.2-1 +- Version bump up to 1.1.2 + +* Tue Nov 17 2020 Li Wei 1.1.1-8 +- Require raft-devel 0.7.0 that changes log indices and terms to 63-bit + +* Wed Nov 11 2020 Tom Nabarro 1.1.1-7 +- Add version validation for runtime daos_server ipmctl requirement to avoid + potential corruption of PMMs when setting PMem goal, issue fixed in + https://github.com/intel/ipmctl/commit/9e3898cb15fa9eed3ef3e9de4488be1681d53ff4 + +* Thu Oct 29 2020 Jonathan Martinez Montes 1.1.1-6 +- Restore obj_ctl utility + +* Wed Oct 28 2020 Brian J. Murrell - 1.1.1-5 +- Use %%autosetup +- Only use systemd_requires if it exists +- Obsoletes: cart now that it's included in daos + +* Sat Oct 24 2020 Maureen Jean 1.1.1-4 +- Add daos.conf to the daos package to resolve the path to libbio.so + +* Tue Oct 13 2020 Jonathan Martinez Montes 1.1.1-3 +- Remove obj_ctl from Tests RPM package +- Add libdts.so shared library that is used by daos_perf, daos_racer and + the daos utility. + +* Tue Oct 13 2020 Amanda Justiniano 1.1.1-3 +- Add lbzip2 requirement to the daos-tests package + +* Tue Oct 13 2020 Michael MacDonald 1.1.1-2 +- Create unprivileged user for daos_agent + +* Mon Oct 12 2020 Johann Lombardi 1.1.1-1 +- Version bump up to 1.1.1 + +* Sat Oct 03 2020 Michael MacDonald 1.1.0-34 +- Add go-race to BuildRequires on OpenSUSE Leap + +* Wed Sep 16 2020 Alexander Oganezov 1.1.0-33 +- Update OFI to v1.11.0 + +* Mon Aug 17 2020 Michael MacDonald 1.1.0-32 +- Install completion script in /etc/bash_completion.d + +* Wed Aug 05 2020 Brian J. 
Murrell - 1.1.0-31 +- Change fuse requirement to fuse3 +- Use Lmod for MPI module loading +- Remove unneeded (and un-distro gated) Requires: json-c + +* Wed Jul 29 2020 Jonathan Martinez Montes - 1.1.0-30 +- Add the daos_storage_estimator.py tool. It merges the functionality of the + former tools vos_size, vos_size.py, vos_size_dfs_sample.py and parse_csv.py. + +* Wed Jul 29 2020 Jeffrey V Olivier - 1.1.0-29 +- Revert prior changes from version 28 + +* Mon Jul 13 2020 Brian J. Murrell - 1.1.0-28 +- Change fuse requirement to fuse3 +- Use Lmod for MPI module loading + +* Tue Jul 7 2020 Alexander A Oganezov - 1.1.0-27 +- Update to mercury release 2.0.0~rc1-1 + +* Sun Jun 28 2020 Jonathan Martinez Montes - 1.1.0-26 +- Add the vos_size_dfs_sample.py tool. It is used to generate dynamically + the vos_dfs_sample.yaml file using the real DFS super block data. + +* Tue Jun 23 2020 Jeff Olivier - 1.1.0-25 +- Add -no-rpath option and use it for rpm build rather than modifying + SCons files in place + +* Tue Jun 16 2020 Jeff Olivier - 1.1.0-24 +- Modify RPATH removal snippet to replace line with pass as some lines + can't be removed without breaking the code + +* Fri Jun 05 2020 Ryon Jensen - 1.1.0-23 +- Add libisa-l_crypto dependency + +* Fri Jun 05 2020 Tom Nabarro - 1.1.0-22 +- Change server systemd run-as user to daos_server in unit file + +* Thu Jun 04 2020 Hua Kuang - 1.1.0-21 +- Remove dmg_old from DAOS RPM package + +* Thu May 28 2020 Tom Nabarro - 1.1.0-20 +- Create daos group to run as in systemd unit file + +* Tue May 26 2020 Brian J. Murrell - 1.1.0-19 +- Enable parallel building with _smp_mflags + +* Fri May 15 2020 Kenneth Cain - 1.1.0-18 +- Require raft-devel >= 0.6.0 that adds new API raft_election_start() + +* Thu May 14 2020 Brian J. Murrell - 1.1.0-17 +- Add cart-devel's Requires to daos-devel as they were forgotten + during the cart merge + +* Thu May 14 2020 Brian J. 
Murrell - 1.1.0-16 +- Fix fuse3-libs -> libfuse3 for SLES/Leap 15 + +* Thu Apr 30 2020 Brian J. Murrell - 1.1.0-15 +- Use new properly pre-release tagged mercury RPM + +* Thu Apr 30 2020 Brian J. Murrell - 1.1.0-14 +- Move fuse dependencies to the client subpackage + +* Mon Apr 27 2020 Michael MacDonald 1.1.0-13 +- Rename /etc/daos.yml -> /etc/daos_control.yml + +* Thu Apr 16 2020 Brian J. Murrell - 1.1.0-12 +- Use distro fuse + +* Fri Apr 10 2020 Alexander Oganezov - 1.1.0-11 +- Update to mercury 4871023 to pick na_ofi.c race condition fix for + "No route to host" errors. + +* Sun Apr 05 2020 Brian J. Murrell - 1.1.0-10 +- Clean up spdk dependencies + +* Mon Mar 30 2020 Tom Nabarro - 1.1.0-9 +- Set version of spdk to < v21, > v19 + +* Fri Mar 27 2020 David Quigley - 1.1.0-8 +- add daos and dmg man pages to the daos-client files list + +* Thu Mar 26 2020 Michael MacDonald 1.1.0-7 +- Add systemd scriptlets for managing daos_server/daos_agent services + +* Thu Mar 26 2020 Alexander Oganeozv - 1.1.0-6 +- Update ofi to 62f6c937601776dac8a1f97c8bb1b1a6acfbc3c0 + +* Tue Mar 24 2020 Jeffrey V. Olivier - 1.1.0-5 +- Remove cart as an external dependence + +* Mon Mar 23 2020 Jeffrey V. Olivier - 1.1.0-4 +- Remove scons_local as dependency + +* Tue Mar 03 2020 Brian J. Murrell - 1.1.0-3 +- Bump up go minimum version to 1.12 + +* Thu Feb 20 2020 Brian J. Murrell - 1.1.0-2 +- daos-server requires daos-client (same version) + +* Fri Feb 14 2020 Brian J. Murrell - 1.1.0-1 +- Version bump up to 1.1.0 + +* Wed Feb 12 2020 Brian J. 
Murrell - 0.9.0-2 +- Remove undefine _missing_build_ids_terminate_build + +* Thu Feb 06 2020 Johann Lombardi - 0.9.0-1 +- Version bump up to 0.9.0 + +* Sat Jan 18 2020 Jeff Olivier - 0.8.0-3 +- Fixing a few warnings in the RPM spec file + +* Fri Dec 27 2019 Jeff Olivier - 0.8.0-2 +- Remove openmpi, pmix, and hwloc builds, use hwloc and openmpi packages + +* Tue Dec 17 2019 Johann Lombardi - 0.8.0-1 +- Version bump up to 0.8.0 + +* Thu Dec 05 2019 Johann Lombardi - 0.7.0-1 +- Version bump up to 0.7.0 + +* Tue Nov 19 2019 Tom Nabarro 0.6.0-15 +- Temporarily unconstrain max. version of spdk + +* Wed Nov 06 2019 Brian J. Murrell 0.6.0-14 +- Constrain max. version of spdk + +* Wed Nov 06 2019 Brian J. Murrell 0.6.0-13 +- Use new cart with R: mercury to < 1.0.1-20 due to incompatibility + +* Wed Nov 06 2019 Michael MacDonald 0.6.0-12 +- Add daos_admin privileged helper for daos_server + +* Fri Oct 25 2019 Brian J. Murrell 0.6.0-11 +- Handle differences in Leap 15 Python packaging + +* Wed Oct 23 2019 Brian J. Murrell 0.6.0-9 +- Update BR: libisal-devel for Leap + +* Mon Oct 07 2019 Brian J. Murrell 0.6.0-8 +- Use BR: cart-devel-%%{cart_sha1} if available +- Remove cart's BRs as it's -devel Requires them now + +* Tue Oct 01 2019 Brian J. Murrell 0.6.0-7 +- Constrain cart BR to <= 1.0.0 + +* Sat Sep 21 2019 Brian J. Murrell +- Remove Requires: {argobots, cart} + - autodependencies should take care of these + +* Thu Sep 19 2019 Jeff Olivier +- Add valgrind-devel requirement for argobots change + +* Tue Sep 10 2019 Tom Nabarro +- Add requires ndctl as runtime dep for control plane. + +* Thu Aug 15 2019 David Quigley +- Add systemd unit files to packaging. + +* Thu Jul 25 2019 Brian J. Murrell +- Add git hash and commit count to release + +* Thu Jul 18 2019 David Quigley +- Add certificate generation files to packaging. 
+ +* Tue Jul 09 2019 Johann Lombardi +- Version bump up to 0.6.0 + +* Fri Jun 21 2019 David Quigley +- Add daos_agent.yml to the list of packaged files + +* Thu Jun 13 2019 Brian J. Murrell +- move obj_ctl daos_gen_io_conf daos_run_io_conf to + daos-tests sub-package +- daos-server needs spdk-tools + +* Fri May 31 2019 Ken Cain +- Add new daos utility binary + +* Wed May 29 2019 Brian J. Murrell +- Version bump up to 0.5.0 +- Add Requires: libpsm_infinipath1 for SLES 12.3 + +* Tue May 07 2019 Brian J. Murrell +- Move some files around among the sub-packages + +* Mon May 06 2019 Brian J. Murrell +- Only BR fio + - fio-{devel,src} is not needed + +* Wed Apr 03 2019 Brian J. Murrell +- initial package diff --git a/utils/rpms/daos.sh b/utils/rpms/daos.sh index 5d7b3abd7a4..f6d4a97df42 100755 --- a/utils/rpms/daos.sh +++ b/utils/rpms/daos.sh @@ -31,6 +31,8 @@ to-end data integrity, fine grained data control and elastic storage to optimize performance and cost." URL="https://daos.io" +RPM_CHANGELOG="daos.changelog" + # Some extra "install" steps # daos package files=() diff --git a/utils/rpms/daos.spec b/utils/rpms/daos.spec index 0098ec2a96c..c4a67b5b2cf 100644 --- a/utils/rpms/daos.spec +++ b/utils/rpms/daos.spec @@ -657,826 +657,3 @@ fi %doc README.md # No files in a shim package %endif - -%changelog -* Thu Oct 16 2025 Jeff Olivier 2.7.101-16 -- Make daos-spdk conflict with spdk - -* Thu Sep 12 2025 Jeff Olivier 2.7.101-15 -- Fix leap package name - -* Thu Sep 11 2025 Jeff Olivier 2.7.101-14 -- Fix pmdk package for leap -- Fix daos-spdk package - -* Mon Aug 11 2025 Jeff Olivier 2.7.101-13 -- Switch to fpm build for RPMs - -* Wed Jul 30 2025 Tomasz Gromadzki 2.7.101-12 -- pmemobj errors and warnings reported via DAOS logging system - -* Mon Jun 2 2025 Samirkumar Raval 2.7.101-11 -- Changing the default log location to /var/log/daos from /tmp - -* Mon May 19 2025 Jeff Olivier 2.7.101-10 -- Start to deprecate this file being used to build DAOS but rather only source - 
RPM - -* Mon May 12 2025 Tomasz Gromadzki 2.7.101-9 -- Bump lua-lmod version to >=8.7.36 -- Bump lmod version to >=8.7.36 -- Bump mpich version to 4.1~a1 -- Bump python3-mpi4py-tests version to >= 3.1.6 -- Add openmpi requiremnent for daos-client-tests on Leap. - -* Fri Mar 21 2025 Cedric Koch-Hofer 2.7.101-8 -- Add support of the libasan - -* Tue Mar 18 2025 Jeff Olivier 2.7.101-7 -- Remove raft as external dependency - -* Mon Mar 10 2025 Jeff Olivier 2.7.101-6 -- Remove server from Ubuntu packaging and fix client only build - -* Wed Jan 22 2025 Jan Michalski 2.7.101-5 -- Add ddb_ut and dtx_ut to the server-tests package - -* Fri Dec 20 2024 Jeff Olivier 2.7.101-4 -- Switch libfuse3 to libfused - -* Thu Dec 19 2024 Phillip Henderson 2.7.101-3 -- Fix protobuf-c requiremnent for daos-client-tests on Leap. - -* Thu Nov 14 2024 Denis Barakhtanov 2.7.101-2 -- Add pydaos.torch module to daos-client rpm. - -* Fri Nov 08 2024 Phillip Henderson 2.7.101-1 -- Bump version to 2.7.100 - -* Tue Nov 5 2024 Michael MacDonald 2.7.100-11 -- Move daos_metrics tool to daos package for use on both clients - and servers. - -* Fri Nov 1 2024 Sherin T George 2.7.100-10 -- The modified DAV allocator with memory bucket support for md_on_ssd - phase-2 is delivered as dav_v2.so. - -* Tue Oct 15 2024 Brian J. Murrell - 2.7.100-9 -- Drop BRs for UCX as they were obsoleted as of e01970d - -* Mon Oct 07 2024 Cedric Koch-Hofer 2.7.100-8 -- Update BR: argobots to 1.2 - -* Tue Oct 01 2024 Tomasz Gromadzki 2.7.100-7 -- Add support of the PMDK package 2.1.0 with NDCTL enabled. - * Increase the default ULT stack size to 20KiB if the engine uses - the DCPM storage class. - * Prevent using the RAM storage class (simulated PMem) when - the shutdown state (SDS) is active. - * Automatically disable SDS for the RAM storage class on engine startup. - * Force explicitly setting the PMEMOBJ_CONF='sds.at_create=0' - environment variable to deactivate SDS for the DAOS tools - (ddb, daos_perf, vos_perf, etc.) 
when used WITHOUT DCPM. - Otherwise, a user is supposed to be stopped by an error - like: "Unsafe shutdown count is not supported for this source". - -* Mon Sep 23 2024 Kris Jacque 2.7.100-6 -- Bump min supported go version to 1.21 - -* Thu Aug 15 2024 Michael MacDonald 2.7.100-5 -- Add libdaos_self_test.so to client RPM - -* Mon Aug 05 2024 Jerome Soumagne 2.7.100-4 -- Bump mercury version to 2.4.0rc4 - -* Thu Jul 11 2024 Dalton Bohning 2.7.100-3 -- Add pciutils-devel build dep for client-tests package - -* Mon Jun 24 2024 Tom Nabarro 2.7.100-2 -- Add pciutils runtime dep for daos_server lspci call -- Add pciutils-devel build dep for pciutils CGO bindings - -* Mon May 20 2024 Phillip Henderson 2.7.100-1 -- Bump version to 2.7.100 - -* Fri May 03 2024 Lei Huang 2.5.101-5 -- Add libaio as a dependent package - -* Fri Apr 05 2024 Fan Yong 2.5.101-4 -- Catastrophic Recovery - -* Thu Apr 04 2024 Ashley M. Pittman 2.5.101-3 -- Update pydaos install process -- Add a dependency from daos-client-tests to daos-devel - -* Mon Mar 18 2024 Jan Michalski 2.5.101-2 -- Add dtx_tests to the server-tests package - -* Fri Mar 15 2024 Phillip Henderson 2.5.101-1 -- Bump version to 2.5.101 - -* Tue Feb 27 2024 Li Wei 2.5.100-16 -- Update raft to 0.11.0-1.416.g12dbc15 - -* Mon Feb 12 2024 Ryon Jensen 2.5.100-15 -- Updated isa-l package name to match EPEL - -* Tue Jan 09 2024 Brian J. Murrell 2.5.100-14 -- Move /etc/ld.so.conf.d/daos.conf to daos-server sub-package - -* Wed Dec 06 2023 Brian J. Murrell 2.5.100-13 -- Update for EL 8.8 and Leap 15.5 -- Update raft to 0.10.1-2.411.gefa15f4 - -* Fri Nov 17 2023 Tomasz Gromadzki 2.5.100-12 -- Update to PMDK 2.0.0 - * Remove libpmemblk from dependencies. - * Start using BUILD_EXAMPLES=n and BUILD_BENCHMARKS=n instead of patches. - * Stop using BUILD_RPMEM=n (removed) and NDCTL_DISABLE=y (invalid). - * Point https://github.com/pmem/pmdk as the main PMDK reference source. - NOTE: PMDK upgrade to 2.0.0 does not affect any API call used by DAOS. 
- libpmemobj (and libpmem) API stays unchanged. - -* Wed Nov 15 2023 Jerome Soumagne 2.5.100-11 -- Bump mercury min version to 2.3.1 - -* Fri Nov 03 2023 Phillip Henderson 2.5.100-10 -- Move verify_perms.py location - -* Wed Aug 23 2023 Brian J. Murrell 2.5.100-9 -- Update fuse3 requirement to R: /usr/bin/fusermount3 by path - rather than by package name, for portability and future-proofing -- Adding fuse3-devel as a requirement for daos-client-tests subpackage - -* Tue Aug 08 2023 Brian J. Murrell 2.5.100-8 -- Build on EL9 -- Add a client-tests-mpich subpackage for mpich test dependencies. - -* Fri Jul 07 2023 Brian J. Murrell 2.5.100-7 -- Fix golang daos-client-tests dependency to be go instead - -* Thu Jun 29 2023 Michael MacDonald 2.5.100-6 -- Install golang >= 1.18 as a daos-client-tests dependency - -* Thu Jun 22 2023 Li Wei 2.5.100-5 -- Update raft to 0.10.1-1.408.g9524cdb - -* Wed Jun 14 2023 Mohamad Chaarawi - 2.5.100-4 -- Add pipeline lib - -* Wed Jun 14 2023 Wang Shilong 2.5.100-3 -- Remove lmdb-devel for MD on SSD - -* Wed Jun 07 2023 Ryon Jensen 2.5.100-2 -- Removed unnecessary test files - -* Tue Jun 06 2023 Jeff Olivier 2.5.100-1 -- Switch version to 2.5.100 for 2.6 test builds - -* Mon Jun 5 2023 Jerome Soumagne 2.3.107-7 -- Remove libfabric pinning and allow for 1.18 builds - -* Fri May 26 2023 Jeff Olivier 2.3.107-6 -- Add lmdb-devel and bio_ut for MD on SSD - -* Tue May 23 2023 Lei Huang 2.3.107-5 -- Add libcapstone-devel to deps of client-tests package - -* Tue May 16 2023 Lei Huang 2.3.107-4 -- Add libcapstone as a new prerequisite package -- Add libpil4dfs.so in daos-client rpm - -* Mon May 15 2023 Jerome Soumagne 2.3.107-3 -- Fix libfabric/libfabric1 dependency mismatch on SuSE - -* Wed May 10 2023 Jerome Soumagne 2.3.107-2 -- Temporarily pin libfabric to < 1.18 - -* Fri May 5 2023 Johann Lombardi 2.3.107-1 -- Bump version to 2.3.107 - -* Fri Mar 17 2023 Tom Nabarro 2.3.106-2 -- Add numactl requires for server package - -* Tue Mar 14 2023 
Brian J. Murrell 2.3.106-1 -- Bump version to be higher than TB5 - -* Wed Feb 22 2023 Li Wei 2.3.103-6 -- Update raft to 0.9.2-1.403.g3d20556 - -* Tue Feb 21 2023 Michael MacDonald 2.3.103-5 -- Bump min supported go version to 1.17 - -* Fri Feb 17 2023 Ashley M. Pittman 2.3.103-4 -- Add protobuf-c-devel to deps of client-tests package - -* Mon Feb 13 2023 Brian J. Murrell 2.3.103-3 -- Remove explicit R: protobuf-c and let the auto-dependency generator - handle it - -* Wed Feb 8 2023 Michael Hennecke 2.3.103-2 -- Change ipmctl requirement from v2 to v3 - -* Fri Jan 27 2023 Phillip Henderson 2.3.103-1 -- Bump version to 2.3.103 - -* Wed Jan 25 2023 Johann Lombardi 2.3.102-1 -- Bump version to 2.3.102 - -* Tue Jan 24 2023 Phillip Henderson 2.3.101-7 -- Fix daos-tests-internal requirement for daos-tests - -* Fri Jan 6 2023 Brian J. Murrell 2.3.101-6 -- Don't need to O: cart any more -- Add %%doc to all packages -- _datadir -> _datarootdir -- Don't use PREFIX= with scons in %%build -- Fix up some hard-coded paths to use macros instead -- Use some guards to prevent creating empty scriptlets - -* Tue Dec 06 2022 Joseph G. Moore 2.3.101-5 -- Update Mercury to 2.2.0-6 - -* Thu Dec 01 2022 Tom Nabarro 2.3.101-4 -- Update SPDK dependency requirement to greater than or equal to 22.01.2. - -* Tue Oct 18 2022 Brian J. Murrell 2.3.101-3 -- Set flag to build per-subpackage debuginfo packages for Leap 15 - -* Thu Oct 6 2022 Michael MacDonald 2.3.101-2 -- Rename daos_admin -> daos_server_helper - -* Tue Sep 20 2022 Johann Lombardi 2.3.101-1 -- Bump version to 2.3.101 - -* Thu Sep 8 2022 Jeff Olivier 2.3.100-22 -- Move io_conf files from bin to TESTING - -* Tue Aug 16 2022 Jeff Olivier 2.3.100-21 -- Update PMDK to 1.12.1~rc1 to fix DAOS-11151 - -* Thu Aug 11 2022 Wang Shilong 2.3.100-20 -- Add daos_debug_set_params to daos-client-tests rpm for fault injection test. 
- -* Fri Aug 5 2022 Jerome Soumagne 2.3.100-19 -- Update to mercury 2.2.0 - -* Tue Jul 26 2022 Michael MacDonald 2.3.100-18 -- Bump min supported go version to 1.16 - -* Mon Jul 18 2022 Jerome Soumagne 2.3.100-17 -- Remove now unused openpa dependency - -* Fri Jul 15 2022 Jeff Olivier 2.3.100-16 -- Add pool_scrubbing_tests to test package - -* Wed Jul 13 2022 Tom Nabarro 2.3.100-15 -- Update SPDK dependency requirement to greater than or equal to 22.01.1. - -* Mon Jun 27 2022 Jerome Soumagne 2.3.100-14 -- Update to mercury 2.2.0rc6 - -* Fri Jun 17 2022 Jeff Olivier 2.3.100-13 -- Remove libdts.so, replace with build time static - -* Thu Jun 2 2022 Jeff Olivier 2.3.100-12 -- Make ucx required for build on all platforms - -* Wed Jun 1 2022 Michael MacDonald 2.3.100-11 -- Move dmg to new daos-admin RPM - -* Wed May 18 2022 Lei Huang 2.3.100-10 -- Update to libfabric to v1.15.1-1 to include critical performance patches - -* Tue May 17 2022 Phillip Henderson 2.3.100-9 -- Remove doas-client-tests-openmpi dependency from daos-tests -- Add daos-tests-internal package - -* Mon May 9 2022 Ashley Pittman 2.3.100-8 -- Extend dfusedaosbuild test to run in different configurations. - -* Fri May 6 2022 Ashley Pittman 2.3.100-7 -- Add dfuse unit-test binary to call from ftest. 
- -* Wed May 4 2022 Joseph Moore 2.3.100-6 -- Update to mercury 2.1.0.rc4-9 to enable non-unified mode in UCX - -* Tue Apr 26 2022 Phillip Henderson 2.3.100-5 -- Move daos_gen_io_conf and daos_run_io_conf to daos-client-tests - -* Wed Apr 20 2022 Lei Huang 2.3.100-4 -- Update to libfabric to v1.15.0rc3-1 to include critical performance patches - -* Tue Apr 12 2022 Li Wei 2.3.100-3 -- Update raft to 0.9.1-1401.gc18bcb8 to fix uninitialized node IDs - -* Wed Apr 6 2022 Jeff Olivier 2.3.100-2 -- Remove direct MPI dependency from most of tests - -* Wed Apr 6 2022 Johann Lombardi 2.3.100-1 -- Switch version to 2.3.100 for 2.4 test builds - -* Wed Apr 6 2022 Joseph Moore 2.1.100-26 -- Add build depends entries for UCX libraries. - -* Sat Apr 2 2022 Joseph Moore 2.1.100-25 -- Update to mercury 2.1.0.rc4-8 to include UCX provider patch - -* Fri Mar 11 2022 Alexander Oganezov 2.1.100-24 -- Update to mercury 2.1.0.rc4-6 to include CXI provider patch - -* Wed Mar 02 2022 Michael Hennecke 2.1.100-23 -- DAOS-6344: Create secondary group daos_daemons for daos_server and daos_agent - -* Tue Feb 22 2022 Alexander Oganezov 2.1.100-22 -- Update mercury to include DAOS-9561 workaround - -* Sun Feb 13 2022 Michael MacDonald 2.1.100-21 -- Update go toolchain requirements - -* Thu Feb 10 2022 Li Wei 2.1.100-20 -- Update raft to 0.9.0-1394.gc81505f to fix membership change bugs - -* Wed Jan 19 2022 Michael MacDonald 2.1.100-19 -- Move libdaos_common.so from daos-client to daos package - -* Mon Jan 17 2022 Johann Lombardi 2.1.100-18 -- Update libfabric to 1.14.0 GA and apply fix for DAOS-9376 - -* Thu Dec 23 2021 Alexander Oganezov 2.1.100-17 -- Update to v2.1.0-rc4-3 to pick fix for DAOS-9325 high cpu usage -- Change mercury pinning to be >= instead of strict = - -* Thu Dec 16 2021 Brian J. Murrell 2.1.100-16 -- Add BR: python-rpm-macros for Leap 15 as python3-base dropped that - as a R: - -* Sat Dec 11 2021 Brian J. 
Murrell 2.1.100-15 -- Create a shim package to allow daos openmpi packages built with the - distribution openmpi to install on MOFED systems - -* Fri Dec 10 2021 Brian J. Murrell 2.1.100-14 -- Don't make daos-*-tests-openmi a dependency of anything - - If they are wanted, they should be installed explicitly, due to - potential conflicts with other MPI stacks - -* Wed Dec 08 2021 Alexander Oganezov 2.1.100-13 -- Remove DAOS-9173 workaround from mercury. Apply DAOS-9173 to ofi - -* Tue Dec 07 2021 Alexander Oganezov 2.1.100-12 -- Apply DAOS-9173 workaround to mercury - -* Fri Dec 03 2021 Alexander Oganezov 2.1.100-11 -- Update mercury to v2.1.0rc4 - -* Thu Dec 02 2021 Danielle M. Sikich 2.1.100-10 -- Fix name of daos serialize package - -* Sun Nov 28 2021 Tom Nabarro 2.1.100-9 -- Set rmem_{max,default} sysctl values on server package install to enable - SPDK pci_event module to operate in unprivileged process (daos_engine). - -* Wed Nov 24 2021 Brian J. Murrell 2.1.100-8 -- Remove invalid "%%else if" syntax -- Fix a few other rpmlint warnings - -* Tue Nov 16 2021 Wang Shilong 2.1.100-7 -- Update for libdaos major version bump -- Fix version of libpemobj1 for SUSE - -* Sat Nov 13 2021 Alexander Oganezov 2.1.100-6 -- Update OFI to v1.14.0rc3 - -* Tue Oct 26 2021 Brian J. Murrell 2.1.100-5 -- Create new daos-{client,server}tests-openmpi and daos-server-tests subpackages -- Rename daos-tests daos-client-tests and make daos-tests require all - other test suites to maintain existing behavior - -* Mon Oct 25 2021 Alexander Oganezov 2.1.100-4 -- Update mercury to v2.1.0rc2 - -* Wed Oct 20 2021 Jeff Olivier 2.1.100-3 -- Explicitly require 1.11.0-3 of PMDK - -* Wed Oct 13 2021 David Quigley 2.1.100-2 -- Add defusedxml as a required dependency for the test package. 
- -* Wed Oct 13 2021 Johann Lombardi 2.1.100-1 -- Switch version to 2.1.100 for 2.2 test builds - -* Tue Oct 12 2021 Johann Lombardi 1.3.106-1 -- Version bump to 1.3.106 for 2.0 test build 6 - -* Fri Oct 8 2021 Alexander Oganezov 1.13.105-4 -- Update OFI to v1.13.2rc1 - -* Wed Sep 15 2021 Li Wei 1.3.105-3 -- Update raft to fix InstallSnapshot performance as well as to avoid some - incorrect 0.8.0 RPMs - -* Fri Sep 03 2021 Brian J. Murrell 1.3.105-2 -- Remove R: hwloc; RPM's auto-requires/provides will take care of this - -* Tue Aug 24 2021 Jeff Olivier 1.3.105-1 -- Version bump to 1.3.105 for 2.0 test build 5 - -* Mon Aug 09 2021 Yawei 1.3.104-5 -- Fix duplicates -- Add vos_perf - -* Thu Aug 05 2021 Christopher Hoffman 1.3.104-4 -- Update conditional statement to include checking for distributions to - determine which unit files to use for daos-server and daos-agent - -* Wed Aug 04 2021 Kris Jacque 1.3.104-3 -- Move daos_metrics tool from tests package to server package - -* Wed Aug 04 2021 Tom Nabarro 1.3.104-2 -- Update to spdk 21.07 and (indirectly) dpdk 21.05 - -* Mon Aug 02 2021 Jeff Olivier 1.3.104-1 -- Version bump to 1.3.104 for 2.0 test build 4 - -* Mon Jul 19 2021 Danielle M. Sikich 1.3.103-5 -- Add DAOS serialization library that requires hdf5 - -* Wed Jul 14 2021 Li Wei 1.3.103-4 -- Update raft to fix slow leader re-elections - -* Tue Jul 13 2021 Maureen Jean 1.3.103-3 -- Add python modules to python3.6 site-packages - -* Mon Jul 12 2021 Alexander Oganezov 1.3.103-2 -- Update to mercury release v2.0.1 - -* Mon Jul 12 2021 Johann Lombardi 1.3.103-1 -- Version bump to 1.3.103 for 2.0 test build 3 - -* Wed Jul 7 2021 Phillip Henderson 1.3.102-6 -- Update daos-devel to always require the same version daos-client - -* Wed Jun 30 2021 Tom Nabarro 1.3.102-5 -- Update to spdk 21.04 and (indirectly) dpdk 21.05 - -* Fri Jun 25 2021 Brian J. 
Murrell - 1.3.102-4 -- Add libuuid-devel back as a requirement of daos-devel - -* Wed Jun 23 2021 Li Wei 1.3.102-3 -- Update raft to pick up Pre-Vote - -* Mon Jun 14 2021 Jeff Olivier 1.3.102-2 -- Update to pmdk 1.11.0-rc1 -- Remove dependence on libpmem since we use libpmemobj directly - -* Fri Jun 11 2021 Johann Lombardi 1.3.102-1 -- Version bump to 1.3.102 for 2.0 test build 2 - -* Wed Jun 02 2021 Johann Lombardi 1.3.101-3 -- Remove libs from devel package - -* Thu May 20 2021 Jeff Olivier 1.3.0-101-2 -- Remove client libs from common package - -* Wed May 19 2021 Johann Lombardi 1.3.101-1 -- Version bump to 1.3.101 for 2.0 test build 1 - -* Fri May 07 2021 Brian J. Murrell 1.3.0-16 -- Enable debuginfo package building on SUSE platforms - -* Thu May 06 2021 Brian J. Murrell 1.3.0-15 -- Update to build on EL8 - -* Wed May 05 2021 Brian J. Murrell 1.3.0-14 -- Package /etc/daos/certs in main/common package so that both server - and client get it created - -* Wed Apr 21 2021 Tom Nabarro - 1.3.0-13 -- Relax ipmctl version requirement on leap15 as we have runtime checks - -* Fri Apr 16 2021 Mohamad Chaarawi - 1.3.0-12 -- remove dfuse_hl - -* Wed Apr 14 2021 Jeff Olivier - 1.3.0-11 -- Remove storage_estimator and io_conf from client packages to remove - any client side dependence on bio and vos (and and PMDK/SPDK) - -* Mon Apr 12 2021 Dalton A. Bohning - 1.3.0-10 -- Add attr to the test dependencies - -* Tue Apr 06 2021 Kris Jacque 1.3.0-9 -- Add package for daos_firmware helper binary - -* Fri Apr 02 2021 Jeff Olivier 1.3.0-8 -- Remove unused readline-devel - -* Thu Apr 01 2021 Brian J. Murrell 1.3.0-7 -- Update argobots to 1.1 - -* Tue Mar 30 2021 Maureen Jean 1.3.0-6 -- Change pydaos_shim_3 to pydaos_shim - -* Mon Mar 29 2021 Brian J. 
Murrell - 1.3.0-5 -- Move libdts.so to the daos-tests subpackage - -* Tue Mar 23 2021 Alexander Oganezov 1.3.0-4 -- Update libfabric to v1.12.0 -- Disable grdcopy/gdrapi linkage in libfabric - - -* Thu Mar 18 2021 Maureen Jean 1.3.0-3 -- Update to python3 - -* Thu Feb 25 2021 Li Wei 1.3.0-2 -- Require raft-devel 0.7.3 that fixes an unstable leadership problem caused by - removed replicas as well as some Coverity issues - -* Wed Feb 24 2021 Brian J. Murrell - 1.3.0-1 -- Version bump up to 1.3.0 - -* Mon Feb 22 2021 Brian J. Murrell 1.1.3-3 -- Remove all *-devel Requires from daos-devel as none of those are - actually necessary to build libdaos clients - -* Tue Feb 16 2021 Alexander Oganezov 1.1.3-2 -- Update libfabric to v1.12.0rc1 - -* Wed Feb 10 2021 Johann Lombardi 1.1.3-1 -- Version bump up to 1.1.3 - -* Tue Feb 9 2021 Vish Venkatesan 1.1.2.1-11 -- Add new pmem specific version of DAOS common library - -* Fri Feb 5 2021 Saurabh Tandan 1.1.2.1-10 -- Added dbench as requirement for test package. - -* Wed Feb 3 2021 Hua Kuang 1.1.2.1-9 -- Changed License to BSD-2-Clause-Patent - -* Wed Feb 03 2021 Brian J. Murrell - 1.1.2-8 -- Update minimum required libfabric to 1.11.1 - -* Thu Jan 28 2021 Phillip Henderson 1.1.2.1-7 -- Change ownership and permissions for the /etc/daos/certs directory. - -* Sat Jan 23 2021 Alexander Oganezov 1.1.2.1-6 -- Update to mercury v2.0.1rc1 - -* Fri Jan 22 2021 Michael MacDonald 1.1.2.1-5 -- Install daos_metrics utility to %%{_bindir} - -* Wed Jan 20 2021 Kenneth Cain 1.1.2.1-4 -- Version update for API major version 1, libdaos.so.1 (1.0.0) - -* Fri Jan 15 2021 Michael Hennecke 1.1.2.1-3 -- Harmonize daos_server and daos_agent groups. - -* Tue Dec 15 2020 Ashley Pittman 1.1.2.1-2 -- Combine the two memcheck suppressions files. 
- -* Wed Dec 09 2020 Johann Lombardi 1.1.2.1-1 -- Version bump up to 1.1.2.1 - -* Fri Dec 04 2020 Li Wei 1.1.2-3 -- Require raft-devel 0.7.1 that fixes recent Coverity issues - -* Wed Dec 02 2020 Maureen Jean - 1.1.2-2 -- define scons_args to be BUILD_TYPE= -- the scons default is BUILD_TYPE=release -- BUILD_TYPE=release will disable fault injection in build - -* Tue Dec 01 2020 Brian J. Murrell - 1.1.2-1 -- Version bump up to 1.1.2 - -* Tue Nov 17 2020 Li Wei 1.1.1-8 -- Require raft-devel 0.7.0 that changes log indices and terms to 63-bit - -* Wed Nov 11 2020 Tom Nabarro 1.1.1-7 -- Add version validation for runtime daos_server ipmctl requirement to avoid - potential corruption of PMMs when setting PMem goal, issue fixed in - https://github.com/intel/ipmctl/commit/9e3898cb15fa9eed3ef3e9de4488be1681d53ff4 - -* Thu Oct 29 2020 Jonathan Martinez Montes 1.1.1-6 -- Restore obj_ctl utility - -* Wed Oct 28 2020 Brian J. Murrell - 1.1.1-5 -- Use %%autosetup -- Only use systemd_requires if it exists -- Obsoletes: cart now that it's included in daos - -* Sat Oct 24 2020 Maureen Jean 1.1.1-4 -- Add daos.conf to the daos package to resolve the path to libbio.so - -* Tue Oct 13 2020 Jonathan Martinez Montes 1.1.1-3 -- Remove obj_ctl from Tests RPM package -- Add libdts.so shared library that is used by daos_perf, daos_racer and - the daos utility. - -* Tue Oct 13 2020 Amanda Justiniano 1.1.1-3 -- Add lbzip2 requirement to the daos-tests package - -* Tue Oct 13 2020 Michael MacDonald 1.1.1-2 -- Create unprivileged user for daos_agent - -* Mon Oct 12 2020 Johann Lombardi 1.1.1-1 -- Version bump up to 1.1.1 - -* Sat Oct 03 2020 Michael MacDonald 1.1.0-34 -- Add go-race to BuildRequires on OpenSUSE Leap - -* Wed Sep 16 2020 Alexander Oganezov 1.1.0-33 -- Update OFI to v1.11.0 - -* Mon Aug 17 2020 Michael MacDonald 1.1.0-32 -- Install completion script in /etc/bash_completion.d - -* Wed Aug 05 2020 Brian J. 
Murrell - 1.1.0-31 -- Change fuse requirement to fuse3 -- Use Lmod for MPI module loading -- Remove unneeded (and un-distro gated) Requires: json-c - -* Wed Jul 29 2020 Jonathan Martinez Montes - 1.1.0-30 -- Add the daos_storage_estimator.py tool. It merges the functionality of the - former tools vos_size, vos_size.py, vos_size_dfs_sample.py and parse_csv.py. - -* Wed Jul 29 2020 Jeffrey V Olivier - 1.1.0-29 -- Revert prior changes from version 28 - -* Mon Jul 13 2020 Brian J. Murrell - 1.1.0-28 -- Change fuse requirement to fuse3 -- Use Lmod for MPI module loading - -* Tue Jul 7 2020 Alexander A Oganezov - 1.1.0-27 -- Update to mercury release 2.0.0~rc1-1 - -* Sun Jun 28 2020 Jonathan Martinez Montes - 1.1.0-26 -- Add the vos_size_dfs_sample.py tool. It is used to generate dynamically - the vos_dfs_sample.yaml file using the real DFS super block data. - -* Tue Jun 23 2020 Jeff Olivier - 1.1.0-25 -- Add -no-rpath option and use it for rpm build rather than modifying - SCons files in place - -* Tue Jun 16 2020 Jeff Olivier - 1.1.0-24 -- Modify RPATH removal snippet to replace line with pass as some lines - can't be removed without breaking the code - -* Fri Jun 05 2020 Ryon Jensen - 1.1.0-23 -- Add libisa-l_crypto dependency - -* Fri Jun 05 2020 Tom Nabarro - 1.1.0-22 -- Change server systemd run-as user to daos_server in unit file - -* Thu Jun 04 2020 Hua Kuang - 1.1.0-21 -- Remove dmg_old from DAOS RPM package - -* Thu May 28 2020 Tom Nabarro - 1.1.0-20 -- Create daos group to run as in systemd unit file - -* Tue May 26 2020 Brian J. Murrell - 1.1.0-19 -- Enable parallel building with _smp_mflags - -* Fri May 15 2020 Kenneth Cain - 1.1.0-18 -- Require raft-devel >= 0.6.0 that adds new API raft_election_start() - -* Thu May 14 2020 Brian J. Murrell - 1.1.0-17 -- Add cart-devel's Requires to daos-devel as they were forgotten - during the cart merge - -* Thu May 14 2020 Brian J. 
Murrell - 1.1.0-16 -- Fix fuse3-libs -> libfuse3 for SLES/Leap 15 - -* Thu Apr 30 2020 Brian J. Murrell - 1.1.0-15 -- Use new properly pre-release tagged mercury RPM - -* Thu Apr 30 2020 Brian J. Murrell - 1.1.0-14 -- Move fuse dependencies to the client subpackage - -* Mon Apr 27 2020 Michael MacDonald 1.1.0-13 -- Rename /etc/daos.yml -> /etc/daos_control.yml - -* Thu Apr 16 2020 Brian J. Murrell - 1.1.0-12 -- Use distro fuse - -* Fri Apr 10 2020 Alexander Oganezov - 1.1.0-11 -- Update to mercury 4871023 to pick na_ofi.c race condition fix for - "No route to host" errors. - -* Sun Apr 05 2020 Brian J. Murrell - 1.1.0-10 -- Clean up spdk dependencies - -* Mon Mar 30 2020 Tom Nabarro - 1.1.0-9 -- Set version of spdk to < v21, > v19 - -* Fri Mar 27 2020 David Quigley - 1.1.0-8 -- add daos and dmg man pages to the daos-client files list - -* Thu Mar 26 2020 Michael MacDonald 1.1.0-7 -- Add systemd scriptlets for managing daos_server/daos_agent services - -* Thu Mar 26 2020 Alexander Oganeozv - 1.1.0-6 -- Update ofi to 62f6c937601776dac8a1f97c8bb1b1a6acfbc3c0 - -* Tue Mar 24 2020 Jeffrey V. Olivier - 1.1.0-5 -- Remove cart as an external dependence - -* Mon Mar 23 2020 Jeffrey V. Olivier - 1.1.0-4 -- Remove scons_local as dependency - -* Tue Mar 03 2020 Brian J. Murrell - 1.1.0-3 -- Bump up go minimum version to 1.12 - -* Thu Feb 20 2020 Brian J. Murrell - 1.1.0-2 -- daos-server requires daos-client (same version) - -* Fri Feb 14 2020 Brian J. Murrell - 1.1.0-1 -- Version bump up to 1.1.0 - -* Wed Feb 12 2020 Brian J. 
Murrell - 0.9.0-2 -- Remove undefine _missing_build_ids_terminate_build - -* Thu Feb 06 2020 Johann Lombardi - 0.9.0-1 -- Version bump up to 0.9.0 - -* Sat Jan 18 2020 Jeff Olivier - 0.8.0-3 -- Fixing a few warnings in the RPM spec file - -* Fri Dec 27 2019 Jeff Olivier - 0.8.0-2 -- Remove openmpi, pmix, and hwloc builds, use hwloc and openmpi packages - -* Tue Dec 17 2019 Johann Lombardi - 0.8.0-1 -- Version bump up to 0.8.0 - -* Thu Dec 05 2019 Johann Lombardi - 0.7.0-1 -- Version bump up to 0.7.0 - -* Tue Nov 19 2019 Tom Nabarro 0.6.0-15 -- Temporarily unconstrain max. version of spdk - -* Wed Nov 06 2019 Brian J. Murrell 0.6.0-14 -- Constrain max. version of spdk - -* Wed Nov 06 2019 Brian J. Murrell 0.6.0-13 -- Use new cart with R: mercury to < 1.0.1-20 due to incompatibility - -* Wed Nov 06 2019 Michael MacDonald 0.6.0-12 -- Add daos_admin privileged helper for daos_server - -* Fri Oct 25 2019 Brian J. Murrell 0.6.0-11 -- Handle differences in Leap 15 Python packaging - -* Wed Oct 23 2019 Brian J. Murrell 0.6.0-9 -- Update BR: libisal-devel for Leap - -* Mon Oct 07 2019 Brian J. Murrell 0.6.0-8 -- Use BR: cart-devel-%%{cart_sha1} if available -- Remove cart's BRs as it's -devel Requires them now - -* Tue Oct 01 2019 Brian J. Murrell 0.6.0-7 -- Constrain cart BR to <= 1.0.0 - -* Sat Sep 21 2019 Brian J. Murrell -- Remove Requires: {argobots, cart} - - autodependencies should take care of these - -* Thu Sep 19 2019 Jeff Olivier -- Add valgrind-devel requirement for argobots change - -* Tue Sep 10 2019 Tom Nabarro -- Add requires ndctl as runtime dep for control plane. - -* Thu Aug 15 2019 David Quigley -- Add systemd unit files to packaging. - -* Thu Jul 25 2019 Brian J. Murrell -- Add git hash and commit count to release - -* Thu Jul 18 2019 David Quigley -- Add certificate generation files to packaging. 
- -* Tue Jul 09 2019 Johann Lombardi -- Version bump up to 0.6.0 - -* Fri Jun 21 2019 David Quigley -- Add daos_agent.yml to the list of packaged files - -* Thu Jun 13 2019 Brian J. Murrell -- move obj_ctl daos_gen_io_conf daos_run_io_conf to - daos-tests sub-package -- daos-server needs spdk-tools - -* Fri May 31 2019 Ken Cain -- Add new daos utility binary - -* Wed May 29 2019 Brian J. Murrell -- Version bump up to 0.5.0 -- Add Requires: libpsm_infinipath1 for SLES 12.3 - -* Tue May 07 2019 Brian J. Murrell -- Move some files around among the sub-packages - -* Mon May 06 2019 Brian J. Murrell -- Only BR fio - - fio-{devel,src} is not needed - -* Wed Apr 03 2019 Brian J. Murrell -- initial package From b2e90ca74d1a56a1b687a6f466f73e1db9d4a2a1 Mon Sep 17 00:00:00 2001 From: Jerome Soumagne Date: Mon, 10 Nov 2025 13:18:35 -0600 Subject: [PATCH 004/253] DAOS-18187 build: fix scons libfabric version check (#17088) Use 1.20 as min required version Signed-off-by: Jerome Soumagne --- site_scons/components/__init__.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/site_scons/components/__init__.py b/site_scons/components/__init__.py index 18adfca2f1f..fdfd0739f9e 100644 --- a/site_scons/components/__init__.py +++ b/site_scons/components/__init__.py @@ -95,10 +95,12 @@ def check(reqs, name, built_str, installed_str=""): def ofi_config(config): """Check ofi version""" if not GetOption('silent'): - print('Checking for libfabric > 1.11...', end=' ') + print('Checking for libfabric >= 1.20...', end=' ') code = """#include -_Static_assert(FI_MAJOR_VERSION == 1 && FI_MINOR_VERSION >= 11, - "libfabric must be >= 1.11");""" +_Static_assert(FI_VERSION_GE( + FI_VERSION(FI_MAJOR_VERSION, FI_MINOR_VERSION), + FI_VERSION(1, 20)), + "libfabric must be >= 1.20");""" rc = config.TryCompile(code, ".c") if not GetOption('silent'): print('yes' if rc else 'no') From 64b05b2a40895c64d69f289650e40949b6e5b417 Mon Sep 17 00:00:00 2001 From: Ken Cain Date: Mon, 10 Nov 
2025 14:36:42 -0500 Subject: [PATCH 005/253] DAOS-17358 rebuild: rebuilding test in obj_inflight_io_check (#16970) This change removes the check of incoming client IO with the ORF_REBUILDING_IO flag set. Before the change, the intent of the check was to temporarily disallow such IO while a rebuild was starting, when the PS leader engine first distributes a fence / epoch value to all engines. The check caused the IO to get an error -DER_UPDATE_AGAIN, causing the client to retry. With forthcoming features like interactive/explicit rebuild control, the rebuild *stop* case is negatively affected by this test, causing all subsequent client IO after rebuild stops to block indefinitely, until the rebuild is restarted by the administrator. Other intermittent test failures, unrelated to the new feature, are also being seen. Discussion with other developers have suggested that this test in the engine code is not required. Signed-off-by: Kenneth Cain --- src/object/srv_obj.c | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/src/object/srv_obj.c b/src/object/srv_obj.c index 806e904cab4..f94a0f9db21 100644 --- a/src/object/srv_obj.c +++ b/src/object/srv_obj.c @@ -2458,19 +2458,6 @@ obj_inflight_io_check(struct ds_cont_child *child, uint32_t opc, return -DER_UPDATE_AGAIN; } - /* All I/O during rebuilding, needs to wait for the rebuild fence to - * be generated (see rebuild_prepare_one()), which will create a boundary - * for rebuild, so the data after boundary(epoch) should not be rebuilt, - * which otherwise might be written duplicately, which might cause - * the failure in VOS. 
- */ - if ((flags & ORF_REBUILDING_IO) && - (is_pool_rebuild_allowed(child->sc_pool->spc_pool, false) && - child->sc_pool->spc_rebuild_fence == 0)) { - D_ERROR("rebuilding "DF_UUID" retry.\n", DP_UUID(child->sc_pool->spc_uuid)); - return -DER_UPDATE_AGAIN; - } - return 0; } From 9e8136d04892bc412822543c0285cdbcac93e914 Mon Sep 17 00:00:00 2001 From: Dalton Bohning Date: Mon, 10 Nov 2025 13:13:43 -0800 Subject: [PATCH 006/253] Revert "SRE-3440 build: Remove stage with intel compiler" (#17101) This reverts commit d059e151d64021f20a8d118d5fb481c60a2bc99d which was accidently pushed directly to master. To be properly merged by #17057 Also adds back an import removed by #17063 Signed-off-by: Dalton Bohning --- Jenkinsfile | 37 +++++++++++++++++++++++++++++++++ site_scons/prereq_tools/base.py | 30 ++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) diff --git a/Jenkinsfile b/Jenkinsfile index b53f8f36315..cf92efd1610 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -675,6 +675,43 @@ pipeline { } } } + stage('Build on Leap 15.5 with Intel-C and TARGET_PREFIX') { + when { + beforeAgent true + expression { !skip_build_stage('leap15', 'icc') } + } + agent { + dockerfile { + filename 'utils/docker/Dockerfile.leap.15' + label 'docker_runner' + additionalBuildArgs dockerBuildArgs(repo_type: 'stable', + parallel_build: true, + deps_build: true) + + " -t ${sanitized_JOB_NAME()}-leap15" + + ' --build-arg DAOS_PACKAGES_BUILD=no ' + + ' --build-arg COMPILER=icc' + } + } + steps { + job_step_update( + sconsBuild(parallel_build: true, + scons_args: sconsFaultsArgs() + + ' PREFIX=/opt/daos TARGET_TYPE=release', + build_deps: 'no')) + } + post { + unsuccessful { + sh '''if [ -f config.log ]; then + mv config.log config.log-leap15-intelc + fi''' + archiveArtifacts artifacts: 'config.log-leap15-intelc', + allowEmptyArchive: true + } + cleanup { + job_status_update() + } + } + } } } stage('Unit Tests') { diff --git a/site_scons/prereq_tools/base.py b/site_scons/prereq_tools/base.py 
index 207bdbc0f8b..00d00d2b1fb 100644 --- a/site_scons/prereq_tools/base.py +++ b/site_scons/prereq_tools/base.py @@ -34,6 +34,7 @@ import traceback from copy import deepcopy +from SCons.Errors import InternalError from SCons.Script import BUILD_TARGETS, Dir, Exit, GetOption, SetOption, WhereIs from SCons.Variables import BoolVariable, EnumVariable, ListVariable, PathVariable @@ -637,6 +638,33 @@ def _setup_build_type(self): return self.__env.subst("$BUILD_ROOT/$BUILD_TYPE/$COMPILER") + def _setup_intelc(self): + """Setup environment to use Intel compilers""" + try: + env = self.__env.Clone(tools=['doneapi']) + self._has_icx = True + except InternalError: + print("No oneapi compiler, trying legacy") + env = self.__env.Clone(tools=['intelc']) + self.__env["ENV"]["PATH"] = env["ENV"]["PATH"] + self.__env["ENV"]["LD_LIBRARY_PATH"] = env["ENV"]["LD_LIBRARY_PATH"] + self.__env.Replace(AR=env.get("AR")) + self.__env.Replace(ENV=env.get("ENV")) + self.__env.Replace(CC=env.get("CC")) + self.__env.Replace(CXX=env.get("CXX")) + version = env.get("INTEL_C_COMPILER_VERSION") + self.__env.Replace(INTEL_C_COMPILER_VERSION=version) + self.__env.Replace(LINK=env.get("LINK")) + # disable the warning about Cilk since we don't use it + if not self._has_icx: + self.__env.AppendUnique(LINKFLAGS=["-static-intel", + "-diag-disable=10237"]) + self.__env.AppendUnique(CCFLAGS=["-diag-disable:2282", + "-diag-disable:188", + "-diag-disable:2405", + "-diag-disable:1338"]) + return {'CC': env.get("CC"), "CXX": env.get("CXX")} + def _setup_compiler(self): """Setup the compiler to use""" compiler_map = {'gcc': {'CC': 'gcc', 'CXX': 'g++'}, @@ -650,6 +678,8 @@ def _setup_compiler(self): return compiler = self.__env.get('COMPILER') + if compiler == 'icc': + compiler_map['icc'] = self._setup_intelc() if self.__env.get('WARNING_LEVEL') == 'error': if compiler == 'icc' and not self._has_icx: From 174f3f5e1d22c866ec8ee2dcb0363905dd8b32f7 Mon Sep 17 00:00:00 2001 From: Ding Date: Mon, 10 Nov 2025 
17:46:46 -0500 Subject: [PATCH 007/253] DAOS-18155 test: Update pool/create.py with retry (#17042) Update ftest/pool/create.py to try pool create 3 times Signed-off-by: Ding-Hwa Ho --- src/tests/ftest/pool/create.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/tests/ftest/pool/create.py b/src/tests/ftest/pool/create.py index 433dd88110d..89bd8a2bd75 100644 --- a/src/tests/ftest/pool/create.py +++ b/src/tests/ftest/pool/create.py @@ -1,5 +1,6 @@ """ (C) Copyright 2021-2023 Intel Corporation. +(C) Copyright 2025 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -116,11 +117,19 @@ def test_create_no_space_loop(self): "existing pool on one server consuming the required space.") # Create the third of three pools which should succeed. - pools[2].create() + attempts = 1 + while attempts <= 3: + pools[2].create() + if pools[2].dmg.result.exit_status == 0: + break + self.log.info( + "Loop %s: Pool create on ranks %s failed in %s/3 attempts", + index, pools[2].target_list.value, attempts) + attempts += 1 if pools[2].dmg.result.exit_status != 0: self.fail( "Creating a large capacity pool that spans across all but the first server " - "should succeed.") + f"should succeed - failed after {attempts} loops.") # Destroy the third of three pools so it can be created again in the next loop pools[2].destroy() From 79bf2ac15756635cc796017695529db6bc2c4e3a Mon Sep 17 00:00:00 2001 From: Li Wei Date: Tue, 11 Nov 2025 08:58:41 +0900 Subject: [PATCH 008/253] DAOS-18162 mgmt: Fix pool create error handling (#17043) It has been observed that, during a dmg pool create command, the ds_mgmt_pool_query call in pool_create_fill_resp timed out, and ds_mgmt_drpc_pool_create returned the error to the MS without cleaning up the newly-created pool service replicas. 
The dmg pool create command retried with the same pool UUID, but this time a different set of PS replicas were chosen and created "on top of" the PS replicas created in the first attempt. As a result, some of the PS replicas had been bootstrapped with the earlier set of replicas, whereas some with the later set of replicas---an inconsistent Raft cluster right from the beginning. Later, such inconsistency was ignored (for unknown reasons) by rdb_raft_update_node, and led to assertion failures in raft. This patch works around the problem like this: - Fix ds_mgmt_drpc_pool_create to clean up the pool if pool_create_fill_resp returns an error. Shorten the timeout of the query call, for the PS is just created by the MS and shouldn't take five minutes to respond to the query. - Tighten the check in ds_rsvc_start to refuse to create and bootstrap "on top of" an existing replica, just to be safe. - Fix rdb_raft_update_node to report unexpected replica states and abort, rather than silently ignoring them. This should prevent the assertion failure from being reached.
Signed-off-by: Li Wei --- src/include/daos/common.h | 2 ++ src/mgmt/srv_drpc.c | 20 +++++++++--- src/mgmt/srv_internal.h | 11 ++++--- src/mgmt/srv_pool.c | 9 ++--- src/mgmt/tests/mocks.c | 4 +-- src/rdb/rdb_raft.c | 12 +++---- src/rsvc/srv.c | 11 +++++-- src/tests/suite/daos_mgmt.c | 65 ++++++++++++++++++++++++++----------- 8 files changed, 92 insertions(+), 42 deletions(-) diff --git a/src/include/daos/common.h b/src/include/daos/common.h index da4e41e24ab..f963d77aae6 100644 --- a/src/include/daos/common.h +++ b/src/include/daos/common.h @@ -928,6 +928,8 @@ enum { #define DAOS_CHK_ENGINE_DEATH (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0xb9) #define DAOS_CHK_VERIFY_CONT_SHARDS (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0xba) +#define DAOS_MGMT_FAIL_CREATE_QUERY (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0xe0) + /* WAL && checkpoint failure inject */ #define DAOS_WAL_NO_REPLAY (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0x100) #define DAOS_WAL_FAIL_REPLAY (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0x101) diff --git a/src/mgmt/srv_drpc.c b/src/mgmt/srv_drpc.c index 84424997cc8..568aa87755a 100644 --- a/src/mgmt/srv_drpc.c +++ b/src/mgmt/srv_drpc.c @@ -406,8 +406,11 @@ static int pool_create_fill_resp(Mgmt__PoolCreateResp *resp, uuid_t uuid, d_rank D_DEBUG(DB_MGMT, "%d service replicas\n", svc_ranks->rl_nr); - rc = ds_mgmt_pool_query(uuid, svc_ranks, &enabled_ranks, NULL, NULL, &pool_info, NULL, NULL, + rc = ds_mgmt_pool_query(uuid, svc_ranks, &enabled_ranks, NULL, NULL, + daos_getmtime_coarse() + 2 * 60 * 1000, &pool_info, NULL, NULL, &mem_file_bytes); + if (DAOS_FAIL_CHECK(DAOS_MGMT_FAIL_CREATE_QUERY)) + rc = -DER_TIMEDOUT; if (rc != 0) { D_ERROR("Failed to query created pool: rc=%d\n", rc); D_GOTO(out, rc); @@ -470,7 +473,7 @@ ds_mgmt_drpc_pool_create(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) return; } - D_INFO("Received request to create pool on %zu ranks.\n", req->n_ranks); + D_INFO("Received request to create pool %s on %zu ranks.\n", req->uuid, req->n_ranks); if (req->n_tier_bytes != DAOS_MEDIA_MAX) 
D_GOTO(out, rc = -DER_INVAL); @@ -534,6 +537,15 @@ ds_mgmt_drpc_pool_create(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) rc = pool_create_fill_resp(&resp, pool_uuid, svc); d_rank_list_free(svc); + if (rc != 0) { + int rc_tmp; + + DL_ERROR(rc, DF_UUID ": failed to fill pool create response", DP_UUID(pool_uuid)); + rc_tmp = ds_mgmt_destroy_pool(pool_uuid, targets); + if (rc_tmp != 0) + DL_ERROR(rc_tmp, DF_UUID ": failed to clean up pool", DP_UUID(pool_uuid)); + goto out; + } out: resp.status = rc; @@ -1807,8 +1819,8 @@ ds_mgmt_drpc_pool_query(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) pool_info.pi_bits = req->query_mask; rc = ds_mgmt_pool_query(uuid, svc_ranks, &enabled_ranks, &disabled_ranks, &dead_ranks, - &pool_info, &resp.pool_layout_ver, &resp.upgrade_layout_ver, - &resp.mem_file_bytes); + mgmt_ps_call_deadline(), &pool_info, &resp.pool_layout_ver, + &resp.upgrade_layout_ver, &resp.mem_file_bytes); if (rc != 0) { DL_ERROR(rc, DF_UUID ": Failed to query the pool", DP_UUID(uuid)); D_GOTO(error, rc); diff --git a/src/mgmt/srv_internal.h b/src/mgmt/srv_internal.h index 5bc977eae12..511cae1b7c4 100644 --- a/src/mgmt/srv_internal.h +++ b/src/mgmt/srv_internal.h @@ -88,7 +88,8 @@ int ds_mgmt_create_pool(uuid_t pool_uuid, const char *group, d_rank_list_t *targets, size_t scm_size, size_t nvme_size, size_t meta_size, daos_prop_t *prop, d_rank_list_t **svcp, int domains_nr, uint32_t *domains); -int ds_mgmt_destroy_pool(uuid_t pool_uuid, d_rank_list_t *svc_ranks); +int + ds_mgmt_destroy_pool(uuid_t pool_uuid, d_rank_list_t *ranks); int ds_mgmt_evict_pool(uuid_t pool_uuid, d_rank_list_t *svc_ranks, uuid_t *handles, size_t n_handles, uint32_t destroy, uint32_t force_destroy, char *machine, uint32_t *count); @@ -124,10 +125,10 @@ int ds_mgmt_pool_list_cont(uuid_t uuid, d_rank_list_t *svc_ranks, struct daos_pool_cont_info **containers, uint64_t *ncontainers); int - ds_mgmt_pool_query(uuid_t pool_uuid, d_rank_list_t *svc_ranks, d_rank_list_t **enabled_ranks, - 
d_rank_list_t **disabled_ranks, d_rank_list_t **dead_ranks, - daos_pool_info_t *pool_info, uint32_t *pool_layout_ver, - uint32_t *upgrade_layout_ver, uint64_t *mem_file_bytes); +ds_mgmt_pool_query(uuid_t pool_uuid, d_rank_list_t *svc_ranks, d_rank_list_t **enabled_ranks, + d_rank_list_t **disabled_ranks, d_rank_list_t **dead_ranks, uint64_t deadline, + daos_pool_info_t *pool_info, uint32_t *pool_layout_ver, + uint32_t *upgrade_layout_ver, uint64_t *mem_file_bytes); int ds_mgmt_pool_query_targets(uuid_t pool_uuid, d_rank_list_t *svc_ranks, d_rank_t rank, d_rank_list_t *tgts, daos_target_info_t **infos, diff --git a/src/mgmt/srv_pool.c b/src/mgmt/srv_pool.c index 9d7ef2d81ae..12030e5b883 100644 --- a/src/mgmt/srv_pool.c +++ b/src/mgmt/srv_pool.c @@ -397,6 +397,7 @@ ds_mgmt_pool_list_cont(uuid_t uuid, d_rank_list_t *svc_ranks, * \param[out] enabled_ranks Optional, returned storage ranks with enabled targets. * \param[out] disabled_ranks Optional, returned storage ranks with disabled targets. * \param[out] dead_ranks Optional, returned storage ranks marked DEAD by SWIM. 
+ * \param[in] deadline Unix time deadline in milliseconds * \param[in][out] pool_info Query results * \param[in][out] pool_layout_ver Pool global version * \param[in][out] upgrade_layout_ver Latest pool global version this pool might be upgraded @@ -407,7 +408,7 @@ ds_mgmt_pool_list_cont(uuid_t uuid, d_rank_list_t *svc_ranks, */ int ds_mgmt_pool_query(uuid_t pool_uuid, d_rank_list_t *svc_ranks, d_rank_list_t **enabled_ranks, - d_rank_list_t **disabled_ranks, d_rank_list_t **dead_ranks, + d_rank_list_t **disabled_ranks, d_rank_list_t **dead_ranks, uint64_t deadline, daos_pool_info_t *pool_info, uint32_t *pool_layout_ver, uint32_t *upgrade_layout_ver, uint64_t *mem_file_bytes) { @@ -418,9 +419,9 @@ ds_mgmt_pool_query(uuid_t pool_uuid, d_rank_list_t *svc_ranks, d_rank_list_t **e D_DEBUG(DB_MGMT, "Querying pool "DF_UUID"\n", DP_UUID(pool_uuid)); - return dsc_pool_svc_query(pool_uuid, svc_ranks, mgmt_ps_call_deadline(), enabled_ranks, - disabled_ranks, dead_ranks, pool_info, pool_layout_ver, - upgrade_layout_ver, mem_file_bytes); + return dsc_pool_svc_query(pool_uuid, svc_ranks, deadline, enabled_ranks, disabled_ranks, + dead_ranks, pool_info, pool_layout_ver, upgrade_layout_ver, + mem_file_bytes); } /** diff --git a/src/mgmt/tests/mocks.c b/src/mgmt/tests/mocks.c index fa593c84f92..5b30c71f7c1 100644 --- a/src/mgmt/tests/mocks.c +++ b/src/mgmt/tests/mocks.c @@ -287,7 +287,7 @@ d_rank_list_t *ds_mgmt_pool_query_dead_ranks_out; int ds_mgmt_pool_query(uuid_t pool_uuid, d_rank_list_t *svc_ranks, d_rank_list_t **enabled_ranks, - d_rank_list_t **disabled_ranks, d_rank_list_t **dead_ranks, + d_rank_list_t **disabled_ranks, d_rank_list_t **dead_ranks, uint64_t deadline, daos_pool_info_t *pool_info, uint32_t *pool_layout_ver, uint32_t *upgrade_layout_ver, uint64_t *mem_file_bytes) { @@ -555,7 +555,7 @@ ds_mgmt_create_pool(uuid_t pool_uuid, const char *group, d_rank_list_t *targets, } int -ds_mgmt_destroy_pool(uuid_t pool_uuid, d_rank_list_t *svc_ranks) 
+ds_mgmt_destroy_pool(uuid_t pool_uuid, d_rank_list_t *ranks) { return 0; } diff --git a/src/rdb/rdb_raft.c b/src/rdb/rdb_raft.c index e7420e5bc51..cd778108c58 100644 --- a/src/rdb/rdb_raft.c +++ b/src/rdb/rdb_raft.c @@ -1085,14 +1085,14 @@ rdb_raft_update_node(struct rdb *db, uint64_t index, raft_entry_t *entry, rdb_vo found = d_rank_list_find(replicas, rank, NULL); if (found && entry->type == RAFT_LOGTYPE_ADD_NODE) { - D_WARN(DF_DB": %s: rank %u already exists\n", DP_DB(db), - rdb_raft_entry_type_str(entry->type), rank); - rc = 0; + D_ERROR(DF_DB ": %s: rank %u already exists\n", DP_DB(db), + rdb_raft_entry_type_str(entry->type), rank); + rc = -DER_INVAL; goto out_replicas; } else if (!found && entry->type == RAFT_LOGTYPE_REMOVE_NODE) { - D_WARN(DF_DB": %s: rank %u does not exist\n", DP_DB(db), - rdb_raft_entry_type_str(entry->type), rank); - rc = 0; + D_ERROR(DF_DB ": %s: rank %u does not exist\n", DP_DB(db), + rdb_raft_entry_type_str(entry->type), rank); + rc = -DER_INVAL; goto out_replicas; } diff --git a/src/rsvc/srv.c b/src/rsvc/srv.c index af0324c10d9..14a5321424e 100644 --- a/src/rsvc/srv.c +++ b/src/rsvc/srv.c @@ -977,8 +977,15 @@ ds_rsvc_start(enum ds_rsvc_class_id class, d_iov_t *id, uuid_t db_uuid, uint64_t entry = d_hash_rec_find(&rsvc_hash, id->iov_buf, id->iov_len); if (entry != NULL) { svc = rsvc_obj(entry); - D_DEBUG(DB_MD, "%s: found: stop=%d\n", svc->s_name, svc->s_stop); - if (mode == DS_RSVC_DICTATE && !svc->s_stop) { + D_DEBUG(DB_MD, "%s: found: stop=%d mode=%s replicas=%p\n", svc->s_name, svc->s_stop, + start_mode_str(mode), replicas); + if (mode == DS_RSVC_CREATE && replicas != NULL) { + D_ERROR("%s: creating and bootstrapping existing replica not allowed\n", + svc->s_name); + rc = -DER_EXIST; + ds_rsvc_put(svc); + goto out; + } else if (mode == DS_RSVC_DICTATE && !svc->s_stop) { /* * If we need to dictate, and the service is not * stopping, then stop it, which should not fail in diff --git a/src/tests/suite/daos_mgmt.c 
b/src/tests/suite/daos_mgmt.c index 5f0cf5d8079..49c520fa53b 100644 --- a/src/tests/suite/daos_mgmt.c +++ b/src/tests/suite/daos_mgmt.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2016-2024 Intel Corporation. + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -527,26 +528,52 @@ pool_destroy_cancel_rfcheck(void **state) test_set_engine_fail_loc(arg, CRT_NO_RANK, 0); } +static void +pool_create_query_fail(void **state) +{ + test_arg_t *arg = *state; + uuid_t uuid; + int rc; + + FAULT_INJECTION_REQUIRED(); + + if (arg->myrank != 0) + return; + + test_set_engine_fail_loc(arg, CRT_NO_RANK, DAOS_MGMT_FAIL_CREATE_QUERY | DAOS_FAIL_ONCE); + + print_message("creating pool synchronously ... "); + rc = dmg_pool_create(dmg_config_file, geteuid(), getegid(), arg->group, NULL /* tgts */, + 256 * 1024 * 1024 /* minimal size */, 0 /* nvme size */, + NULL /* prop */, arg->pool.svc, uuid); + assert_rc_equal(rc, 0); + print_message("success uuid = " DF_UUIDF "\n", DP_UUID(uuid)); + + test_set_engine_fail_loc(arg, CRT_NO_RANK, 0); + + print_message("destroying pool synchronously ... 
"); + rc = dmg_pool_destroy(dmg_config_file, uuid, arg->group, 1); + assert_rc_equal(rc, 0); + print_message("success\n"); +} + static const struct CMUnitTest tests[] = { - { "MGMT1: create/destroy pool on all tgts", - pool_create_all, async_disable, test_case_teardown}, - { "MGMT2: create/destroy pool on all tgts (async)", - pool_create_all, async_enable, test_case_teardown}, - { "MGMT3: list-pools with no pools in sys", - list_pools_test, setup_zeropools, teardown_pools}, - { "MGMT4: list-pools with multiple pools in sys", - list_pools_test, setup_manypools, teardown_pools}, - { "MGMT5: retry MGMT_POOL_{CREATE,DESETROY} upon errors", - pool_create_and_destroy_retry, async_disable, test_case_teardown}, - { "MGMT6: daos_mgmt_get_sys_info", - get_sys_info_test, async_disable, test_case_teardown}, - { "MGMT7: create: PS steps down from UP_EMPTY", - pool_create_steps_down_from_up_empty, async_disable, test_case_teardown}, - { "MGMT8: pool destroy disconnect all", - pool_destroy_disconnect_all, async_disable, test_case_teardown}, - { "MGMT9: pool destroy cancels rfcheck", - pool_destroy_cancel_rfcheck, NULL, test_case_teardown} -}; + {"MGMT1: create/destroy pool on all tgts", pool_create_all, async_disable, test_case_teardown}, + {"MGMT2: create/destroy pool on all tgts (async)", pool_create_all, async_enable, + test_case_teardown}, + {"MGMT3: list-pools with no pools in sys", list_pools_test, setup_zeropools, teardown_pools}, + {"MGMT4: list-pools with multiple pools in sys", list_pools_test, setup_manypools, + teardown_pools}, + {"MGMT5: retry MGMT_POOL_{CREATE,DESETROY} upon errors", pool_create_and_destroy_retry, + async_disable, test_case_teardown}, + {"MGMT6: daos_mgmt_get_sys_info", get_sys_info_test, async_disable, test_case_teardown}, + {"MGMT7: create: PS steps down from UP_EMPTY", pool_create_steps_down_from_up_empty, + async_disable, test_case_teardown}, + {"MGMT8: pool destroy disconnect all", pool_destroy_disconnect_all, async_disable, + 
test_case_teardown}, + {"MGMT9: pool destroy cancels rfcheck", pool_destroy_cancel_rfcheck, NULL, test_case_teardown}, + {"MGMT10: query in pool create fails", pool_create_query_fail, async_disable, + test_case_teardown}}; static int setup(void **state) From 7b6acff6fa5243400dc5675a40d4d702878c5929 Mon Sep 17 00:00:00 2001 From: Li Wei Date: Wed, 12 Nov 2025 09:16:47 +0900 Subject: [PATCH 009/253] DAOS-17802 pool: Remove a forgotten comment (#17109) Remove a note that I forgot to remove in the main fix. Signed-off-by: Li Wei --- src/pool/srv_pool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pool/srv_pool.c b/src/pool/srv_pool.c index bbf553ca740..8ff21acf226 100644 --- a/src/pool/srv_pool.c +++ b/src/pool/srv_pool.c @@ -7234,7 +7234,7 @@ log_unavailable_targets(struct pool_svc *svc, struct pool_map *map) if (doms[i].do_comp.co_status & PO_COMP_ST_DOWN) { D_ERROR(DF_UUID ": rank %u\n", DP_UUID(svc->ps_uuid), doms[i].do_comp.co_rank); - } else if (doms[i].do_comp.co_status & PO_COMP_ST_UPIN) { // XXX: ask Xuezhao + } else if (doms[i].do_comp.co_status & PO_COMP_ST_UPIN) { int j; for (j = 0; j < doms[i].do_target_nr; j++) From d15333d5444092099cb8a987675e1972a6c2c578 Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Wed, 12 Nov 2025 16:34:51 +0800 Subject: [PATCH 010/253] DAOS-17928 rebuild: migrate object processing from system xstream to main xstream (#17031) To enhance object migration efficiency, we recommend processing OIDs directly in main xstreams rather than routing them through system xstreams. Currently, the workflow involves main xstreams scanning and gathering OIDs before distributing them to corresponding ranks' system xstreams for processing. However, this approach introduces significant overhead from B+ Tree operations. 
Signed-off-by: Wang Shilong --- src/include/daos_srv/object.h | 8 +- src/object/srv_internal.h | 19 +- src/object/srv_obj_migrate.c | 720 ++++++++++++++-------------------- src/rebuild/scan.c | 4 +- 4 files changed, 315 insertions(+), 436 deletions(-) diff --git a/src/include/daos_srv/object.h b/src/include/daos_srv/object.h index bc816d98c6e..9fd82b76d0d 100644 --- a/src/include/daos_srv/object.h +++ b/src/include/daos_srv/object.h @@ -87,10 +87,10 @@ ds_object_migrate_send(struct ds_pool *pool, uuid_t pool_hdl_uuid, uuid_t cont_u uint32_t new_gl_ver, unsigned int migrate_opc, uint64_t *enqueue_id, uint32_t *max_delay); int -ds_migrate_object(struct ds_pool *pool, uuid_t po_hdl, uuid_t co_hdl, uuid_t co_uuid, - uint32_t version, uint32_t generation, uint64_t max_eph, uint32_t opc, - daos_unit_oid_t *oids, daos_epoch_t *epochs, daos_epoch_t *punched_epochs, - unsigned int *shards, uint32_t count, unsigned int tgt_idx, uint32_t new_gl_ver); +ds_migrate_object(uuid_t pool_uuid, uuid_t po_hdl, uuid_t co_hdl, uuid_t co_uuid, uint32_t version, + uint32_t generation, uint64_t max_eph, uint32_t opc, daos_unit_oid_t *oids, + daos_epoch_t *epochs, daos_epoch_t *punched_epochs, unsigned int *shards, + uint32_t count, unsigned int tgt_idx, uint32_t new_gl_ver); void ds_migrate_stop(struct ds_pool *pool, uint32_t ver, unsigned int generation); diff --git a/src/object/srv_internal.h b/src/object/srv_internal.h index 8c0d92940ed..577f55ccf45 100644 --- a/src/object/srv_internal.h +++ b/src/object/srv_internal.h @@ -73,12 +73,8 @@ struct migrate_pool_tls { /* The ULT number on each target xstream, which actually refer * back to the item within mpt_obj/dkey_ult_cnts array. 
*/ - ATOMIC uint32_t *mpt_tgt_obj_ult_cnt; - ATOMIC uint32_t *mpt_tgt_dkey_ult_cnt; - - /* ULT count array from all targets, obj: enumeration, dkey:fetch/update */ - ATOMIC uint32_t *mpt_obj_ult_cnts; - ATOMIC uint32_t *mpt_dkey_ult_cnts; + uint32_t mpt_tgt_obj_ult_cnt; + uint32_t mpt_tgt_dkey_ult_cnt; /* reference count for the structure */ uint64_t mpt_refcount; @@ -93,18 +89,13 @@ struct migrate_pool_tls { uint32_t mpt_inflight_max_ult; uint32_t mpt_opc; - ABT_cond mpt_init_cond; - ABT_mutex mpt_init_mutex; - /* The new layout version for upgrade job */ uint32_t mpt_new_layout_ver; /* migrate leader ULT */ - unsigned int mpt_ult_running:1, - mpt_init_tls:1, - mpt_fini:1, - mpt_reintegrating:1, /* incremental reint flag */ - mpt_post_process_started:1; /* reint post process started flag */ + unsigned int mpt_ult_running : 1, mpt_fini : 1, + mpt_reintegrating : 1, /* incremental reint flag */ + mpt_post_process_started : 1; /* reint post process started flag */ /* migration init error */ int mpt_init_err; diff --git a/src/object/srv_obj_migrate.c b/src/object/srv_obj_migrate.c index 1d936ad6557..a5200b9d9b2 100644 --- a/src/object/srv_obj_migrate.c +++ b/src/object/srv_obj_migrate.c @@ -383,10 +383,6 @@ migrate_pool_tls_destroy(struct migrate_pool_tls *tls) if (daos_handle_is_valid(tls->mpt_pool_hdl)) dsc_pool_close(tls->mpt_pool_hdl); - if (tls->mpt_obj_ult_cnts) - D_FREE(tls->mpt_obj_ult_cnts); - if (tls->mpt_dkey_ult_cnts) - D_FREE(tls->mpt_dkey_ult_cnts); d_list_del(&tls->mpt_list); D_DEBUG(DB_REBUILD, DF_RB ": TLS destroy\n", DP_RB_MPT(tls)); if (tls->mpt_pool) @@ -399,10 +395,6 @@ migrate_pool_tls_destroy(struct migrate_pool_tls *tls) ABT_cond_free(&tls->mpt_inflight_cond); if (tls->mpt_inflight_mutex) ABT_mutex_free(&tls->mpt_inflight_mutex); - if (tls->mpt_init_cond) - ABT_cond_free(&tls->mpt_init_cond); - if (tls->mpt_init_mutex) - ABT_mutex_free(&tls->mpt_init_mutex); if (daos_handle_is_valid(tls->mpt_root_hdl)) obj_tree_destroy(tls->mpt_root_hdl); if 
(daos_handle_is_valid(tls->mpt_migrated_root_hdl)) @@ -452,56 +444,32 @@ migrate_pool_tls_lookup(uuid_t pool_uuid, unsigned int ver, uint32_t gen) return found; } -#define MPT_CREATE_TGT_INLINE (32) -struct migrate_pool_tls_create_arg { - uuid_t pool_uuid; - uuid_t pool_hdl_uuid; - uuid_t co_hdl_uuid; - d_rank_list_t *svc_list; - uint8_t *tgt_status; - uint8_t tgt_status_inline[MPT_CREATE_TGT_INLINE]; - uint32_t *tgt_in_ver; - uint32_t tgt_in_ver_inline[MPT_CREATE_TGT_INLINE]; - ATOMIC uint32_t *obj_ult_cnts; - ATOMIC uint32_t *dkey_ult_cnts; - uint64_t max_eph; - unsigned int version; - unsigned int generation; - uint32_t opc; - uint32_t new_layout_ver; - uint32_t max_ult_cnt; -}; - -int -migrate_pool_tls_create_one(void *data) +static int +migrate_pool_tls_create(uuid_t pool_uuid, unsigned int version, unsigned int generation, + uuid_t pool_hdl_uuid, uuid_t co_hdl_uuid, uint64_t max_eph, + uint32_t new_layout_ver, uint32_t opc, struct migrate_pool_tls **p_tls, + d_rank_list_t *svc_list, uint8_t tgt_status, uint32_t tgt_in_ver) { - struct migrate_pool_tls_create_arg *arg = data; - struct obj_tls *tls = obj_tls_get(); - uint32_t tgt_id; - struct migrate_pool_tls *pool_tls; + uint32_t max_migrate_ult = MIGRATE_DEFAULT_MAX_ULT; + struct obj_tls *obj_tls = obj_tls_get(); + uint32_t tgt_id = dss_get_module_info()->dmi_tgt_id; + struct migrate_pool_tls *pool_tls = NULL; struct ds_pool_child *pool_child = NULL; int rc = 0; - pool_tls = migrate_pool_tls_lookup(arg->pool_uuid, arg->version, arg->generation); - if (pool_tls != NULL) { - /* Some one else already created, because collective function - * might yield xstream. 
- */ - migrate_pool_tls_put(pool_tls); - return 0; - } + d_getenv_uint(ENV_MIGRATE_ULT_CNT, &max_migrate_ult); + D_ASSERT(generation != (unsigned int)(-1)); - pool_child = ds_pool_child_lookup(arg->pool_uuid); + pool_child = ds_pool_child_lookup(pool_uuid); if (pool_child == NULL) { /* Local ds_pool_child isn't started yet, return a retry-able error */ if (dss_get_module_info()->dmi_xs_id != 0) { - D_INFO(DF_UUID ": Local VOS pool isn't ready yet.\n", - DP_UUID(arg->pool_uuid)); + D_INFO(DF_UUID ": Local VOS pool isn't ready yet.\n", DP_UUID(pool_uuid)); return -DER_STALE; } } else if (unlikely(pool_child->spc_no_storage)) { - D_DEBUG(DB_REBUILD, DF_UUID" "DF_UUID" lost pool shard, ver %d, skip.\n", - DP_UUID(arg->pool_uuid), DP_UUID(arg->pool_hdl_uuid), arg->version); + D_DEBUG(DB_REBUILD, DF_UUID " " DF_UUID " lost pool shard, ver %d, skip.\n", + DP_UUID(pool_uuid), DP_UUID(pool_hdl_uuid), version); D_GOTO(out, rc = 0); } @@ -525,63 +493,45 @@ migrate_pool_tls_create_one(void *data) if (rc != ABT_SUCCESS) D_GOTO(out, rc = dss_abterr2der(rc)); - uuid_copy(pool_tls->mpt_pool_uuid, arg->pool_uuid); - uuid_copy(pool_tls->mpt_poh_uuid, arg->pool_hdl_uuid); - uuid_copy(pool_tls->mpt_coh_uuid, arg->co_hdl_uuid); - pool_tls->mpt_version = arg->version; - pool_tls->mpt_generation = arg->generation; + uuid_copy(pool_tls->mpt_pool_uuid, pool_uuid); + uuid_copy(pool_tls->mpt_poh_uuid, pool_hdl_uuid); + uuid_copy(pool_tls->mpt_coh_uuid, co_hdl_uuid); + pool_tls->mpt_version = version; + pool_tls->mpt_generation = generation; pool_tls->mpt_rec_count = 0; pool_tls->mpt_obj_count = 0; pool_tls->mpt_size = 0; pool_tls->mpt_root_hdl = DAOS_HDL_INVAL; - pool_tls->mpt_max_eph = arg->max_eph; - pool_tls->mpt_new_layout_ver = arg->new_layout_ver; - pool_tls->mpt_opc = arg->opc; - if (dss_get_module_info()->dmi_xs_id == 0) { - int i; - - pool_tls->mpt_inflight_max_size = MIGRATE_MAX_SIZE; - pool_tls->mpt_inflight_max_ult = arg->max_ult_cnt; - D_ALLOC_ARRAY(pool_tls->mpt_obj_ult_cnts, 
dss_tgt_nr); - D_ALLOC_ARRAY(pool_tls->mpt_dkey_ult_cnts, dss_tgt_nr); - if (pool_tls->mpt_obj_ult_cnts == NULL || pool_tls->mpt_dkey_ult_cnts == NULL) - D_GOTO(out, rc = -DER_NOMEM); - for (i = 0; i < dss_tgt_nr; i++) { - atomic_init(&pool_tls->mpt_obj_ult_cnts[i], 0); - atomic_init(&pool_tls->mpt_dkey_ult_cnts[i], 0); - } - } else { - tgt_id = dss_get_module_info()->dmi_tgt_id; - - pool_tls->mpt_pool = ds_pool_child_lookup(arg->pool_uuid); - if (pool_tls->mpt_pool == NULL) - D_GOTO(out, rc = -DER_NO_HDL); - pool_tls->mpt_inflight_max_size = MIGRATE_MAX_SIZE / dss_tgt_nr; - pool_tls->mpt_inflight_max_ult = arg->max_ult_cnt / dss_tgt_nr; - pool_tls->mpt_tgt_obj_ult_cnt = &arg->obj_ult_cnts[tgt_id]; - pool_tls->mpt_tgt_dkey_ult_cnt = &arg->dkey_ult_cnts[tgt_id]; - - if (pool_child->spc_pool->sp_incr_reint && arg->opc == RB_OP_REBUILD && - arg->tgt_status[tgt_id] == PO_COMP_ST_UP && - arg->tgt_in_ver[tgt_id] <= pool_tls->mpt_version) - pool_tls->mpt_reintegrating = 1; - D_DEBUG(DB_REBUILD, DF_RB" tgt %d status %u in version %u, mpt_reintegrating %d\n", - DP_RB_MPT(pool_tls), tgt_id, arg->tgt_status[tgt_id], - arg->tgt_in_ver[tgt_id], pool_tls->mpt_reintegrating); - } + pool_tls->mpt_max_eph = max_eph; + pool_tls->mpt_new_layout_ver = new_layout_ver; + pool_tls->mpt_opc = opc; + pool_tls->mpt_pool = ds_pool_child_lookup(pool_uuid); + if (pool_tls->mpt_pool == NULL) + D_GOTO(out, rc = -DER_NO_HDL); + pool_tls->mpt_inflight_max_size = MIGRATE_MAX_SIZE / dss_tgt_nr; + pool_tls->mpt_inflight_max_ult = max_migrate_ult / dss_tgt_nr; + pool_tls->mpt_tgt_obj_ult_cnt = 0; + pool_tls->mpt_tgt_dkey_ult_cnt = 0; + + if (pool_child->spc_pool->sp_incr_reint && opc == RB_OP_REBUILD && + tgt_status == PO_COMP_ST_UP && tgt_in_ver <= pool_tls->mpt_version) + pool_tls->mpt_reintegrating = 1; + D_DEBUG(DB_REBUILD, DF_RB " tgt %d status %u in version %u, mpt_reintegrating %d\n", + DP_RB_MPT(pool_tls), tgt_id, tgt_status, tgt_in_ver, pool_tls->mpt_reintegrating); 
pool_tls->mpt_inflight_size = 0; pool_tls->mpt_refcount = 1; - if (arg->svc_list) { - rc = daos_rank_list_copy(&pool_tls->mpt_svc_list, arg->svc_list); + if (svc_list) { + rc = daos_rank_list_copy(&pool_tls->mpt_svc_list, svc_list); if (rc) D_GOTO(out, rc); } D_DEBUG(DB_REBUILD, DF_RB ": TLS %p create for hdls " DF_UUID "/" DF_UUID " " DF_RC "\n", - DP_RB_MPT(pool_tls), pool_tls, DP_UUID(arg->pool_hdl_uuid), - DP_UUID(arg->co_hdl_uuid), DP_RC(rc)); - d_list_add(&pool_tls->mpt_list, &tls->ot_pool_list); + DP_RB_MPT(pool_tls), pool_tls, DP_UUID(pool_hdl_uuid), DP_UUID(co_hdl_uuid), + DP_RC(rc)); + d_list_add(&pool_tls->mpt_list, &obj_tls->ot_pool_list); + migrate_pool_tls_get(pool_tls); out: if (rc && pool_tls) migrate_pool_tls_destroy(pool_tls); @@ -589,143 +539,14 @@ migrate_pool_tls_create_one(void *data) if (pool_child != NULL) ds_pool_child_put(pool_child); - return rc; -} - -static int -migrate_pool_tls_lookup_create(struct ds_pool *pool, unsigned int version, unsigned int generation, - uuid_t pool_hdl_uuid, uuid_t co_hdl_uuid, uint64_t max_eph, - uint32_t new_layout_ver, uint32_t opc, struct migrate_pool_tls **p_tls) -{ - struct migrate_pool_tls *tls = NULL; - struct migrate_pool_tls_create_arg arg = { 0 }; - daos_prop_t *prop = NULL; - struct daos_prop_entry *entry; - struct pool_target *tgts; - uint32_t max_migrate_ult = MIGRATE_DEFAULT_MAX_ULT; - d_rank_t rank; - int i, rc = 0; - - D_ASSERT(dss_get_module_info()->dmi_xs_id == 0); - tls = migrate_pool_tls_lookup(pool->sp_uuid, version, generation); - if (tls) { - if (tls->mpt_init_tls) { - ABT_mutex_lock(tls->mpt_init_mutex); - ABT_cond_wait(tls->mpt_init_cond, tls->mpt_init_mutex); - ABT_mutex_unlock(tls->mpt_init_mutex); - if (tls->mpt_init_err) { - migrate_pool_tls_put(tls); - rc = tls->mpt_init_err; - } - } - - if (rc == 0) - *p_tls = tls; - - return rc; - } - - d_getenv_uint(ENV_MIGRATE_ULT_CNT, &max_migrate_ult); - D_ASSERT(generation != (unsigned int)(-1)); - uuid_copy(arg.pool_uuid, pool->sp_uuid); - 
uuid_copy(arg.pool_hdl_uuid, pool_hdl_uuid); - uuid_copy(arg.co_hdl_uuid, co_hdl_uuid); - arg.version = version; - arg.opc = opc; - arg.max_eph = max_eph; - arg.new_layout_ver = new_layout_ver; - arg.generation = generation; - arg.max_ult_cnt = max_migrate_ult; - - /* - * dss_task_collective does not do collective on sys xstrem, - * sys xstream need some information to track rebuild status. - */ - rc = migrate_pool_tls_create_one(&arg); - if (rc) - D_GOTO(out, rc); - - tls = migrate_pool_tls_lookup(pool->sp_uuid, version, generation); - D_ASSERT(tls != NULL); - pool->sp_rebuilding++; - - rc = ABT_cond_create(&tls->mpt_init_cond); - if (rc != ABT_SUCCESS) - D_GOTO(out, rc = dss_abterr2der(rc)); - - rc = ABT_mutex_create(&tls->mpt_init_mutex); - if (rc != ABT_SUCCESS) - D_GOTO(out, rc = dss_abterr2der(rc)); - - tls->mpt_init_tls = 1; - D_ALLOC_PTR(prop); - if (prop == NULL) - D_GOTO(out, rc = -DER_NOMEM); - - if (likely(dss_tgt_nr <= MPT_CREATE_TGT_INLINE)) { - arg.tgt_status = arg.tgt_status_inline; - arg.tgt_in_ver = arg.tgt_in_ver_inline; - } else { - D_ALLOC_ARRAY(arg.tgt_status, dss_tgt_nr); - if (arg.tgt_status == NULL) - D_GOTO(out, rc = -DER_NOMEM); - D_ALLOC_ARRAY(arg.tgt_in_ver, dss_tgt_nr); - if (arg.tgt_in_ver == NULL) - D_GOTO(out, rc = -DER_NOMEM); - } - - rank = dss_self_rank(); - rc = pool_map_find_target_by_rank_idx(pool->sp_map, rank, -1, &tgts); - D_ASSERT(rc == dss_tgt_nr); - for (i = 0; i < dss_tgt_nr; i++) { - arg.tgt_status[i] = tgts[i].ta_comp.co_status; - arg.tgt_in_ver[i] = tgts[i].ta_comp.co_in_ver; - } - - rc = ds_pool_iv_prop_fetch(pool, prop); - if (rc) - D_GOTO(out, rc); - - entry = daos_prop_entry_get(prop, DAOS_PROP_PO_SVC_LIST); - D_ASSERT(entry != NULL); - arg.svc_list = (d_rank_list_t *)entry->dpe_val_ptr; - arg.obj_ult_cnts = tls->mpt_obj_ult_cnts; - arg.dkey_ult_cnts = tls->mpt_dkey_ult_cnts; - rc = ds_pool_task_collective(pool->sp_uuid, - PO_COMP_ST_NEW | PO_COMP_ST_DOWN | PO_COMP_ST_DOWNOUT, - migrate_pool_tls_create_one, &arg, 
0); + D_DEBUG(DB_TRACE, "create tls " DF_UUID ": " DF_RC "\n", DP_UUID(pool_uuid), DP_RC(rc)); if (rc != 0) { - DL_ERROR(rc, DF_RB ": failed to create migrate tls on tgt xstreams", - DP_RB_MPT(tls)); - D_GOTO(out, rc); - } - -out: - if (tls != NULL && tls->mpt_init_tls) { - tls->mpt_init_tls = 0; - /* Set init failed, so the waiting lookup(above) can be notified */ - if (rc != 0) - tls->mpt_init_err = rc; - ABT_mutex_lock(tls->mpt_init_mutex); - ABT_cond_broadcast(tls->mpt_init_cond); - ABT_mutex_unlock(tls->mpt_init_mutex); - } - D_DEBUG(DB_TRACE, "create tls " DF_UUID ": " DF_RC "\n", DP_UUID(pool->sp_uuid), DP_RC(rc)); - - if (rc != 0) { - if (tls != NULL) - migrate_pool_tls_put(tls); + if (pool_tls != NULL) + migrate_pool_tls_put(pool_tls); } else { - *p_tls = tls; + *p_tls = pool_tls; } - if (prop != NULL) - daos_prop_free(prop); - if (arg.tgt_status != NULL && arg.tgt_status != arg.tgt_status_inline) - D_FREE(arg.tgt_status); - if (arg.tgt_in_ver != NULL && arg.tgt_in_ver != arg.tgt_in_ver_inline) - D_FREE(arg.tgt_in_ver); - return rc; } @@ -1948,83 +1769,56 @@ enum { DKEY_ULT = 2, }; -/* Check if there are enough resource for the migration to proceed. 
*/ -static int -migrate_system_enter(struct migrate_pool_tls *tls, int tgt_idx, bool *yielded) +static inline uint32_t +migrate_tgt_ult_cnt(struct migrate_pool_tls *tls, int ult_type) { - uint32_t tgt_cnt = 0; - int rc = 0; - - D_ASSERT(dss_get_module_info()->dmi_xs_id == 0); - D_ASSERTF(tgt_idx < dss_tgt_nr, "tgt idx %d tgt nr %u\n", tgt_idx, dss_tgt_nr); - - tgt_cnt = atomic_load(&tls->mpt_obj_ult_cnts[tgt_idx]) + - atomic_load(&tls->mpt_dkey_ult_cnts[tgt_idx]); - - while ((tls->mpt_inflight_max_ult / dss_tgt_nr) <= tgt_cnt) { - D_DEBUG(DB_REBUILD, DF_RB ": tgt%d:%u max %u\n", DP_RB_MPT(tls), tgt_idx, tgt_cnt, - tls->mpt_inflight_max_ult / dss_tgt_nr); - *yielded = true; - dss_sleep(0); - if (tls->mpt_fini) - D_GOTO(out, rc = -DER_SHUTDOWN); - - tgt_cnt = atomic_load(&tls->mpt_obj_ult_cnts[tgt_idx]) + - atomic_load(&tls->mpt_dkey_ult_cnts[tgt_idx]); - } - - atomic_fetch_add(&tls->mpt_obj_ult_cnts[tgt_idx], 1); -out: - return rc; + if (ult_type == OBJ_ULT) + return tls->mpt_tgt_obj_ult_cnt; + else + return tls->mpt_tgt_dkey_ult_cnt; } static int -migrate_tgt_enter(struct migrate_pool_tls *tls) +migrate_tgt_enter(struct migrate_pool_tls *tls, int ult_type, bool *yielded) { - uint32_t dkey_cnt = 0; + uint32_t ult_cnt = 0; int rc = 0; D_ASSERT(dss_get_module_info()->dmi_xs_id != 0); - dkey_cnt = atomic_load(tls->mpt_tgt_dkey_ult_cnt); - while (tls->mpt_inflight_max_ult / 2 <= dkey_cnt) { - D_DEBUG(DB_REBUILD, DF_RB ": tgt %u max %u\n", DP_RB_MPT(tls), dkey_cnt, - tls->mpt_inflight_max_ult); + ult_cnt = migrate_tgt_ult_cnt(tls, ult_type); + while (tls->mpt_inflight_max_ult / 2 <= ult_cnt) { + D_DEBUG(DB_REBUILD, "tgt %u max %u\n", ult_cnt, tls->mpt_inflight_max_ult); + if (yielded) + *yielded = true; ABT_mutex_lock(tls->mpt_inflight_mutex); ABT_cond_wait(tls->mpt_inflight_cond, tls->mpt_inflight_mutex); ABT_mutex_unlock(tls->mpt_inflight_mutex); if (tls->mpt_fini) D_GOTO(out, rc = -DER_SHUTDOWN); - dkey_cnt = atomic_load(tls->mpt_tgt_dkey_ult_cnt); + ult_cnt = 
migrate_tgt_ult_cnt(tls, ult_type); } - atomic_fetch_add(tls->mpt_tgt_dkey_ult_cnt, 1); + if (ult_type == OBJ_ULT) + tls->mpt_tgt_obj_ult_cnt++; + else + tls->mpt_tgt_dkey_ult_cnt++; out: return rc; } static void -migrate_system_exit(struct migrate_pool_tls *tls, unsigned int tgt_idx) +migrate_tgt_try_wakeup(struct migrate_pool_tls *tls, int ult_type) { - /* NB: this will only be called during errr handling. In normal case - * the migrate ULT created by system will be exit on each target XS. - */ - D_ASSERT(dss_get_module_info()->dmi_xs_id == 0); - atomic_fetch_sub(&tls->mpt_obj_ult_cnts[tgt_idx], 1); -} - -static void -migrate_tgt_try_wakeup(struct migrate_pool_tls *tls) -{ - uint32_t dkey_cnt = 0; + uint32_t ult_cnt = 0; + ult_cnt = migrate_tgt_ult_cnt(tls, ult_type); D_ASSERT(dss_get_module_info()->dmi_xs_id != 0); - dkey_cnt = atomic_load(tls->mpt_tgt_dkey_ult_cnt); - if (tls->mpt_inflight_max_ult / 2 > dkey_cnt) { + if (tls->mpt_inflight_max_ult / 2 > ult_cnt) { ABT_mutex_lock(tls->mpt_inflight_mutex); - ABT_cond_broadcast(tls->mpt_inflight_cond); + ABT_cond_signal(tls->mpt_inflight_cond); ABT_mutex_unlock(tls->mpt_inflight_mutex); } } @@ -2034,12 +1828,13 @@ migrate_tgt_exit(struct migrate_pool_tls *tls, int ult_type) { D_ASSERT(dss_get_module_info()->dmi_xs_id != 0); if (ult_type == OBJ_ULT) { - atomic_fetch_sub(tls->mpt_tgt_obj_ult_cnt, 1); - return; + D_ASSERT(tls->mpt_tgt_obj_ult_cnt > 0); + tls->mpt_tgt_obj_ult_cnt--; + } else { + D_ASSERT(tls->mpt_tgt_dkey_ult_cnt > 0); + tls->mpt_tgt_dkey_ult_cnt--; } - - atomic_fetch_sub(tls->mpt_tgt_dkey_ult_cnt, 1); - migrate_tgt_try_wakeup(tls); + migrate_tgt_try_wakeup(tls, ult_type); } static void @@ -2883,7 +2678,7 @@ migrate_start_ult(struct enum_unpack_arg *unpack_arg) continue; } - rc = migrate_tgt_enter(tls); + rc = migrate_tgt_enter(tls, DKEY_ULT, NULL); if (rc) break; d_list_del_init(&mrone->mo_list); @@ -3183,9 +2978,11 @@ migrate_one_epoch_object(daos_epoch_range_t *epr, struct migrate_pool_tls *tls, } 
struct migrate_stop_arg { - uuid_t pool_uuid; + uuid_t pool_uuid; unsigned int version; unsigned int generation; + unsigned int stop_count; + ABT_mutex stop_lock; }; static int @@ -3202,6 +2999,10 @@ migrate_fini_one_ult(void *data) D_ASSERT(dss_get_module_info()->dmi_xs_id != 0); tls->mpt_fini = 1; + ABT_mutex_lock(arg->stop_lock); + arg->stop_count++; + ABT_mutex_unlock(arg->stop_lock); + ABT_mutex_lock(tls->mpt_inflight_mutex); ABT_cond_broadcast(tls->mpt_inflight_cond); ABT_mutex_unlock(tls->mpt_inflight_mutex); @@ -3226,44 +3027,27 @@ migrate_fini_one_ult(void *data) void ds_migrate_stop(struct ds_pool *pool, unsigned int version, unsigned int generation) { - struct migrate_pool_tls *tls; struct migrate_stop_arg arg; int rc; - D_ASSERT(dss_get_module_info()->dmi_xs_id == 0); - tls = migrate_pool_tls_lookup(pool->sp_uuid, version, generation); - if (tls == NULL || tls->mpt_fini) { - if (tls != NULL) - migrate_pool_tls_put(tls); - D_INFO(DF_UUID" migrate stopped\n", DP_UUID(pool->sp_uuid)); - return; - } - - tls->mpt_fini = 1; uuid_copy(arg.pool_uuid, pool->sp_uuid); arg.version = version; arg.generation = generation; + arg.stop_count = 0; + rc = ABT_mutex_create(&arg.stop_lock); + if (rc != ABT_SUCCESS) { + D_ERROR(DF_UUID " migrate stop: %d\n", DP_UUID(pool->sp_uuid), rc); + return; + } rc = ds_pool_thread_collective(pool->sp_uuid, 0, migrate_fini_one_ult, &arg, 0); if (rc) D_ERROR(DF_UUID" migrate stop: %d\n", DP_UUID(pool->sp_uuid), rc); - migrate_pool_tls_put(tls); - /* Wait for xstream 0 migrate ULT(migrate_ult) stop */ - if (tls->mpt_ult_running) { - ABT_mutex_lock(tls->mpt_inflight_mutex); - ABT_cond_broadcast(tls->mpt_inflight_cond); - ABT_mutex_unlock(tls->mpt_inflight_mutex); - rc = ABT_eventual_wait(tls->mpt_done_eventual, NULL); - if (rc != ABT_SUCCESS) { - rc = dss_abterr2der(rc); - D_WARN("failed to migrate wait "DF_UUID": "DF_RC"\n", - DP_UUID(pool->sp_uuid), DP_RC(rc)); - } - } + D_ASSERT(pool->sp_rebuilding >= arg.stop_count); + 
pool->sp_rebuilding -= arg.stop_count; + ABT_mutex_free(&arg.stop_lock); - migrate_pool_tls_put(tls); - pool->sp_rebuilding--; D_INFO(DF_UUID" migrate stopped\n", DP_UUID(pool->sp_uuid)); } @@ -3416,11 +3200,11 @@ migrate_obj_ult(void *data) if (tls->mpt_status == 0 && rc < 0) tls->mpt_status = rc; - D_DEBUG( - DB_REBUILD, - DF_RB ": stop migrate obj " DF_UOID "for shard %u ult %u/%u " DF_U64 " : " DF_RC "\n", - DP_RB_MPT(tls), DP_UOID(arg->oid), arg->shard, atomic_load(tls->mpt_tgt_obj_ult_cnt), - atomic_load(tls->mpt_tgt_dkey_ult_cnt), tls->mpt_obj_count, DP_RC(rc)); + D_DEBUG(DB_REBUILD, + DF_RB ": stop migrate obj " DF_UOID "for shard %u ult %u/%u " DF_U64 " : " DF_RC + "\n", + DP_RB_MPT(tls), DP_UOID(arg->oid), arg->shard, tls->mpt_tgt_obj_ult_cnt, + tls->mpt_tgt_dkey_ult_cnt, tls->mpt_obj_count, DP_RC(rc)); free_notls: if (tls != NULL) migrate_tgt_exit(tls, OBJ_ULT); @@ -3476,7 +3260,6 @@ migrate_one_object(daos_unit_oid_t oid, daos_epoch_t eph, daos_epoch_t punched_e sizeof(*obj_arg->snaps) * cont_arg->snap_cnt); } - /* Let's iterate the object on different xstream */ rc = dss_ult_create(migrate_obj_ult, obj_arg, DSS_XS_VOS, tgt_idx, MIGRATE_STACK_SIZE, NULL); if (rc) @@ -3490,8 +3273,7 @@ migrate_one_object(daos_unit_oid_t oid, daos_epoch_t eph, daos_epoch_t punched_e rc = obj_tree_insert(toh, cont_arg->cont_uuid, -1, oid, &val_iov); D_DEBUG(DB_REBUILD, DF_RB ": insert " DF_UUID "/" DF_UOID ": ult %u/%u " DF_RC "\n", DP_RB_MPT(tls), DP_UUID(cont_arg->cont_uuid), DP_UOID(oid), - atomic_load(&tls->mpt_obj_ult_cnts[tgt_idx]), - atomic_load(&tls->mpt_dkey_ult_cnts[tgt_idx]), DP_RC(rc)); + tls->mpt_tgt_obj_ult_cnt, tls->mpt_tgt_dkey_ult_cnt, DP_RC(rc)); return 0; free: @@ -3523,8 +3305,8 @@ migrate_obj_iter_cb(daos_handle_t ih, d_iov_t *key_iov, d_iov_t *val_iov, void * DF_RB ": obj migrate " DF_UUID "/" DF_UOID " %" PRIx64 " eph " DF_U64 " start\n", DP_RB_MPT(arg->pool_tls), DP_UUID(arg->cont_uuid), DP_UOID(*oid), ih.cookie, epoch); - rc = 
migrate_system_enter(arg->pool_tls, tgt_idx, &yielded); - if (rc != 0) { + rc = migrate_tgt_enter(arg->pool_tls, OBJ_ULT, &yielded); + if (rc) { DL_ERROR(rc, DF_RB ": " DF_UUID " enter migrate failed.", DP_RB_MPT(arg->pool_tls), DP_UUID(arg->cont_uuid)); return rc; @@ -3534,11 +3316,11 @@ migrate_obj_iter_cb(daos_handle_t ih, d_iov_t *key_iov, d_iov_t *val_iov, void * if (rc != 0) { DL_ERROR(rc, DF_RB ": obj " DF_UOID " migration failed", DP_RB_MPT(arg->pool_tls), DP_UOID(*oid)); - migrate_system_exit(arg->pool_tls, tgt_idx); + migrate_tgt_exit(arg->pool_tls, OBJ_ULT); return rc; } - /* migrate_system_enter possibly yielded the ULT, let's re-probe before delete */ + /* migrate_tgt_enter possibly yielded the ULT, let's re-probe before delete */ if (yielded) { d_iov_set(&tmp_iov, oid, sizeof(*oid)); rc = dbtree_iter_probe(ih, BTR_PROBE_EQ, DAOS_INTENT_MIGRATION, &tmp_iov, NULL); @@ -3572,6 +3354,59 @@ migrate_obj_iter_cb(daos_handle_t ih, d_iov_t *key_iov, d_iov_t *val_iov, void * return rc; } +struct cont_fetch_arg { + uuid_t pool_uuid; + uuid_t cont_uuid; + uint64_t *snapshots; + int snap_cnt; + struct ds_pool *pool; +}; + +static int +cont_fetch_start_ult(void *arg) +{ + int rc; + struct cont_fetch_arg *fetch_arg = (struct cont_fetch_arg *)arg; + + rc = ds_pool_lookup(fetch_arg->pool_uuid, &fetch_arg->pool); + if (rc) { + D_ERROR(DF_UUID " ds_pool_lookup failed: " DF_RC "\n", + DP_UUID(fetch_arg->pool_uuid), DP_RC(rc)); + return rc; + } + + rc = ds_cont_fetch_snaps(fetch_arg->pool->sp_iv_ns, fetch_arg->cont_uuid, + &fetch_arg->snapshots, &fetch_arg->snap_cnt); + if (rc) { + D_ERROR("ds_cont_fetch_snaps failed: " DF_RC "\n", DP_RC(rc)); + return rc; + } + + rc = ds_cont_fetch_ec_agg_boundary(fetch_arg->pool->sp_iv_ns, fetch_arg->cont_uuid); + if (rc) { + /* Sometime it may too early to fetch the EC boundary, + * since EC boundary does not start yet, which is forbidden + * during rebuild anyway, so let's continue. 
+ */ + D_DEBUG(DB_REBUILD, DF_UUID " fetch agg_boundary failed: " DF_RC "\n", + DP_UUID(fetch_arg->cont_uuid), DP_RC(rc)); + } + + return rc; +} + +static int +cont_fetch_end_ult(void *arg) +{ + struct cont_fetch_arg *fetch_arg = (struct cont_fetch_arg *)arg; + + if (fetch_arg->pool) + ds_pool_put(fetch_arg->pool); + + D_FREE(fetch_arg->snapshots); + return 0; +} + /* This iterates the migration database "container", which is different than the * similarly identified by container UUID as the actual container in VOS. * However, this container only contains object IDs that were specified to be @@ -3581,48 +3416,32 @@ static int migrate_cont_iter_cb(daos_handle_t ih, d_iov_t *key_iov, d_iov_t *val_iov, void *data) { - struct ds_pool *dp; struct iter_cont_arg arg = { 0 }; struct tree_cache_root *root = val_iov->iov_buf; - struct migrate_pool_tls *tls = data; - uint64_t *snapshots = NULL; - uuid_t cont_uuid; - int snap_cnt; + struct migrate_pool_tls *tls = data; + uuid_t cont_uuid; d_iov_t tmp_iov; int rc; + struct cont_fetch_arg fetch_arg = {0}; uuid_copy(cont_uuid, *(uuid_t *)key_iov->iov_buf); D_DEBUG(DB_REBUILD, DF_RB ": iter cont " DF_UUID "/%" PRIx64 " %" PRIx64 " start\n", DP_RB_MPT(tls), DP_UUID(cont_uuid), ih.cookie, root->tcr_root_hdl.cookie); - rc = ds_pool_lookup(tls->mpt_pool_uuid, &dp); + uuid_copy(fetch_arg.cont_uuid, cont_uuid); + uuid_copy(fetch_arg.pool_uuid, tls->mpt_pool_uuid); + rc = dss_ult_execute(cont_fetch_start_ult, &fetch_arg, NULL, NULL, DSS_XS_SYS, 0, 0); if (rc) { DL_ERROR(rc, DF_RB ": ds_pool_lookup failed", DP_RB_MPT(tls)); - rc = 0; - D_GOTO(out_put, rc); - } - - rc = ds_cont_fetch_snaps(dp->sp_iv_ns, cont_uuid, &snapshots, - &snap_cnt); - if (rc) { - DL_ERROR(rc, DF_RB ": ds_cont_fetch_snaps failed", DP_RB_MPT(tls)); - D_GOTO(out_put, rc); - } - - rc = ds_cont_fetch_ec_agg_boundary(dp->sp_iv_ns, cont_uuid); - if (rc) { - /* Sometime it may too early to fetch the EC boundary, - * since EC boundary does not start yet, which is forbidden - 
* during rebuild anyway, so let's continue. - */ - D_DEBUG(DB_REBUILD, DF_RB ": " DF_UUID " fetch agg_boundary failed: " DF_RC "\n", - DP_RB_MPT(tls), DP_UUID(cont_uuid), DP_RC(rc)); + if (rc == -DER_SHUTDOWN) + rc = 0; + D_GOTO(free, rc); } arg.yield_freq = DEFAULT_YIELD_FREQ; arg.cont_root = root; - arg.snaps = snapshots; - arg.snap_cnt = snap_cnt; + arg.snaps = fetch_arg.snapshots; + arg.snap_cnt = fetch_arg.snap_cnt; arg.pool_tls = tls; uuid_copy(arg.cont_uuid, cont_uuid); while (!dbtree_is_empty(root->tcr_root_hdl)) { @@ -3670,14 +3489,10 @@ migrate_cont_iter_cb(daos_handle_t ih, d_iov_t *key_iov, D_GOTO(free, rc); } free: - if (snapshots) - D_FREE(snapshots); - -out_put: + D_ASSERT(dss_ult_execute(cont_fetch_end_ult, &fetch_arg, NULL, NULL, DSS_XS_SYS, 0, 0) == + 0); if (tls->mpt_status == 0 && rc < 0) tls->mpt_status = rc; - if (dp != NULL) - ds_pool_put(dp); return rc; } @@ -3799,26 +3614,131 @@ migrate_try_obj_insert(struct migrate_pool_tls *tls, uuid_t co_uuid, return rc; } +struct ds_pool_migrate_arg { + uuid_t pool_uuid; + struct ds_pool *pool; + uint32_t rebuild_ver; + uint32_t generation; + daos_prop_t *prop; + int tgt_id; + uint8_t tgt_status; + uint32_t tgt_in_ver; + int rebuilding_count; + bool no_iv; +}; + +static int +ds_migrate_end_ult(void *arg) +{ + struct ds_pool_migrate_arg *pool_arg = (struct ds_pool_migrate_arg *)arg; + + if (pool_arg->pool) { + pool_arg->pool->sp_rebuilding += pool_arg->rebuilding_count; + ds_pool_put(pool_arg->pool); + } + if (pool_arg->prop) + daos_prop_free(pool_arg->prop); + return 0; +} + +static int +ds_migrate_prepare_ult(void *arg) +{ + int rc; + uint32_t rebuild_ver; + struct ds_pool_migrate_arg *pool_arg = (struct ds_pool_migrate_arg *)arg; + struct pool_target *tgts; + + rc = ds_pool_lookup(pool_arg->pool_uuid, &pool_arg->pool); + if (rc != 0) { + if (rc == -DER_SHUTDOWN) { + D_DEBUG(DB_REBUILD, DF_UUID " pool service is stopping.\n", + DP_UUID(pool_arg->pool_uuid)); + rc = 0; + } else { + D_DEBUG(DB_REBUILD, 
DF_UUID " pool service is not started yet. " DF_RC "\n", + DP_UUID(pool_arg->pool_uuid), DP_RC(rc)); + rc = -DER_AGAIN; + } + return rc; + } + + ds_rebuild_running_query(pool_arg->pool_uuid, -1, &rebuild_ver, NULL, NULL); + if (rebuild_ver == 0 || rebuild_ver != pool_arg->rebuild_ver) { + rc = -DER_SHUTDOWN; + D_GOTO(out, rc); + } + + if (pool_arg->no_iv) + D_GOTO(out, rc = 0); + + D_ALLOC_PTR(pool_arg->prop); + if (pool_arg->prop == NULL) + D_GOTO(out, rc = -DER_NOMEM); + + rc = ds_pool_iv_prop_fetch(pool_arg->pool, pool_arg->prop); + if (rc) + D_GOTO(out, rc); + + pool_arg->pool->sp_rebuilding++; + rc = pool_map_find_target_by_rank_idx(pool_arg->pool->sp_map, dss_self_rank(), -1, &tgts); + D_ASSERT(rc == dss_tgt_nr); + pool_arg->tgt_status = tgts[pool_arg->tgt_id].ta_comp.co_status; + pool_arg->tgt_in_ver = tgts[pool_arg->tgt_id].ta_comp.co_in_ver; + rc = 0; +out: + return rc; +} + int -ds_migrate_object(struct ds_pool *pool, uuid_t po_hdl, uuid_t co_hdl, uuid_t co_uuid, - uint32_t version, unsigned int generation, uint64_t max_eph, uint32_t opc, - daos_unit_oid_t *oids, daos_epoch_t *epochs, daos_epoch_t *punched_epochs, - unsigned int *shards, uint32_t count, unsigned int tgt_idx, - uint32_t new_layout_ver) +ds_migrate_object(uuid_t pool_uuid, uuid_t po_hdl, uuid_t co_hdl, uuid_t co_uuid, uint32_t version, + unsigned int generation, uint64_t max_eph, uint32_t opc, daos_unit_oid_t *oids, + daos_epoch_t *epochs, daos_epoch_t *punched_epochs, unsigned int *shards, + uint32_t count, unsigned int tgt_idx, uint32_t new_layout_ver) { - struct migrate_pool_tls *tls = NULL; - int i; - int rc; + struct migrate_pool_tls *tls = NULL; + int i; + int rc; + d_rank_list_t *svc_list = NULL; + struct daos_prop_entry *entry; + struct ds_pool_migrate_arg arg = {0}; + uint32_t tgt_id = dss_get_module_info()->dmi_tgt_id; + + tls = migrate_pool_tls_lookup(pool_uuid, version, generation); + if (tls) + arg.no_iv = true; - /* Check if the pool tls exists */ - rc = 
migrate_pool_tls_lookup_create(pool, version, generation, po_hdl, co_hdl, max_eph, - new_layout_ver, opc, &tls); + uuid_copy(arg.pool_uuid, pool_uuid); + arg.rebuild_ver = version; + arg.tgt_id = tgt_id; + arg.generation = generation; + rc = dss_ult_execute(ds_migrate_prepare_ult, &arg, NULL, NULL, DSS_XS_SYS, 0, 0); + if (rc || arg.pool == NULL) + D_GOTO(out, rc); + + if (tls) + goto skip_create; + + entry = daos_prop_entry_get(arg.prop, DAOS_PROP_PO_SVC_LIST); + D_ASSERT(entry != NULL); + svc_list = (d_rank_list_t *)entry->dpe_val_ptr; + + /* prepare might yield */ + tls = migrate_pool_tls_lookup(pool_uuid, version, generation); + if (tls) { + arg.rebuilding_count = -1; + goto skip_create; + } + + rc = migrate_pool_tls_create(pool_uuid, version, generation, po_hdl, co_hdl, max_eph, + new_layout_ver, opc, &tls, svc_list, arg.tgt_status, + arg.tgt_in_ver); if (rc != 0) D_GOTO(out, rc); +skip_create: if (tls->mpt_fini) D_GOTO(out, rc = -DER_SHUTDOWN); - /* NB: only create this tree on xstream 0 */ rc = migrate_try_create_object_tree(tls); if (rc) D_GOTO(out, rc); @@ -3859,6 +3779,9 @@ ds_migrate_object(struct ds_pool *pool, uuid_t po_hdl, uuid_t co_hdl, uuid_t co_ out: if (tls) migrate_pool_tls_put(tls); + if (arg.pool) + D_ASSERT(dss_ult_execute(ds_migrate_end_ult, &arg, NULL, NULL, DSS_XS_SYS, 0, 0) == + 0); return rc; } @@ -3880,9 +3803,7 @@ ds_obj_migrate_handler(crt_rpc_t *rpc) uuid_t po_uuid; uuid_t po_hdl_uuid; uuid_t co_uuid; - uuid_t co_hdl_uuid; - struct ds_pool *pool = NULL; - uint32_t rebuild_ver; + uuid_t co_hdl_uuid; int rc; migrate_in = crt_req_get(rpc); @@ -3911,35 +3832,11 @@ ds_obj_migrate_handler(crt_rpc_t *rpc) uuid_copy(po_uuid, migrate_in->om_pool_uuid); uuid_copy(po_hdl_uuid, migrate_in->om_poh_uuid); - rc = ds_pool_lookup(po_uuid, &pool); - if (rc != 0) { - if (rc == -DER_SHUTDOWN) { - D_DEBUG(DB_REBUILD, DF_RB " pool service is stopping.\n", - DP_RB_OMI(migrate_in)); - rc = 0; - } else { - D_DEBUG(DB_REBUILD, DF_RB " pool service is not 
started yet. " DF_RC "\n", - DP_RB_OMI(migrate_in), DP_RC(rc)); - rc = -DER_AGAIN; - } - D_GOTO(out, rc); - } - - ds_rebuild_running_query(migrate_in->om_pool_uuid, -1, &rebuild_ver, NULL, NULL); - if (rebuild_ver == 0 || rebuild_ver != migrate_in->om_version) { - rc = -DER_SHUTDOWN; - DL_ERROR(rc, DF_RB " rebuild ver %u", DP_RB_OMI(migrate_in), rebuild_ver); - D_GOTO(out, rc); - } - - rc = ds_migrate_object(pool, po_hdl_uuid, co_hdl_uuid, co_uuid, migrate_in->om_version, + rc = ds_migrate_object(po_uuid, po_hdl_uuid, co_hdl_uuid, co_uuid, migrate_in->om_version, migrate_in->om_generation, migrate_in->om_max_eph, migrate_in->om_opc, oids, ephs, punched_ephs, shards, oids_count, migrate_in->om_tgt_idx, migrate_in->om_new_layout_ver); out: - if (pool) - ds_pool_put(pool); - migrate_out = crt_reply_get(rpc); migrate_out->om_status = rc; dss_rpc_reply(rpc, DAOS_REBUILD_DROP_OBJ); @@ -4185,12 +4082,12 @@ reint_post_process_ult(void *data) struct migrate_query_arg { uuid_t pool_uuid; - ABT_mutex status_lock; - struct btr_root *mpt_migrated_root; + ABT_mutex status_lock; struct ds_migrate_status dms; uint32_t version; uint32_t total_ult_cnt; uint32_t generation; + uint32_t ult_running; daos_rebuild_opc_t rebuild_op; uint32_t mpt_reintegrating:1, reint_post_start:1, @@ -4211,8 +4108,7 @@ migrate_check_one(void *data) if (tls == NULL) return 0; - ult_cnt = atomic_load(tls->mpt_tgt_obj_ult_cnt) + atomic_load(tls->mpt_tgt_dkey_ult_cnt); - + ult_cnt = tls->mpt_tgt_obj_ult_cnt + tls->mpt_tgt_dkey_ult_cnt; ABT_mutex_lock(arg->status_lock); arg->dms.dm_rec_count += tls->mpt_rec_count; arg->dms.dm_obj_count += tls->mpt_obj_count; @@ -4220,6 +4116,7 @@ migrate_check_one(void *data) if (arg->dms.dm_status == 0) arg->dms.dm_status = tls->mpt_status; arg->total_ult_cnt += ult_cnt; + arg->ult_running += tls->mpt_ult_running; if (tls->mpt_reintegrating) { arg->mpt_reintegrating = 1; if (arg->reint_post_start) { @@ -4240,9 +4137,8 @@ migrate_check_one(void *data) 
ABT_mutex_unlock(arg->status_lock); D_DEBUG(DB_REBUILD, DF_RB " status %d/%d/ ult %u/%u rec/obj/size " DF_U64 "/" DF_U64 "/" DF_U64 "\n", - DP_RB_MQA(arg), tls->mpt_status, arg->dms.dm_status, - atomic_load(tls->mpt_tgt_obj_ult_cnt), atomic_load(tls->mpt_tgt_dkey_ult_cnt), - tls->mpt_rec_count, tls->mpt_obj_count, tls->mpt_size); + DP_RB_MQA(arg), tls->mpt_status, arg->dms.dm_status, tls->mpt_tgt_obj_ult_cnt, + tls->mpt_tgt_dkey_ult_cnt, tls->mpt_rec_count, tls->mpt_obj_count, tls->mpt_size); if (reint_post_start && !tls->mpt_post_process_started) { migrate_pool_tls_get(tls); @@ -4252,7 +4148,7 @@ migrate_check_one(void *data) D_GOTO(out, rc = -DER_NOMEM); ult_arg->rpa_tls = tls; - ult_arg->rpa_migrated_root = arg->mpt_migrated_root; + ult_arg->rpa_migrated_root = &tls->mpt_migrated_root; rc = dss_ult_create(reint_post_process_ult, ult_arg, DSS_XS_SELF, 0, MIGRATE_STACK_SIZE, NULL); if (rc) { @@ -4276,19 +4172,13 @@ int ds_migrate_query_status(uuid_t pool_uuid, uint32_t ver, unsigned int generation, int op, bool gl_scan_done, struct ds_migrate_status *dms) { - struct migrate_query_arg arg = { 0 }; - struct migrate_pool_tls *tls; + struct migrate_query_arg arg = {0}; int rc; - tls = migrate_pool_tls_lookup(pool_uuid, ver, generation); - if (tls == NULL) - return 0; - uuid_copy(arg.pool_uuid, pool_uuid); arg.version = ver; arg.generation = generation; arg.rebuild_op = op; - arg.mpt_migrated_root = &tls->mpt_migrated_root; rc = ABT_mutex_create(&arg.status_lock); if (rc != ABT_SUCCESS) D_GOTO(out, rc); @@ -4301,8 +4191,8 @@ ds_migrate_query_status(uuid_t pool_uuid, uint32_t ver, unsigned int generation, /* when globally scan done, and locally pull done, for reintegration need to do some post * processing, cannot report riv_pull_done before the post processing complete. 
*/ - if (gl_scan_done && arg.total_ult_cnt == 0 && !tls->mpt_ult_running && - arg.mpt_reintegrating && !arg.reint_post_processing) { + if (gl_scan_done && arg.total_ult_cnt == 0 && !arg.ult_running && arg.mpt_reintegrating && + !arg.reint_post_processing) { arg.reint_post_start = 1; rc = ds_pool_thread_collective(pool_uuid, PO_COMP_ST_NEW | PO_COMP_ST_DOWN | PO_COMP_ST_DOWNOUT, migrate_check_one, &arg, 0); @@ -4310,8 +4200,7 @@ ds_migrate_query_status(uuid_t pool_uuid, uint32_t ver, unsigned int generation, D_GOTO(out, rc); } - if (!gl_scan_done || arg.total_ult_cnt > 0 || tls->mpt_ult_running || - arg.reint_post_processing) + if (!gl_scan_done || arg.total_ult_cnt > 0 || arg.ult_running || arg.reint_post_processing) arg.dms.dm_migrating = 1; else arg.dms.dm_migrating = 0; @@ -4323,12 +4212,11 @@ ds_migrate_query_status(uuid_t pool_uuid, uint32_t ver, unsigned int generation, DF_RB " migrating=%s, obj_count=" DF_U64 ", rec_count=" DF_U64 ", size=" DF_U64 " ult_cnt %u, mpt_ult_running %d, reint_post_processing %d, status %d\n", DP_RB_MQA(&arg), arg.dms.dm_migrating ? 
"yes" : "no", arg.dms.dm_obj_count, - arg.dms.dm_rec_count, arg.dms.dm_total_size, arg.total_ult_cnt, - tls->mpt_ult_running, arg.reint_post_processing, arg.dms.dm_status); + arg.dms.dm_rec_count, arg.dms.dm_total_size, arg.total_ult_cnt, arg.ult_running, + arg.reint_post_processing, arg.dms.dm_status); out: ABT_mutex_free(&arg.status_lock); - migrate_pool_tls_put(tls); return rc; } @@ -4402,7 +4290,7 @@ ds_object_migrate_send(struct ds_pool *pool, uuid_t pool_hdl_uuid, uuid_t cont_h tgt_ep.ep_rank = target->ta_comp.co_rank; index = target->ta_comp.co_index; ABT_rwlock_unlock(pool->sp_lock); - tgt_ep.ep_tag = 0; + tgt_ep.ep_tag = daos_rpc_tag(DAOS_REQ_TGT, index); opcode = DAOS_RPC_OPCODE(DAOS_OBJ_RPC_MIGRATE, DAOS_OBJ_MODULE, rpc_ver); rc = crt_req_create(dss_get_module_info()->dmi_ctx, &tgt_ep, opcode, &rpc); diff --git a/src/rebuild/scan.c b/src/rebuild/scan.c index 0272014e025..a13826fd91e 100644 --- a/src/rebuild/scan.c +++ b/src/rebuild/scan.c @@ -587,7 +587,7 @@ rebuild_obj_ult(void *data) struct rebuild_obj_arg *arg = data; struct rebuild_tgt_pool_tracker *rpt = arg->rpt; - ds_migrate_object(rpt->rt_pool, rpt->rt_poh_uuid, rpt->rt_coh_uuid, arg->co_uuid, + ds_migrate_object(rpt->rt_pool_uuid, rpt->rt_poh_uuid, rpt->rt_coh_uuid, arg->co_uuid, rpt->rt_rebuild_ver, rpt->rt_rebuild_gen, rpt->rt_stable_epoch, rpt->rt_rebuild_op, &arg->oid, &arg->epoch, &arg->punched_epoch, &arg->shard, 1, arg->tgt_index, rpt->rt_new_layout_ver); @@ -617,7 +617,7 @@ rebuild_object_local(struct rebuild_tgt_pool_tracker *rpt, uuid_t co_uuid, arg->tgt_index = tgt_index; arg->shard = shard; - rc = dss_ult_create(rebuild_obj_ult, arg, DSS_XS_SYS, 0, 0, NULL); + rc = dss_ult_create(rebuild_obj_ult, arg, DSS_XS_VOS, tgt_index, 0, NULL); if (rc) { D_FREE(arg); rpt_put(rpt); From 19942ab1b183631cec3563171e227419e8f29e6d Mon Sep 17 00:00:00 2001 From: Tomasz Gromadzki Date: Wed, 12 Nov 2025 16:57:21 +0100 Subject: [PATCH 011/253] DAOS-17449 common: Hide the missing SDS warning for sys_db 
while operating in md-on-ssd mode (#17081) The purpose of this PR is to switch to a stable, clearly defined version of PMDK. The previous version, which was based on the DAOS-specific branch (https://github.com/daos-stack/pmdk/tree/stable-2.1.0-daos), was only intended as a temporary solution due to limitations in the initial implementation of the new RPM building solution based on FPM. Signed-off-by: Tomasz Gromadzki --- utils/build.config | 2 +- utils/rpms/daos.changelog | 3 +++ utils/rpms/daos.spec | 2 +- utils/rpms/package_info.sh | 4 ++-- utils/rpms/pmdk.changelog | 4 ++++ 5 files changed, 11 insertions(+), 4 deletions(-) diff --git a/utils/build.config b/utils/build.config index d565444cf64..7aa44484c0f 100644 --- a/utils/build.config +++ b/utils/build.config @@ -4,7 +4,7 @@ component=daos [commit_versions] argobots=v1.2 fused=v1.0.0 -pmdk=stable-2.1.0-daos +pmdk=2.1.2 isal=v2.31.1 isal_crypto=v2.24.0 spdk=v22.01.2 diff --git a/utils/rpms/daos.changelog b/utils/rpms/daos.changelog index 6384eef6a8a..ce183a90690 100644 --- a/utils/rpms/daos.changelog +++ b/utils/rpms/daos.changelog @@ -1,4 +1,7 @@ %changelog +* Wed Nov 05 2025 Tomasz Gromadzki - 2.7-101-17 +- Update PMDK to release 2.1.2 + * Thu Oct 16 2025 Jeff Olivier 2.7.101-16 - Make daos-spdk conflict with spdk diff --git a/utils/rpms/daos.spec b/utils/rpms/daos.spec index c4a67b5b2cf..3e095b37b1e 100644 --- a/utils/rpms/daos.spec +++ b/utils/rpms/daos.spec @@ -25,7 +25,7 @@ Name: daos Version: 2.7.101 -Release: 16%{?relval}%{?dist} +Release: 17%{?relval}%{?dist} Summary: DAOS Storage Engine License: BSD-2-Clause-Patent diff --git a/utils/rpms/package_info.sh b/utils/rpms/package_info.sh index 54920d77365..60c8caa0e58 100644 --- a/utils/rpms/package_info.sh +++ b/utils/rpms/package_info.sh @@ -43,8 +43,8 @@ export mercury_full="${mercury_version}-${mercury_release}" export argobots_version="1.2" export argobots_release="3${distro_name}" export argobots_full="${argobots_version}-${argobots_release}" 
-export pmdk_version="2.1.0" -export pmdk_release="7${distro_name}" +export pmdk_version="2.1.2" +export pmdk_release="1${distro_name}" export pmdk_full="${pmdk_version}-${pmdk_release}" export isal_version="2.31.1" export isal_release="7${distro_name}" diff --git a/utils/rpms/pmdk.changelog b/utils/rpms/pmdk.changelog index 464a3120c75..d4cd893efeb 100644 --- a/utils/rpms/pmdk.changelog +++ b/utils/rpms/pmdk.changelog @@ -1,3 +1,7 @@ +%changelog +* Wed Nov 05 2025 Tomasz Gromadzki - 2.1.2-1 +- Update to release 2.1.2 + * Fri Oct 31 2025 Tomasz Gromadzki - 2.1.0-7 - Restore the RPM changelog, which has not been available since version 2.1.0-4. From b62c67e2d2a462ab7c77170733e8ac06251538e6 Mon Sep 17 00:00:00 2001 From: Liu Xuezhao Date: Wed, 12 Nov 2025 23:58:11 +0800 Subject: [PATCH 012/253] DAOS-18172 container: skip nonexist container for cont_agg_eph_sync (#17061) the cont_agg_eph_sync() possibly race with container destroy, in that case skip the non-exist container (cont_lookup get -DER_NONEXIST). 
Signed-off-by: Xuezhao Liu --- src/container/srv_container.c | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/src/container/srv_container.c b/src/container/srv_container.c index 5ad305231b4..e9bbda10d74 100644 --- a/src/container/srv_container.c +++ b/src/container/srv_container.c @@ -1896,7 +1896,8 @@ ds_cont_tgt_refresh_track_eph(uuid_t pool_uuid, uuid_t cont_uuid, rc = ds_pool_lookup(pool_uuid, &pool); if (rc != 0) { - D_ERROR(DF_UUID" lookup pool failed: %d\n", DP_UUID(pool_uuid), rc); + DL_CDEBUG(rc != 0 && rc != -DER_SHUTDOWN, DLOG_ERR, DB_MD, rc, + DF_UUID " lookup pool failed", DP_UUID(pool_uuid)); goto out; } rank = dss_self_rank(); @@ -1941,8 +1942,9 @@ cont_agg_eph_load(struct cont_svc *svc, uuid_t cont_uuid, uint64_t *ec_agg_eph) ABT_rwlock_rdlock(svc->cs_lock); rc = cont_lookup(&tx, svc, cont_uuid, &cont); if (rc != 0) { - D_ERROR(DF_CONT ": Failed to look container: %d\n", - DP_CONT(svc->cs_pool_uuid, cont_uuid), rc); + DL_CDEBUG(rc != 0 && rc != -DER_NONEXIST, DLOG_ERR, DB_MD, rc, + DF_CONT ": Failed to look container", + DP_CONT(svc->cs_pool_uuid, cont_uuid)); D_GOTO(out_lock, rc); } @@ -2005,8 +2007,9 @@ cont_agg_eph_store(struct cont_svc *svc, uuid_t cont_uuid, uint64_t ec_agg_eph, ABT_rwlock_wrlock(svc->cs_lock); rc = cont_lookup(&tx, svc, cont_uuid, &cont); if (rc != 0) { - D_ERROR(DF_CONT ": Failed to look container: %d\n", - DP_CONT(svc->cs_pool_uuid, cont_uuid), rc); + DL_CDEBUG(rc != 0 && rc != -DER_NONEXIST, DLOG_ERR, DB_MD, rc, + DF_CONT ": Failed to look container", + DP_CONT(svc->cs_pool_uuid, cont_uuid)); D_GOTO(out_lock, rc); } @@ -2074,9 +2077,15 @@ cont_agg_eph_sync(struct ds_pool *pool, struct cont_svc *svc) if (eph_ldr->cte_rdb_ec_agg_eph == 0) { rc = cont_agg_eph_load(svc, eph_ldr->cte_cont_uuid, &eph_ldr->cte_rdb_ec_agg_eph); - if (rc) + if (rc) { + if (rc == -DER_NONEXIST) { + DL_INFO(rc, DF_CONT " container skipped", + DP_CONT(svc->cs_pool_uuid, eph_ldr->cte_cont_uuid)); + 
continue; + } DL_ERROR(rc, DF_CONT ": cont_agg_eph_load failed.", DP_CONT(svc->cs_pool_uuid, eph_ldr->cte_cont_uuid)); + } } min_ec_agg_eph = DAOS_EPOCH_MAX; @@ -2135,11 +2144,17 @@ cont_agg_eph_sync(struct ds_pool *pool, struct cont_svc *svc) if (min_ec_agg_eph > eph_ldr->cte_rdb_ec_agg_eph) { rc = cont_agg_eph_store(svc, eph_ldr->cte_cont_uuid, min_ec_agg_eph, &eph_ldr->cte_rdb_ec_agg_eph); - if (rc) + if (rc) { + if (rc == -DER_NONEXIST) { + DL_INFO(rc, DF_CONT " container skipped", + DP_CONT(svc->cs_pool_uuid, eph_ldr->cte_cont_uuid)); + continue; + } DL_ERROR(rc, DF_CONT ": rdb_tx_update ec_agg_eph " DF_X64 " failed.", DP_CONT(svc->cs_pool_uuid, eph_ldr->cte_cont_uuid), min_ec_agg_eph); + } } rc = cont_iv_track_eph_refresh(pool->sp_iv_ns, eph_ldr->cte_cont_uuid, From 349c27b6a5d8ca29723a28facf05ab8da3877fd1 Mon Sep 17 00:00:00 2001 From: Tom Nabarro Date: Wed, 12 Nov 2025 16:18:07 +0000 Subject: [PATCH 013/253] DAOS-18128 control: self_heal related unit test coverage improvements (#17054) Signed-off-by: Tom Nabarro --- src/control/cmd/daos/pretty/pool_test.go | 42 ++++++-- src/control/lib/control/pool.go | 18 ++-- src/control/lib/control/pool_test.go | 127 ++++++++++++++++++++++- src/control/lib/daos/pool_property.go | 7 +- src/control/server/mgmt_pool_test.go | 33 ++++-- 5 files changed, 200 insertions(+), 27 deletions(-) diff --git a/src/control/cmd/daos/pretty/pool_test.go b/src/control/cmd/daos/pretty/pool_test.go index ccc2f4e9537..e743b509a44 100644 --- a/src/control/cmd/daos/pretty/pool_test.go +++ b/src/control/cmd/daos/pretty/pool_test.go @@ -371,12 +371,42 @@ Pool space info: } } -// TODO DAOS-18128: Add more test cases -//func TestPretty_PrintPoolSelfHealDisable(t *testing.T) { -// for name, tc := range map[string]struct { -// sysSelfHeal string -// poolSelfHeal string -//} +func TestPretty_PrintPoolSelfHealDisable(t *testing.T) { + for name, tc := range map[string]struct { + poolSelfHeal string + sysSelfHeal string + expPrintStr string + }{ + 
"defaults": { + poolSelfHeal: "exclude;rebuild", + sysSelfHeal: "exclude;pool_exclude;pool_rebuild", + }, + "no pool flags": { + poolSelfHeal: "none", + sysSelfHeal: "exclude;pool_exclude;pool_rebuild", + expPrintStr: "exclude disabled on pool due to [pool] policy\nrebuild disabled on pool due to [pool] policy\n", + }, + "no system flags": { + poolSelfHeal: "exclude;rebuild", + sysSelfHeal: "none", + expPrintStr: "exclude disabled on pool due to [system] policy\nrebuild disabled on pool due to [system] policy\n", + }, + "no flags": { + poolSelfHeal: "none", + sysSelfHeal: "none", + expPrintStr: "exclude disabled on pool due to [pool system] policies\nrebuild disabled on pool due to [pool system] policies\n", + }, + } { + t.Run(name, func(t *testing.T) { + var bld strings.Builder + PrintPoolSelfHealDisable(tc.poolSelfHeal, tc.sysSelfHeal, &bld) + + if diff := cmp.Diff(strings.TrimLeft(tc.expPrintStr, "\n"), bld.String()); diff != "" { + t.Fatalf("unexpected print string (-want, +got):\n%s\n", diff) + } + }) + } +} func TestPretty_PrintPoolQueryTarget(t *testing.T) { for name, tc := range map[string]struct { diff --git a/src/control/lib/control/pool.go b/src/control/lib/control/pool.go index a0fb5b33a07..5bca12dfdb7 100644 --- a/src/control/lib/control/pool.go +++ b/src/control/lib/control/pool.go @@ -529,20 +529,25 @@ func (pqr *PoolQueryResp) UpdateSelfHealPolicy(ctx context.Context, rpcClient Un props, err := PoolGetProp(ctx, rpcClient, req) if err != nil { - return err + return errors.Wrapf(err, "PoolGetProp") } switch len(props) { case 0: - rpcClient.Debug("self_heal pool property not found, assuming default value 'exclude;rebuild'") - pqr.SelfHealPolicy = "exclude;rebuild" + rpcClient.Debug("self_heal pool property not found, assuming default 'exclude;rebuild'") + pqr.SelfHealPolicy = daos.DefaultPoolSelfHealStr case 1: pqr.SelfHealPolicy = props[0].StringValue() + if pqr.SelfHealPolicy == "not set" { + pqr.SelfHealPolicy = daos.DefaultPoolSelfHealStr + } 
default: - return errors.Errorf("unexpected number of pool props returned, want 1 got %d", len(props)) + return errors.Errorf("unexpected number of pool props returned, want 1 got %d", + len(props)) } - rpcClient.Debugf("pool-query: fetched pool self_heal propval: %s", pqr.SelfHealPolicy) + rpcClient.Debugf("pool-query: fetched pool self_heal propval: %s (from props %+v)", + pqr.SelfHealPolicy, props) return nil } @@ -763,7 +768,8 @@ func PoolGetProp(ctx context.Context, rpcClient UnaryInvoker, req *PoolGetPropRe pbMap := make(map[uint32]*mgmtpb.PoolProperty) for _, prop := range pbResp.GetProperties() { if _, found := pbMap[prop.GetNumber()]; found { - return nil, errors.Errorf("got > 1 %d in response", prop.GetNumber()) + return nil, errors.Errorf("got > 1 occurrences of prop %d in resp", + prop.GetNumber()) } pbMap[prop.GetNumber()] = prop } diff --git a/src/control/lib/control/pool_test.go b/src/control/lib/control/pool_test.go index bb2dce0f181..d93e0c5c8ce 100644 --- a/src/control/lib/control/pool_test.go +++ b/src/control/lib/control/pool_test.go @@ -1461,6 +1461,92 @@ func TestControl_PoolQueryResp_UnmarshalJSON(t *testing.T) { } } +func TestControl_PoolQueryResp_UpdateSelfHealPolicy(t *testing.T) { + type prop struct { + number uint32 + value interface{} + } + makePropResp := func(props ...prop) *mgmtpb.PoolGetPropResp { + pbProps := make([]*mgmtpb.PoolProperty, 0, len(props)) + for _, p := range props { + switch v := p.value.(type) { + case string: + pbProps = append(pbProps, &mgmtpb.PoolProperty{ + Number: p.number, + Value: &mgmtpb.PoolProperty_Strval{Strval: v}, + }) + case int: + pbProps = append(pbProps, &mgmtpb.PoolProperty{ + Number: p.number, + Value: &mgmtpb.PoolProperty_Numval{Numval: uint64(v)}, + }) + } + } + return &mgmtpb.PoolGetPropResp{ + Properties: pbProps, + } + } + selfHealPropNum := propWithVal("self_heal", "").Number + + for name, tc := range map[string]struct { + getPropResp *mgmtpb.PoolGetPropResp + getPropErr error + expValue 
string + expErr string + }{ + "no properties returned": { + getPropResp: makePropResp(), // no properties + expValue: "exclude;rebuild", + }, + "single string value; not set value ignored": { + getPropResp: makePropResp(prop{selfHealPropNum, "rebuild"}), + expValue: "exclude;rebuild", + }, + "single num value": { + getPropResp: makePropResp(prop{selfHealPropNum, daos.PoolSelfHealingAutoRebuild}), + expValue: "rebuild", + }, + "multiple properties returned": { + getPropResp: makePropResp( + prop{selfHealPropNum, daos.PoolSelfHealingAutoRebuild}, + prop{selfHealPropNum, daos.PoolSelfHealingAutoExclude}, + ), + expErr: "> 1 occurrences of prop 4", + }, + "get-prop returns error": { + getPropErr: errors.New("something bad"), + expErr: "something bad", + }, + } { + t.Run(name, func(t *testing.T) { + log, buf := logging.NewTestLogger(t.Name()) + defer test.ShowBufferOnFailure(t, buf) + + mic := &MockInvokerConfig{ + UnaryResponseSet: []*UnaryResponse{ + MockMSResponse("host1", tc.getPropErr, tc.getPropResp), + }, + } + resp := &PoolQueryResp{} + gotErr := resp.UpdateSelfHealPolicy(context.Background(), + NewMockInvoker(log, mic)) + + var expErr error + if tc.expErr != "" { + expErr = errors.New(tc.expErr) + } + test.CmpErr(t, expErr, gotErr) + if expErr != nil { + return + } + + if resp.SelfHealPolicy != tc.expValue { + t.Errorf("expected SelfHealPolicy %q, got %q", tc.expValue, resp.SelfHealPolicy) + } + }) + } +} + func TestControl_PoolQuery(t *testing.T) { poolUUID := test.MockPoolUUID() @@ -1776,10 +1862,6 @@ func TestControl_PoolQuery(t *testing.T) { }, }, }, - // TODO DAOS-18128: Add more test cases - // sys-prop but no pool-prop - // pool-prop but no sys-prop - // neither pool or sys props "query succeeds self_heal policies provided; missing pool self_heal property": { req: &PoolQueryReq{ ID: poolUUID.String(), @@ -1889,6 +1971,43 @@ func TestControl_PoolQuery(t *testing.T) { SysSelfHealPolicy: "exclude;pool_exclude;pool_rebuild", }, }, + "pool get-prop returns 
error": { + req: &PoolQueryReq{ + ID: poolUUID.String(), + QueryMask: daos.MustNewPoolQueryMask(daos.PoolQueryOptionSelfHealPolicy), + }, + mic: &MockInvokerConfig{ + UnaryResponseSet: []*UnaryResponse{ + MockMSResponse("host1", nil, queryResp(1)), + MockMSResponse("host1", errors.New("get-prop failure"), nil), + }, + }, + expErr: errors.New("pool get-prop self_heal failed"), + }, + "pool get-prop returns multiple properties": { + req: &PoolQueryReq{ + ID: poolUUID.String(), + QueryMask: daos.MustNewPoolQueryMask(daos.PoolQueryOptionSelfHealPolicy), + }, + mic: &MockInvokerConfig{ + UnaryResponseSet: []*UnaryResponse{ + MockMSResponse("host1", nil, queryResp(1)), + MockMSResponse("host1", nil, &mgmtpb.PoolGetPropResp{ + Properties: []*mgmtpb.PoolProperty{ + { + Number: propWithVal("self_heal", "").Number, + Value: &mgmtpb.PoolProperty_Strval{Strval: "exclude"}, + }, + { + Number: propWithVal("self_heal", "").Number, + Value: &mgmtpb.PoolProperty_Strval{Strval: "rebuild"}, + }, + }, + }), + }, + }, + expErr: errors.New("> 1 occurrences of prop 4 in resp"), + }, } { t.Run(name, func(t *testing.T) { log, buf := logging.NewTestLogger(t.Name()) diff --git a/src/control/lib/daos/pool_property.go b/src/control/lib/daos/pool_property.go index c9364377a51..36c7ae08e00 100644 --- a/src/control/lib/daos/pool_property.go +++ b/src/control/lib/daos/pool_property.go @@ -209,6 +209,9 @@ func DataThreshIsValid(size uint64) bool { return bool(C.daos_data_thresh_valid(C.uint32_t(size))) } +// DefaultPoolSelfHealStr describes the default self_heal flags. +const DefaultPoolSelfHealStr = "exclude;rebuild" + // PoolPropertySelfHealUnsetFlags returns disabled flags in the self-heal pool property as a // string slice. 
func PoolPropertySelfHealUnsetFlags(value string) []string { @@ -290,7 +293,7 @@ func PoolProperties() PoolPropertyMap { case PoolSelfHealingDelayRebuild: return "delay_rebuild" case PoolSelfHealingAutoExclude | PoolSelfHealingAutoRebuild: - return "exclude;rebuild" + return DefaultPoolSelfHealStr case PoolSelfHealingAutoExclude | PoolSelfHealingDelayRebuild: return "exclude;delay_rebuild" default: @@ -303,7 +306,7 @@ func PoolProperties() PoolPropertyMap { "exclude": PoolSelfHealingAutoExclude, "rebuild": PoolSelfHealingAutoRebuild, "delay_rebuild": PoolSelfHealingDelayRebuild, - "exclude;rebuild": PoolSelfHealingAutoExclude | PoolSelfHealingAutoRebuild, + DefaultPoolSelfHealStr: PoolSelfHealingAutoExclude | PoolSelfHealingAutoRebuild, "rebuild;exclude": PoolSelfHealingAutoExclude | PoolSelfHealingAutoRebuild, "delay_rebuild;exclude": PoolSelfHealingAutoExclude | PoolSelfHealingDelayRebuild, "exclude;delay_rebuild": PoolSelfHealingAutoExclude | PoolSelfHealingDelayRebuild, diff --git a/src/control/server/mgmt_pool_test.go b/src/control/server/mgmt_pool_test.go index 54a1c7621a7..51a3bdc2813 100644 --- a/src/control/server/mgmt_pool_test.go +++ b/src/control/server/mgmt_pool_test.go @@ -2344,11 +2344,12 @@ func TestServer_MgmtSvc_PoolQuery(t *testing.T) { } for name, tc := range map[string]struct { - mgmtSvc *mgmtSvc - setupMockDrpc func(_ *mgmtSvc, _ error) - req *mgmtpb.PoolQueryReq - expResp *mgmtpb.PoolQueryResp - expErr error + mgmtSvc *mgmtSvc + setupMockDrpc func(_ *mgmtSvc, _ error) + req *mgmtpb.PoolQueryReq + missingSelfHealSysProp bool + expResp *mgmtpb.PoolQueryResp + expErr error }{ "nil request": { expErr: errors.New("nil request"), @@ -2449,6 +2450,18 @@ func TestServer_MgmtSvc_PoolQuery(t *testing.T) { SysSelfHealPolicy: "pool_rebuild", }, }, + "successful query; sys self-heal prop fetch; missing system property": { + missingSelfHealSysProp: true, + req: &mgmtpb.PoolQueryReq{ + Id: mockUUID, + QueryMask: 
uint64(daos.MustNewPoolQueryMask(daos.PoolQueryOptionSelfHealPolicy)), + }, + expResp: &mgmtpb.PoolQueryResp{ + State: mgmtpb.PoolServiceState_Ready, + Uuid: mockUUID, + SysSelfHealPolicy: daos.DefaultSysSelfHealFlagsStr, + }, + }, } { t.Run(name, func(t *testing.T) { buf.Reset() @@ -2479,10 +2492,12 @@ func TestServer_MgmtSvc_PoolQuery(t *testing.T) { tc.req.Sys = build.DefaultSystemName } - // Change stored value to something different from the default. - if err := system.SetUserProperty(tc.mgmtSvc.sysdb, tc.mgmtSvc.systemProps, - "self_heal", "pool_rebuild"); err != nil { - t.Fatal(err) + if !tc.missingSelfHealSysProp { + // Change stored value to something different from the default. + if err := system.SetUserProperty(tc.mgmtSvc.sysdb, tc.mgmtSvc.systemProps, + "self_heal", "pool_rebuild"); err != nil { + t.Fatal(err) + } } gotResp, gotErr := tc.mgmtSvc.PoolQuery(test.Context(t), tc.req) From 54cdcd6e409da20a1f1eb807093e936cb9724ed5 Mon Sep 17 00:00:00 2001 From: Tom Nabarro Date: Wed, 12 Nov 2025 21:43:59 +0000 Subject: [PATCH 014/253] DAOS-18198 control: Fix gRPC command authorizations (#17113) dmg system commands that operate over an enumerated list of pools often call into the control API from the server. When running with certificates and not in insecure mode these server-to-server calls get blocked if the gRPC method hasn't been given explicit ComponentServer authorization. This PR adds those controls for dmg system drain|reintegrate|self-heal|rebuild commands. 
Signed-off-by: Tom Nabarro --- src/control/security/grpc_authorization.go | 10 +++++----- src/control/security/grpc_authorization_test.go | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/control/security/grpc_authorization.go b/src/control/security/grpc_authorization.go index ed178b62214..50104b5b475 100644 --- a/src/control/security/grpc_authorization.go +++ b/src/control/security/grpc_authorization.go @@ -66,14 +66,14 @@ var methodAuthorizations = map[string][]Component{ "/mgmt.MgmtSvc/PoolUpdateACL": {ComponentAdmin}, "/mgmt.MgmtSvc/PoolDeleteACL": {ComponentAdmin}, "/mgmt.MgmtSvc/PoolExclude": {ComponentAdmin}, - "/mgmt.MgmtSvc/PoolDrain": {ComponentAdmin}, - "/mgmt.MgmtSvc/PoolReintegrate": {ComponentAdmin}, + "/mgmt.MgmtSvc/PoolDrain": {ComponentAdmin, ComponentServer}, + "/mgmt.MgmtSvc/PoolReintegrate": {ComponentAdmin, ComponentServer}, "/mgmt.MgmtSvc/PoolEvict": {ComponentAdmin, ComponentAgent}, "/mgmt.MgmtSvc/PoolExtend": {ComponentAdmin}, "/mgmt.MgmtSvc/PoolUpgrade": {ComponentAdmin}, - "/mgmt.MgmtSvc/PoolRebuildStart": {ComponentAdmin}, - "/mgmt.MgmtSvc/PoolRebuildStop": {ComponentAdmin}, - "/mgmt.MgmtSvc/PoolSelfHealEval": {ComponentAdmin}, + "/mgmt.MgmtSvc/PoolRebuildStart": {ComponentAdmin, ComponentServer}, + "/mgmt.MgmtSvc/PoolRebuildStop": {ComponentAdmin, ComponentServer}, + "/mgmt.MgmtSvc/PoolSelfHealEval": {ComponentAdmin, ComponentServer}, "/mgmt.MgmtSvc/GetAttachInfo": {ComponentAgent}, "/mgmt.MgmtSvc/ListPools": {ComponentAdmin}, "/mgmt.MgmtSvc/ListContainers": {ComponentAdmin}, diff --git a/src/control/security/grpc_authorization_test.go b/src/control/security/grpc_authorization_test.go index eeef1167f39..3bd1d3a8152 100644 --- a/src/control/security/grpc_authorization_test.go +++ b/src/control/security/grpc_authorization_test.go @@ -91,14 +91,14 @@ func TestSecurity_ComponentHasAccess(t *testing.T) { "/mgmt.MgmtSvc/PoolUpdateACL": {ComponentAdmin}, "/mgmt.MgmtSvc/PoolDeleteACL": {ComponentAdmin}, 
"/mgmt.MgmtSvc/PoolExclude": {ComponentAdmin}, - "/mgmt.MgmtSvc/PoolDrain": {ComponentAdmin}, - "/mgmt.MgmtSvc/PoolReintegrate": {ComponentAdmin}, + "/mgmt.MgmtSvc/PoolDrain": {ComponentAdmin, ComponentServer}, + "/mgmt.MgmtSvc/PoolReintegrate": {ComponentAdmin, ComponentServer}, "/mgmt.MgmtSvc/PoolEvict": {ComponentAdmin, ComponentAgent}, "/mgmt.MgmtSvc/PoolExtend": {ComponentAdmin}, "/mgmt.MgmtSvc/PoolUpgrade": {ComponentAdmin}, - "/mgmt.MgmtSvc/PoolRebuildStart": {ComponentAdmin}, - "/mgmt.MgmtSvc/PoolRebuildStop": {ComponentAdmin}, - "/mgmt.MgmtSvc/PoolSelfHealEval": {ComponentAdmin}, + "/mgmt.MgmtSvc/PoolRebuildStart": {ComponentAdmin, ComponentServer}, + "/mgmt.MgmtSvc/PoolRebuildStop": {ComponentAdmin, ComponentServer}, + "/mgmt.MgmtSvc/PoolSelfHealEval": {ComponentAdmin, ComponentServer}, "/mgmt.MgmtSvc/GetAttachInfo": {ComponentAgent}, "/mgmt.MgmtSvc/ListPools": {ComponentAdmin}, "/mgmt.MgmtSvc/ListContainers": {ComponentAdmin}, From 0eb00361022bcae93820f6ff56820b132c0af0f6 Mon Sep 17 00:00:00 2001 From: Liu Xuezhao Date: Fri, 14 Nov 2025 21:47:16 +0800 Subject: [PATCH 015/253] DAOS-18154 rebuild: cleanup IV cache before reint (#17080) For the case of no reboot between exclude and reint, should clean up some of the IVs in the first step of reint. Signed-off-by: Xuezhao Liu --- src/engine/server_iv.c | 21 +++++++++++++++++++++ src/include/daos_srv/iv.h | 1 + src/pool/srv_target.c | 5 +++++ 3 files changed, 27 insertions(+) diff --git a/src/engine/server_iv.c b/src/engine/server_iv.c index 18ba81d5a84..abde85fa360 100644 --- a/src/engine/server_iv.c +++ b/src/engine/server_iv.c @@ -880,6 +880,27 @@ ds_iv_ns_cleanup(struct ds_iv_ns *ns) } } +/* To prepare for reintegrate, cleanup some IVs' cache. + * May add more types later when needed.
+ */ +void +ds_iv_ns_reint_prep(struct ds_iv_ns *ns) +{ + struct ds_iv_entry *entry; + struct ds_iv_entry *tmp; + + d_list_for_each_entry_safe(entry, tmp, &ns->iv_entry_list, iv_link) { + if (entry->iv_key.class_id == IV_CONT_TRACK_EPOCH || + entry->iv_key.class_id == IV_CONT_PROP || + entry->iv_key.class_id == IV_CONT_SNAP) { + D_INFO(DF_UUID " delete IV class_id %d", DP_UUID(ns->iv_pool_uuid), + entry->iv_key.class_id); + d_list_del(&entry->iv_link); + iv_entry_free(entry); + } + } +} + void ds_iv_ns_stop(struct ds_iv_ns *ns) { diff --git a/src/include/daos_srv/iv.h b/src/include/daos_srv/iv.h index 12f19e98383..1221e97739b 100644 --- a/src/include/daos_srv/iv.h +++ b/src/include/daos_srv/iv.h @@ -319,6 +319,7 @@ int ds_iv_ns_create(crt_context_t ctx, uuid_t pool_uuid, crt_group_t *grp, void ds_iv_ns_update(struct ds_iv_ns *ns, unsigned int master_rank, uint64_t term); void ds_iv_ns_cleanup(struct ds_iv_ns *ns); +void ds_iv_ns_reint_prep(struct ds_iv_ns *ns); void ds_iv_ns_stop(struct ds_iv_ns *ns); void ds_iv_ns_leader_stop(struct ds_iv_ns *ns); void ds_iv_ns_start(struct ds_iv_ns *ns); diff --git a/src/pool/srv_target.c b/src/pool/srv_target.c index 07e587a3b85..f48ccf8228c 100644 --- a/src/pool/srv_target.c +++ b/src/pool/srv_target.c @@ -2797,6 +2797,8 @@ ds_pool_tgt_discard_handler(crt_rpc_t *rpc) pool->sp_need_discard = 1; pool->sp_discard_status = 0; rc = dss_ult_execute(ds_pool_tgt_discard_ult, arg, NULL, NULL, DSS_XS_SYS, 0, 0); + if (rc == 0) + ds_iv_ns_reint_prep(pool->sp_iv_ns); /* cleanup IV cache */ ds_pool_put(pool); out: @@ -3117,6 +3119,9 @@ ds_pool_recov_cont_handler(crt_rpc_t *rpc) rc = ds_pool_thread_collective(prci->prci_uuid, ex_status, pool_tgt_recov_cont, &prca, 0); ABT_rwlock_unlock(pool->sp_recov_lock); + if (rc == 0) + ds_iv_ns_reint_prep(pool->sp_iv_ns); /* cleanup IV cache */ + out: DL_CDEBUG(rc != 0, DLOG_ERR, DB_REBUILD, rc, "Recovered ( " DF_U64 ") containers for the pool " DF_UUID, prci->prci_cont_nr, From 
3026ae317ad4cb28691e10a8589f41d6cc62068f Mon Sep 17 00:00:00 2001 From: Ken Cain Date: Fri, 14 Nov 2025 10:57:42 -0500 Subject: [PATCH 016/253] DAOS-17751 test: ec_online_rebuild_mdtest, multi-recv/verbs (#16895) Configure this single test, test_ec_online_rebuild_mdtest to run with multi-recv when run with the verbs provider, ofi+verbs;ofi_rxm. For both engines and any client-side utilities launched by the test (e.g., the cart_ctl, daos, mdtest), configure environment variable NA_OFI_UNEXPECTED_TAG_MSG=0. The intent of this patch is to re-enable this test that has intermittently failed with mercury/libfabric errors such as NA_CB_RECV_UNEXPECTED in na_ofi_cq_process_retries(). Also in this change, the shared class ErasureCodeMdtest.setUp() method is changed to not connect to the pool it creates, since that is not used, and it also would require additional multi-recv environment configuration, to match the engine-side setup. Otherwise, the test will hang with mis-matched client / engine environments. 
Signed-off-by: Kenneth Cain --- src/tests/ftest/erasurecode/online_rebuild_mdtest.yaml | 8 ++++++++ src/tests/ftest/util/apricot/apricot/test.py | 1 + src/tests/ftest/util/ec_utils.py | 2 +- 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/tests/ftest/erasurecode/online_rebuild_mdtest.yaml b/src/tests/ftest/erasurecode/online_rebuild_mdtest.yaml index 5e60eae8eab..a6e82be7837 100644 --- a/src/tests/ftest/erasurecode/online_rebuild_mdtest.yaml +++ b/src/tests/ftest/erasurecode/online_rebuild_mdtest.yaml @@ -23,6 +23,9 @@ server_config: log_file: daos_server0.log log_mask: INFO storage: auto + env_vars: + - NA_OFI_UNEXPECTED_TAG_MSG=0 + 1: pinned_numa_node: 1 nr_xs_helpers: 1 @@ -31,6 +34,11 @@ server_config: log_file: daos_server1.log log_mask: INFO storage: auto + env_vars: + - NA_OFI_UNEXPECTED_TAG_MSG=0 +client: + env_vars: + - NA_OFI_UNEXPECTED_TAG_MSG=0 pool: size: 93% container: diff --git a/src/tests/ftest/util/apricot/apricot/test.py b/src/tests/ftest/util/apricot/apricot/test.py index 092b0e9a31e..829ee0f7bfa 100644 --- a/src/tests/ftest/util/apricot/apricot/test.py +++ b/src/tests/ftest/util/apricot/apricot/test.py @@ -851,6 +851,7 @@ def write_string_to_logfile(self, message): if self.server_managers and self.agent_managers: # Compose and run cart_ctl command cart_ctl = CartCtl() + cart_ctl.get_params(self) cart_ctl.add_log_msg.value = "add_log_msg" cart_ctl.rank.value = "all" cart_ctl.log_message.value = message diff --git a/src/tests/ftest/util/ec_utils.py b/src/tests/ftest/util/ec_utils.py index 10b4784f278..9243cbb9176 100644 --- a/src/tests/ftest/util/ec_utils.py +++ b/src/tests/ftest/util/ec_utils.py @@ -418,7 +418,7 @@ def setUp(self): """Set up each test case.""" super().setUp() # Create Pool - self.add_pool() + self.add_pool(connect=False) self.container = None self.out_queue = queue.Queue() From 0ff9ca79eceedc2b6457ef2610476557fdea0666 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" 
<49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 14 Nov 2025 11:23:22 -0800 Subject: [PATCH 017/253] DAOS-18211 cq: bump GHA versions (#17096) Updates `EnricoMi/publish-unit-test-result-action` from 2.20.0 to 2.21.0 Updates `actions/upload-artifact` from 4 to 5 Updates `codespell-project/actions-codespell` from 2.1 to 2.2 Updates `github/codeql-action` from 4.30.8 to 4.31.2 Signed-off-by: dependabot[bot] Signed-off-by: Dalton Bohning --- .github/workflows/bullseye-coverage.yml | 12 ++++++------ .github/workflows/ci2.yml | 2 +- .github/workflows/landing-builds.yml | 2 +- .github/workflows/linting.yml | 6 +++--- .github/workflows/ossf-scorecard.yml | 4 ++-- .github/workflows/rpm-build-and-test.yml | 12 ++++++------ .github/workflows/trivy.yml | 4 ++-- 7 files changed, 21 insertions(+), 21 deletions(-) diff --git a/.github/workflows/bullseye-coverage.yml b/.github/workflows/bullseye-coverage.yml index f03227c5dbf..3233338aa89 100644 --- a/.github/workflows/bullseye-coverage.yml +++ b/.github/workflows/bullseye-coverage.yml @@ -366,7 +366,7 @@ jobs: if: (!cancelled()) && (success() || failure()) && steps.run-test.outcome != 'skipped' # yamllint disable-line rule:line-length - uses: EnricoMi/publish-unit-test-result-action@3a74b2957438d0b6e2e61d67b05318aa25c9e6c6 # v2.20.0 + uses: EnricoMi/publish-unit-test-result-action@34d7c956a59aed1bfebf31df77b8de55db9bbaaf # v2.21.0 with: check_name: ${{ env.STAGE_NAME }} Test Results github_token: ${{ secrets.GITHUB_TOKEN }} @@ -374,14 +374,14 @@ jobs: - name: Publish artifacts if: (!cancelled()) && (success() || failure()) && steps.run-test.outcome != 'skipped' - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 with: name: ${{ env.STAGE_NAME }} artifacts path: ${{ env.STAGE_NAME }}/** - name: Upload test results if: (success() || failure()) && steps.run-test.outcome != 'skipped' - uses: actions/upload-artifact@v4 + uses: 
actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 with: name: ${{ env.STAGE_NAME }} test-results path: ${{ env.STAGE_NAME }}/**/results.xml @@ -634,7 +634,7 @@ jobs: if: (!cancelled()) && (success() || failure()) && steps.run-test.outcome != 'skipped' # yamllint disable-line rule:line-length - uses: EnricoMi/publish-unit-test-result-action@3a74b2957438d0b6e2e61d67b05318aa25c9e6c6 # v2.20.0 + uses: EnricoMi/publish-unit-test-result-action@34d7c956a59aed1bfebf31df77b8de55db9bbaaf # v2.21.0 with: check_name: ${{ env.STAGE_NAME }} Test Results github_token: ${{ secrets.GITHUB_TOKEN }} @@ -642,14 +642,14 @@ jobs: - name: Publish artifacts if: (!cancelled()) && (success() || failure()) && steps.run-test.outcome != 'skipped' - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 with: name: ${{ env.STAGE_NAME }} artifacts path: ${{ env.STAGE_NAME }}/** - name: Upload test results if: (success() || failure()) && steps.run-test.outcome != 'skipped' - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 with: name: ${{ env.STAGE_NAME }} test-results path: ${{ env.STAGE_NAME }}/**/results.xml diff --git a/.github/workflows/ci2.yml b/.github/workflows/ci2.yml index c49e46f3785..3832de6f163 100644 --- a/.github/workflows/ci2.yml +++ b/.github/workflows/ci2.yml @@ -68,7 +68,7 @@ jobs: - name: Publish NLT test results if: always() # yamllint disable-line rule:line-length - uses: EnricoMi/publish-unit-test-result-action@3a74b2957438d0b6e2e61d67b05318aa25c9e6c6 # v2.20.0 + uses: EnricoMi/publish-unit-test-result-action@34d7c956a59aed1bfebf31df77b8de55db9bbaaf # v2.21.0 with: github_token: ${{ secrets.GITHUB_TOKEN }} files: nlt-junit.xml diff --git a/.github/workflows/landing-builds.yml b/.github/workflows/landing-builds.yml index 6a557999a04..b083d8d300a 100644 --- a/.github/workflows/landing-builds.yml +++ 
b/.github/workflows/landing-builds.yml @@ -144,7 +144,7 @@ jobs: - name: Publish NLT test results if: always() # yamllint disable-line rule:line-length - uses: EnricoMi/publish-unit-test-result-action@3a74b2957438d0b6e2e61d67b05318aa25c9e6c6 # v2.20.0 + uses: EnricoMi/publish-unit-test-result-action@34d7c956a59aed1bfebf31df77b8de55db9bbaaf # v2.21.0 with: github_token: ${{ secrets.GITHUB_TOKEN }} files: nlt-junit.xml diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 084673c65a2..fe3db350144 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -129,7 +129,7 @@ jobs: - name: Run check run: doxygen Doxyfile - name: 'Upload Artifact' - uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4.3.1 + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 with: name: API Documentation path: docs/doxygen/html/ @@ -164,7 +164,7 @@ jobs: - name: Install extra python packages run: python3 -m pip install --requirement utils/cq/requirements.txt - name: Run check - uses: codespell-project/actions-codespell@406322ec52dd7b488e48c1c4b82e2a8b3a1bf630 # master + uses: codespell-project/actions-codespell@8f01853be192eb0f849a5c7d721450e7a467c579 # master with: skip: ./src/control/vendor,./src/control/go.sum,./.git,./utils/*.patch ignore_words_file: ci/codespell.ignores @@ -191,7 +191,7 @@ jobs: with: target: ${{ steps.get_merge_base.outputs.ref }} - name: Export changes - uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4.3.1 + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 if: failure() with: name: format-patch-for-pr-${{ github.event.pull_request.number }} diff --git a/.github/workflows/ossf-scorecard.yml b/.github/workflows/ossf-scorecard.yml index 3f57d3d8d94..c33fe62c222 100644 --- a/.github/workflows/ossf-scorecard.yml +++ b/.github/workflows/ossf-scorecard.yml @@ -62,7 +62,7 @@ jobs: # uploads of run results in SARIF # 
format to the repository Actions tab. - name: "Upload artifact" - uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4.3.1 + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 with: name: SARIF file path: results.sarif @@ -71,6 +71,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard (optional). # Commenting out will disable upload of results to your repo's Code Scanning dashboard - name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@f443b600d91635bebf5b0d9ebc620189c0d6fba5 # v4.30.8 + uses: github/codeql-action/upload-sarif@0499de31b99561a6d14a36a5f662c2a54f91beee # v4.31.2 with: sarif_file: results.sarif diff --git a/.github/workflows/rpm-build-and-test.yml b/.github/workflows/rpm-build-and-test.yml index 8588f840b85..38ece1cce75 100644 --- a/.github/workflows/rpm-build-and-test.yml +++ b/.github/workflows/rpm-build-and-test.yml @@ -375,7 +375,7 @@ jobs: if: (!cancelled()) && (success() || failure()) && steps.run-test.outcome != 'skipped' # yamllint disable-line rule:line-length - uses: EnricoMi/publish-unit-test-result-action@3a74b2957438d0b6e2e61d67b05318aa25c9e6c6 # v2.20.0 + uses: EnricoMi/publish-unit-test-result-action@34d7c956a59aed1bfebf31df77b8de55db9bbaaf # v2.21.0 with: check_name: ${{ env.STAGE_NAME }} Test Results github_token: ${{ secrets.GITHUB_TOKEN }} @@ -383,14 +383,14 @@ jobs: - name: Publish artifacts if: (!cancelled()) && (success() || failure()) && steps.run-test.outcome != 'skipped' - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 with: name: ${{ env.STAGE_NAME }} artifacts path: ${{ env.STAGE_NAME }}/** - name: Upload test results if: (success() || failure()) && steps.run-test.outcome != 'skipped' - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 with: name: ${{ env.STAGE_NAME }} test-results path: ${{ env.STAGE_NAME 
}}/**/results.xml @@ -643,7 +643,7 @@ jobs: if: (!cancelled()) && (success() || failure()) && steps.run-test.outcome != 'skipped' # yamllint disable-line rule:line-length - uses: EnricoMi/publish-unit-test-result-action@3a74b2957438d0b6e2e61d67b05318aa25c9e6c6 # v2.20.0 + uses: EnricoMi/publish-unit-test-result-action@34d7c956a59aed1bfebf31df77b8de55db9bbaaf # v2.21.0 with: check_name: ${{ env.STAGE_NAME }} Test Results github_token: ${{ secrets.GITHUB_TOKEN }} @@ -651,14 +651,14 @@ jobs: - name: Publish artifacts if: (!cancelled()) && (success() || failure()) && steps.run-test.outcome != 'skipped' - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 with: name: ${{ env.STAGE_NAME }} artifacts path: ${{ env.STAGE_NAME }}/** - name: Upload test results if: (success() || failure()) && steps.run-test.outcome != 'skipped' - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 with: name: ${{ env.STAGE_NAME }} test-results path: ${{ env.STAGE_NAME }}/**/results.xml diff --git a/.github/workflows/trivy.yml b/.github/workflows/trivy.yml index a04be926bb7..4a708cdcbf8 100644 --- a/.github/workflows/trivy.yml +++ b/.github/workflows/trivy.yml @@ -49,7 +49,7 @@ jobs: cp utils/trivy/.trivyignore report/trivyignore.txt - name: Upload the report to the GitHub artifact store - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 with: path: report/* name: trivy-report-daos @@ -68,7 +68,7 @@ jobs: trivy-config: 'utils/trivy/trivy.yaml' - name: Upload Trivy scan results to GitHub Security tab - uses: github/codeql-action/upload-sarif@f443b600d91635bebf5b0d9ebc620189c0d6fba5 # v4.30.8 + uses: github/codeql-action/upload-sarif@0499de31b99561a6d14a36a5f662c2a54f91beee # v4.31.2 with: sarif_file: 'trivy-results.sarif' From 
e837088c25a0f93daa1e49576e5d8a0679807032 Mon Sep 17 00:00:00 2001 From: Dalton Bohning Date: Mon, 17 Nov 2025 06:00:46 -0800 Subject: [PATCH 018/253] DAOS-17827 test: use correct gen_certificates.sh (#17129) Use gen_certificates.sh relative to the set prefix, not ftest. Signed-off-by: Dalton Bohning --- src/tests/ftest/util/launch_utils.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/tests/ftest/util/launch_utils.py b/src/tests/ftest/util/launch_utils.py index e317558bdd0..cd8bf8eeed2 100644 --- a/src/tests/ftest/util/launch_utils.py +++ b/src/tests/ftest/util/launch_utils.py @@ -869,13 +869,12 @@ def _generate_certs(self, logger): logger.debug("Generating certificates") test_env = TestEnvironment() certs_dir = os.path.join(test_env.log_dir, "daosCA") - certgen_dir = os.path.abspath( - os.path.join("..", "..", "..", "..", "lib64", "daos", "certgen")) - command = os.path.join(certgen_dir, "gen_certificates.sh") if not run_local(logger, f"/usr/bin/rm -rf {certs_dir}").passed: message = "Error removing old certificates" self.test_result.fail_test(logger, "Prepare", message, sys.exc_info()) return False + command = os.path.abspath( + os.path.join(test_env.daos_prefix, "lib64", "daos", "certgen", "gen_certificates.sh")) if not run_local(logger, f"{command} {test_env.log_dir}").passed: message = "Error generating certificates" self.test_result.fail_test(logger, "Prepare", message, sys.exc_info()) From 3d8f848623a851914799d3fc19e398b81c51e117 Mon Sep 17 00:00:00 2001 From: Tom Nabarro Date: Mon, 17 Nov 2025 18:11:19 +0000 Subject: [PATCH 019/253] DAOS-18201 control: Allow bracketed strings in CreateRankSet (#17124) Signed-off-by: Tom Nabarro --- src/control/lib/ranklist/ranklist.go | 19 +++++------ src/control/lib/ranklist/ranklist_test.go | 12 +++++++ src/control/server/mgmt_system_test.go | 40 +++++++++++++++++++++++ 3 files changed, 60 insertions(+), 11 deletions(-) diff --git a/src/control/lib/ranklist/ranklist.go 
b/src/control/lib/ranklist/ranklist.go index add82be22aa..a677f4bf172 100644 --- a/src/control/lib/ranklist/ranklist.go +++ b/src/control/lib/ranklist/ranklist.go @@ -25,11 +25,7 @@ func init() { } } -func fixBrackets(stringRanks string, remove bool) string { - if remove { - return strings.Trim(stringRanks, "[]") - } - +func addBrackets(stringRanks string) string { if !strings.HasPrefix(stringRanks, "[") { stringRanks = "[" + stringRanks } @@ -40,6 +36,10 @@ func fixBrackets(stringRanks string, remove bool) string { return stringRanks } +func removeBrackets(stringRanks string) string { + return strings.Trim(stringRanks, "[]") +} + // RankList provides convenience methods for working with Rank slices. type RankList []Rank @@ -65,7 +65,7 @@ func (rs *RankSet) String() string { if rs == nil || rs.ns == nil { return "" } - return fixBrackets(rs.ns.String(), true) + return removeBrackets(rs.ns.String()) } // RangedString returns a ranged string representation of the RankSet. @@ -201,14 +201,11 @@ func MustCreateRankSet(stringRanks string) *RankSet { func CreateRankSet(stringRanks string) (*RankSet, error) { rs := NewRankSet() - if len(stringRanks) < 1 { + if len(removeBrackets(stringRanks)) < 1 { return rs, nil } - stringRanks = fixBrackets(stringRanks, false) - - // add enclosing brackets to input so CreateSet works without hostnames - ns, err := hostlist.CreateNumericSet(stringRanks) + ns, err := hostlist.CreateNumericSet(addBrackets(stringRanks)) if err != nil { return nil, err } diff --git a/src/control/lib/ranklist/ranklist_test.go b/src/control/lib/ranklist/ranklist_test.go index 07ddef38396..a4d773afad5 100644 --- a/src/control/lib/ranklist/ranklist_test.go +++ b/src/control/lib/ranklist/ranklist_test.go @@ -31,10 +31,22 @@ func TestRanklist_RankSet(t *testing.T) { expCount: 0, expRanks: []Rank{}, }, + "empty bracketed start list": { + ranks: "[]", + expOut: "", + expCount: 0, + expRanks: []Rank{}, + }, "invalid with hostnames": { ranks: 
"node2-1,node1-2.suffix1,node1-[45,47].suffix2,node3,node1-3", expErr: errors.New("unexpected alphabetic character(s)"), }, + "simple bracketed ranged rank list": { + ranks: "[0-10]", + expOut: "0-10", + expCount: 11, + expRanks: []Rank{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, + }, "simple ranged rank list": { ranks: "0-10", expOut: "0-10", diff --git a/src/control/server/mgmt_system_test.go b/src/control/server/mgmt_system_test.go index 1d25ddcfc7d..a7d214183f6 100644 --- a/src/control/server/mgmt_system_test.go +++ b/src/control/server/mgmt_system_test.go @@ -595,6 +595,46 @@ func TestServer_MgmtSvc_getPoolRanks(t *testing.T) { }, expDrpcCount: 2, }, + "two pools; bracketed zero disabled ranks": { + pools: []string{test.MockUUID(1), test.MockUUID(2)}, + inRanks: ranklist.MustCreateRankSet("1,8"), + getEnabled: false, + drpcResps: []*mockDrpcResponse{ + &mockDrpcResponse{ + Message: &mgmtpb.PoolQueryResp{ + EnabledRanks: "0-4", + DisabledRanks: "[]", + }, + }, + &mockDrpcResponse{ + Message: &mgmtpb.PoolQueryResp{ + EnabledRanks: "1-7", + DisabledRanks: "[]", + }, + }, + }, + expDrpcCount: 2, + }, + "two pools; bracketed zero enabled ranks": { + pools: []string{test.MockUUID(1), test.MockUUID(2)}, + inRanks: ranklist.MustCreateRankSet("1,8"), + getEnabled: true, + drpcResps: []*mockDrpcResponse{ + &mockDrpcResponse{ + Message: &mgmtpb.PoolQueryResp{ + EnabledRanks: "[]", + DisabledRanks: "0-4", + }, + }, + &mockDrpcResponse{ + Message: &mgmtpb.PoolQueryResp{ + EnabledRanks: "[]", + DisabledRanks: "1-7", + }, + }, + }, + expDrpcCount: 2, + }, "match zero ranks; two pools": { pools: []string{test.MockUUID(1), test.MockUUID(2)}, inRanks: ranklist.MustCreateRankSet("8-10"), From d8194a413d936ec531743c241323dc57d39349fc Mon Sep 17 00:00:00 2001 From: Kris Jacque Date: Mon, 17 Nov 2025 11:14:31 -0700 Subject: [PATCH 020/253] DAOS-13520 control: Show unchecked pools in dmg check query (#17091) - Include listing of unchecked pools returned by dmg check query, even in 
non-verbose mode. Signed-off-by: Kris Jacque --- src/control/cmd/dmg/pretty/check.go | 43 +++++++----- src/control/cmd/dmg/pretty/check_test.go | 84 ++++++++++++++++++++++-- 2 files changed, 106 insertions(+), 21 deletions(-) diff --git a/src/control/cmd/dmg/pretty/check.go b/src/control/cmd/dmg/pretty/check.go index 334d4e848ea..7860e694d26 100644 --- a/src/control/cmd/dmg/pretty/check.go +++ b/src/control/cmd/dmg/pretty/check.go @@ -50,26 +50,40 @@ func PrintCheckerPolicies(out io.Writer, flags control.SystemCheckFlags, policie tf.Format(table) } -func countResultPools(resp *control.SystemCheckQueryResp) int { +func countResultPools(resp *control.SystemCheckQueryResp) (int, int) { if resp == nil { - return 0 + return 0, 0 } - poolMap := make(map[string]struct{}) + checkedMap := make(map[string]struct{}) + uncheckedMap := make(map[string]struct{}) for _, pool := range resp.Pools { - // Don't include pools that were not checked. if pool.Unchecked() { - continue + uncheckedMap[pool.UUID] = struct{}{} + } else { + checkedMap[pool.UUID] = struct{}{} } - poolMap[pool.UUID] = struct{}{} } for _, report := range resp.Reports { if report.IsRemovedPool() && report.PoolUuid != "" { - poolMap[report.PoolUuid] = struct{}{} + checkedMap[report.PoolUuid] = struct{}{} } } - return len(poolMap) + return len(checkedMap), len(uncheckedMap) +} + +func printSystemCheckPoolInfo(out io.Writer, pools []*control.SystemCheckPoolInfo, verbose bool) { + if verbose { + fmt.Fprintln(out, "\nPer-Pool Checker Info:") + } else { + fmt.Fprintln(out, "\nUnchecked Pools:") + } + for _, pool := range pools { + if verbose || pool.Unchecked() { + fmt.Fprintf(out, " %+v\n", pool) + } + } } // PrintCheckQueryResp prints the checker results to the console. @@ -91,15 +105,15 @@ func PrintCheckQueryResp(out io.Writer, resp *control.SystemCheckQueryResp, verb // should show the number of pools being checked. If the checker has completed, // we should show the number of unique pools found in the reports. 
action := "Checking" - poolCount := countResultPools(resp) + checkedCount, uncheckedCount := countResultPools(resp) if resp.Status == control.SystemCheckStatusCompleted { action = "Checked" } - if poolCount > 0 { - fmt.Fprintf(out, " %s %s\n", action, english.Plural(poolCount, "pool", "")) + if checkedCount > 0 { + fmt.Fprintf(out, " %s %s\n", action, english.Plural(checkedCount, "pool", "")) } - if len(resp.Pools) > 0 && verbose { + if len(resp.Pools) > 0 && (verbose || uncheckedCount > 0) { pools := make([]*control.SystemCheckPoolInfo, 0, len(resp.Pools)) for _, pool := range resp.Pools { pools = append(pools, pool) @@ -107,10 +121,7 @@ func PrintCheckQueryResp(out io.Writer, resp *control.SystemCheckQueryResp, verb sort.Slice(pools, func(i, j int) bool { return pools[i].UUID < pools[j].UUID }) - fmt.Fprintln(out, "\nPer-Pool Checker Info:") - for _, pool := range pools { - fmt.Fprintf(out, " %+v\n", pool) - } + printSystemCheckPoolInfo(out, pools, verbose) } fmt.Fprintln(out) diff --git a/src/control/cmd/dmg/pretty/check_test.go b/src/control/cmd/dmg/pretty/check_test.go index ca96f645fa7..3394206d020 100644 --- a/src/control/cmd/dmg/pretty/check_test.go +++ b/src/control/cmd/dmg/pretty/check_test.go @@ -203,11 +203,6 @@ Inconsistency Reports: Phase: chkpb.CheckScanPhase_CSP_DONE.String(), StartTime: checkTime, }, - "pool-5": { - UUID: "pool-5", - Status: chkpb.CheckPoolStatus_CPS_UNCHECKED.String(), - Phase: chkpb.CheckScanPhase_CSP_PREPARE.String(), - }, }, Reports: []*control.SystemCheckReport{ { @@ -264,6 +259,85 @@ ID Class Pool Resolution 0x3 POOL_LESS_SVC_WITHOUT_QUORUM pool-3 TRUST_PS 0x4 POOL_NONEXIST_ON_ENGINE pool-4 DISCARD (dry run) +`, + }, + "non-verbose with unchecked pools": { + resp: &control.SystemCheckQueryResp{ + Status: control.SystemCheckStatusCompleted, + ScanPhase: control.SystemCheckScanPhaseDone, + Pools: map[string]*control.SystemCheckPoolInfo{ + "pool-1": { + UUID: "pool-1", + Status: chkpb.CheckPoolStatus_CPS_CHECKED.String(), + 
Phase: chkpb.CheckScanPhase_CSP_DONE.String(), + StartTime: checkTime, + }, + "pool-2": { + UUID: "pool-2", + Status: chkpb.CheckPoolStatus_CPS_UNCHECKED.String(), + Phase: chkpb.CheckScanPhase_CSP_DONE.String(), + }, + "pool-3": { + UUID: "pool-3", + Status: chkpb.CheckPoolStatus_CPS_CHECKED.String(), + Phase: chkpb.CheckScanPhase_CSP_DONE.String(), + StartTime: checkTime, + }, + "pool-5": { + UUID: "pool-5", + Status: chkpb.CheckPoolStatus_CPS_UNCHECKED.String(), + Phase: chkpb.CheckScanPhase_CSP_PREPARE.String(), + }, + }, + Reports: []*control.SystemCheckReport{ + { + CheckReport: chkpb.CheckReport{ + Seq: 1, + Class: chkpb.CheckInconsistClass_CIC_POOL_BAD_SVCL, + Action: chkpb.CheckInconsistAction_CIA_IGNORE, + Msg: "message 1", + PoolUuid: "pool-1", + }, + }, + { + CheckReport: chkpb.CheckReport{ + Seq: 3, + Class: chkpb.CheckInconsistClass_CIC_POOL_LESS_SVC_WITHOUT_QUORUM, + Action: chkpb.CheckInconsistAction_CIA_TRUST_PS, + Msg: "message 3", + PoolUuid: "pool-3", + }, + }, + { + CheckReport: chkpb.CheckReport{ + Seq: 4, + Result: int32(chkpb.CheckResult_DRY_RUN), + Class: chkpb.CheckInconsistClass_CIC_POOL_NONEXIST_ON_ENGINE, + Action: chkpb.CheckInconsistAction_CIA_DISCARD, + Msg: "message 4", + PoolUuid: "pool-4", + }, + }, + }, + }, + expOut: ` +DAOS System Checker Info + Current status: COMPLETED + Current phase: DONE (Check completed) + Checked 3 pools + +Unchecked Pools: + Pool pool-2: 0 ranks, status: CPS_UNCHECKED, phase: CSP_DONE + Pool pool-5: 0 ranks, status: CPS_UNCHECKED, phase: CSP_PREPARE + +Inconsistency Reports: +- Resolved: +ID Class Pool Resolution +-- ----- ---- ---------- +0x1 POOL_BAD_SVCL pool-1 IGNORE +0x3 POOL_LESS_SVC_WITHOUT_QUORUM pool-3 TRUST_PS +0x4 POOL_NONEXIST_ON_ENGINE pool-4 DISCARD (dry run) + `, }, "non-verbose with container": { From b51a31cca2c4e29e0400a2a9503de53c69a25b21 Mon Sep 17 00:00:00 2001 From: Cedric Koch-Hofer <94527853+knard38@users.noreply.github.com> Date: Tue, 18 Nov 2025 00:35:54 +0100 Subject: [PATCH 
021/253] DAOS-14750 control: fix cont get-prop values (#17040) Fix the output values of daos get-prop command of the properties rd_fac, rd_lvl and layout_type Signed-off-by: Cedric Koch-Hofer --- src/control/lib/daos/container_property.go | 37 +++++--- .../lib/daos/container_property_test.go | 88 ++++++++++++++----- 2 files changed, 90 insertions(+), 35 deletions(-) diff --git a/src/control/lib/daos/container_property.go b/src/control/lib/daos/container_property.go index b123b151efd..057dc262f34 100644 --- a/src/control/lib/daos/container_property.go +++ b/src/control/lib/daos/container_property.go @@ -1,5 +1,6 @@ // // (C) Copyright 2021-2023 Intel Corporation. +// (C) Copyright 2025 Hewlett Packard Enterprise Development LP // (C) Copyright 2025 Google LLC // // SPDX-License-Identifier: BSD-2-Clause-Patent @@ -368,7 +369,7 @@ var propHdlrs = propHdlrMap{ }, C.DAOS_PROP_ENTRY_REDUN_FAC: { C.DAOS_PROP_CO_REDUN_FAC, - "Redundancy Factor", + "Redundancy Factor (0-4)", nil, valHdlrMap{ "0": genSetValHdlr(C.DAOS_PROP_CO_REDUN_RF0), @@ -381,15 +382,15 @@ var propHdlrs = propHdlrMap{ func(p *ContainerProperty) string { switch p.GetValue() { case C.DAOS_PROP_CO_REDUN_RF0: - return "rd_fac0" + return "0" case C.DAOS_PROP_CO_REDUN_RF1: - return "rd_fac1" + return "1" case C.DAOS_PROP_CO_REDUN_RF2: - return "rd_fac2" + return "2" case C.DAOS_PROP_CO_REDUN_RF3: - return "rd_fac3" + return "3" case C.DAOS_PROP_CO_REDUN_RF4: - return "rd_fac4" + return "4" default: return propInvalidValue(p) } @@ -500,7 +501,7 @@ var propHdlrs = propHdlrMap{ }, C.DAOS_PROP_ENTRY_REDUN_LVL: { C.DAOS_PROP_CO_REDUN_LVL, - "Redundancy Level", + "Redundancy Level (rank=1, node=2)", nil, valHdlrMap{ "1": genSetValHdlr(C.DAOS_PROP_CO_REDUN_RANK), @@ -510,14 +511,13 @@ var propHdlrs = propHdlrMap{ }, []string{"rf_lvl"}, func(p *ContainerProperty) string { - lvl := p.GetValue() - switch lvl { + switch p.GetValue() { case C.DAOS_PROP_CO_REDUN_RANK: - return fmt.Sprintf("rank (%d)", lvl) + return "rank" 
case C.DAOS_PROP_CO_REDUN_NODE: - return fmt.Sprintf("node (%d)", lvl) + return "node" default: - return fmt.Sprintf("(%d)", lvl) + return propInvalidValue(p) } }, false, @@ -549,7 +549,16 @@ var propHdlrs = propHdlrMap{ // ---------------------------------------- C.DAOS_PROP_ENTRY_LAYOUT_TYPE: { C.DAOS_PROP_CO_LAYOUT_TYPE, - "Layout Type", + func() string { + acc := []string{} + for i := 0; i < C.DAOS_PROP_CO_LAYOUT_MAX; i++ { + var loStr [10]C.char + + C.daos_unparse_ctype(C.ushort(i), &loStr[0]) + acc = append(acc, C.GoString(&loStr[0])) + } + return "Layout Type (" + strings.Join(acc, ", ") + ")" + }(), nil, nil, nil, @@ -558,7 +567,7 @@ var propHdlrs = propHdlrMap{ loInt := C.ushort(p.GetValue()) C.daos_unparse_ctype(loInt, &loStr[0]) - return fmt.Sprintf("%s (%d)", C.GoString(&loStr[0]), loInt) + return fmt.Sprintf("%s", C.GoString(&loStr[0])) }, true, }, diff --git a/src/control/lib/daos/container_property_test.go b/src/control/lib/daos/container_property_test.go index abf98ead5ef..9f0d6fc164c 100644 --- a/src/control/lib/daos/container_property_test.go +++ b/src/control/lib/daos/container_property_test.go @@ -1,5 +1,6 @@ // // (C) Copyright 2021-2022 Intel Corporation. 
+// (C) Copyright 2025 Hewlett Packard Enterprise Development LP // (C) Copyright 2025 Google LLC // // SPDX-License-Identifier: BSD-2-Clause-Patent @@ -201,13 +202,13 @@ func TestDaos_ContainerProperty_RedunLevel(t *testing.T) { var expStr string switch inputKey { case "1": - expStr = "rank (1)" + expStr = "rank" case "2": - expStr = "node (2)" + expStr = "node" case "rank": - expStr = "rank (1)" + expStr = "rank" case "node": - expStr = "node (2)" + expStr = "node" default: t.Fatalf("untested key %q", inputKey) } @@ -218,7 +219,7 @@ func TestDaos_ContainerProperty_RedunLevel(t *testing.T) { t.Run("unexpected level", func(t *testing.T) { testProp := newTestContainerProperty(ContainerPropRedunLevel) testProp.SetValue(42) - test.AssertEqual(t, "(42)", testProp.StringValue(), "unexpected string value") + test.AssertEqual(t, fmt.Sprintf("property %q: invalid value 0x2a", testProp.Name), testProp.StringValue(), "unexpected string value") }) } @@ -233,15 +234,15 @@ func TestDaos_ContainerProperty_RedunFactor(t *testing.T) { var expStr string switch inputKey { case "0": - expStr = "rd_fac0" + expStr = "0" case "1": - expStr = "rd_fac1" + expStr = "1" case "2": - expStr = "rd_fac2" + expStr = "2" case "3": - expStr = "rd_fac3" + expStr = "3" case "4": - expStr = "rd_fac4" + expStr = "4" default: t.Fatalf("untested key %q", inputKey) } @@ -323,19 +324,64 @@ func testReadOnlyContainerProperty(t *testing.T, propType ContainerPropType) { test.CmpErr(t, errors.Errorf("property %q is read-only", testProp.Name), testProp.Set("whoops")) } -func TestDaos_ContainerProperty_Layout(t *testing.T) { +func TestDaos_ContainerProperty_LayoutValues(t *testing.T) { testReadOnlyContainerProperty(t, ContainerPropLayoutType) - t.Run("valid layout", func(t *testing.T) { - testProp := newTestContainerProperty(ContainerPropLayoutType) - testProp.SetValue(uint64(ContainerLayoutPOSIX)) - test.AssertEqual(t, testProp.StringValue(), fmt.Sprintf("%s (%d)", ContainerLayoutPOSIX, ContainerLayoutPOSIX), 
"unexpected string value") - }) - t.Run("unknown layout", func(t *testing.T) { - testProp := newTestContainerProperty(ContainerPropLayoutType) - testProp.SetValue(uint64(ContainerLayoutUnknown)) - test.AssertEqual(t, testProp.StringValue(), "unknown (0)", "unexpected string value") - }) + for name, tc := range map[string]struct { + propVal uint64 + expStr string + }{ + "Valid unknown layout": { + propVal: uint64(ContainerLayoutUnknown), + expStr: "unknown", + }, + "Valid POSIX layout": { + propVal: uint64(ContainerLayoutPOSIX), + expStr: "POSIX", + }, + "Valid HDF5 layout": { + propVal: uint64(ContainerLayoutHDF5), + expStr: "HDF5", + }, + "Valid PYTHON layout": { + propVal: uint64(ContainerLayoutPython), + expStr: "PYTHON", + }, + "Valid SPARK layout": { + propVal: uint64(ContainerLayoutSpark), + expStr: "SPARK", + }, + "Valid DATABASE layout": { + propVal: uint64(ContainerLayoutDatabase), + expStr: "DATABASE", + }, + "Valid ROOT layout": { + propVal: uint64(ContainerLayoutRoot), + expStr: "ROOT", + }, + "Valid SEISMIC layout": { + propVal: uint64(ContainerLayoutSeismic), + expStr: "SEISMIC", + }, + "Valid METEO layout": { + propVal: uint64(ContainerLayoutMeteo), + expStr: "METEO", + }, + } { + t.Run(name, func(t *testing.T) { + testProp := newTestContainerProperty(ContainerPropLayoutType) + testProp.SetValue(tc.propVal) + + test.AssertEqual(t, tc.expStr, testProp.StringValue(), "unexpected string value") + }) + } +} + +func TestDaos_ContainerProperty_LayoutDescription(t *testing.T) { + testReadOnlyContainerProperty(t, ContainerPropLayoutType) + + testProp := newTestContainerProperty(ContainerPropLayoutType) + test.AssertEqual(t, testProp.Description, "Layout Type (unknown, POSIX, HDF5, PYTHON, SPARK, DATABASE, ROOT, SEISMIC, METEO)", "unexpected description") } func TestDaos_ContainerProperty_ACL(t *testing.T) { From 19c927ea390c7682e6e7c66f6565fc08afc65242 Mon Sep 17 00:00:00 2001 From: Phil Henderson Date: Tue, 18 Nov 2025 09:37:38 -0500 Subject: [PATCH 
022/253] DAOS-18220 build: Create 2.8 TB1 (#17137) Test build 1 for DAOS 2.8 Signed-off-by: Phil Henderson --- TAG | 2 +- VERSION | 2 +- utils/rpms/daos.changelog | 5 ++++- utils/rpms/daos.spec | 4 ++-- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/TAG b/TAG index 9071fdd003b..3dac9223c86 100644 --- a/TAG +++ b/TAG @@ -1 +1 @@ -2.7.101-tb +2.7.102-tb diff --git a/VERSION b/VERSION index 9ab9777fabf..aab94590421 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.7.101 +2.7.102 diff --git a/utils/rpms/daos.changelog b/utils/rpms/daos.changelog index ce183a90690..59172d6c460 100644 --- a/utils/rpms/daos.changelog +++ b/utils/rpms/daos.changelog @@ -1,5 +1,8 @@ %changelog -* Wed Nov 05 2025 Tomasz Gromadzki - 2.7-101-17 +* Fri Nov 17 2025 Phillip Henderson 2.7.102-1 +- Bump version to 2.7.102 + +* Wed Nov 05 2025 Tomasz Gromadzki - 2.7.101-17 - Update PMDK to release 2.1.2 * Thu Oct 16 2025 Jeff Olivier 2.7.101-16 diff --git a/utils/rpms/daos.spec b/utils/rpms/daos.spec index 3e095b37b1e..ec834343eda 100644 --- a/utils/rpms/daos.spec +++ b/utils/rpms/daos.spec @@ -24,8 +24,8 @@ %endif Name: daos -Version: 2.7.101 -Release: 17%{?relval}%{?dist} +Version: 2.7.102 +Release: 1%{?relval}%{?dist} Summary: DAOS Storage Engine License: BSD-2-Clause-Patent From 1a1e344e0b0e3b9295fab4726b6487b88bff6fb0 Mon Sep 17 00:00:00 2001 From: Tomasz Gromadzki Date: Wed, 19 Nov 2025 18:19:50 +0100 Subject: [PATCH 023/253] SRE-2772 ci: remove all references to hpdd.intel.com (#16874) Also enable Jenkins githook as Jenkins is back operational. 
Signed-off-by: Tomasz Gromadzki --- .github/workflows/bullseye-coverage.yml | 4 ++-- .github/workflows/rpm-build-and-test.yml | 4 ++-- ci/functional/launchable_analysis | 2 +- utils/githooks/pre-commit.d/30-Jenkinsfile.sh | 1 - 4 files changed, 5 insertions(+), 6 deletions(-) diff --git a/.github/workflows/bullseye-coverage.yml b/.github/workflows/bullseye-coverage.yml index 3233338aa89..92826fb5b56 100644 --- a/.github/workflows/bullseye-coverage.yml +++ b/.github/workflows/bullseye-coverage.yml @@ -201,7 +201,7 @@ jobs: env: CONFIG_POWER_ONLY: false PRAGMA_SUFFIX: -vm - OPERATIONS_EMAIL: john.malmberg@hpe.com + OPERATIONS_EMAIL: core-daos-devops@groups.int.hpe.com TEST_RPMS: true COMMIT_MESSAGE: ${{ needs.Call-RPM-Build.outputs.commit-message }} JENKINS_URL: https://jenkins-3.daos.hpc.amslabs.hpecorp.net/ @@ -491,7 +491,7 @@ jobs: env: CONFIG_POWER_ONLY: false PRAGMA_SUFFIX: -vm - OPERATIONS_EMAIL: john.malmberg@hpe.com + OPERATIONS_EMAIL: core-daos-devops@groups.int.hpe.com TEST_RPMS: true COMMIT_MESSAGE: ${{ needs.Call-RPM-Build.outputs.commit-message }} JENKINS_URL: https://jenkins-3.daos.hpc.amslabs.hpecorp.net/ diff --git a/.github/workflows/rpm-build-and-test.yml b/.github/workflows/rpm-build-and-test.yml index 38ece1cce75..3132c34043d 100644 --- a/.github/workflows/rpm-build-and-test.yml +++ b/.github/workflows/rpm-build-and-test.yml @@ -210,7 +210,7 @@ jobs: env: CONFIG_POWER_ONLY: false PRAGMA_SUFFIX: -vm - OPERATIONS_EMAIL: john.malmberg@hpe.com + OPERATIONS_EMAIL: core-daos-devops@groups.int.hpe.com TEST_RPMS: true COMMIT_MESSAGE: ${{ needs.Call-RPM-Build.outputs.commit-message }} JENKINS_URL: https://jenkins-3.daos.hpc.amslabs.hpecorp.net/ @@ -500,7 +500,7 @@ jobs: env: CONFIG_POWER_ONLY: false PRAGMA_SUFFIX: -vm - OPERATIONS_EMAIL: john.malmberg@hpe.com + OPERATIONS_EMAIL: core-daos-devops@groups.int.hpe.com TEST_RPMS: true COMMIT_MESSAGE: ${{ needs.Call-RPM-Build.outputs.commit-message }} JENKINS_URL: https://jenkins-3.daos.hpc.amslabs.hpecorp.net/ 
diff --git a/ci/functional/launchable_analysis b/ci/functional/launchable_analysis index faf2ed43c43..9b40db9c396 100755 --- a/ci/functional/launchable_analysis +++ b/ci/functional/launchable_analysis @@ -57,5 +57,5 @@ if $notify; then env | sort | grep ^CHANGE_ echo echo "See details in $dir." - } | mail -r "$HOSTNAME"@hpe.com -s "Launchable prediction failure: ${not_predicted_percent}%" john.malmberg@hpe.com + } | mail -r "$HOSTNAME"@hpe.com -s "Launchable prediction failure: ${not_predicted_percent}%" core-daos-devops@groups.int.hpe.com fi diff --git a/utils/githooks/pre-commit.d/30-Jenkinsfile.sh b/utils/githooks/pre-commit.d/30-Jenkinsfile.sh index 84385123c9e..098a7790332 100755 --- a/utils/githooks/pre-commit.d/30-Jenkinsfile.sh +++ b/utils/githooks/pre-commit.d/30-Jenkinsfile.sh @@ -21,7 +21,6 @@ if [ -z "$(_git_diff_cached_files "Jenkinsfile")" ] ; then exit 0 fi - echo "Checking syntax" : "${JENKINS_HOST:=jenkins.daos.hpc.amslabs.hpecorp.net}" From ec672e94eb727b7b5bd1efdec3cd44606ac1712d Mon Sep 17 00:00:00 2001 From: Tomasz Gromadzki Date: Wed, 19 Nov 2025 18:22:11 +0100 Subject: [PATCH 024/253] DAOS-17642 tools: use the ISO 8601 date format: YYYY-MM-DD (#17128) From now on, all dates provided for log operations must be in the ISO 8601 date format: YYYY-MM-DD. Zeros should be added to the beginning of one-digit months and days. Harmonize log operations module with the new logs' date/time stamp format introduced by the https://github.com/daos-stack/daos/pull/16772 PR: YYYY-MM-DD HH:MM:SS.mmmmmm Signed-off-by: Tomasz Gromadzki --- src/control/lib/support/log.go | 47 +++++----- src/control/lib/support/log_test.go | 127 ++++++++++++++-------------- 2 files changed, 87 insertions(+), 87 deletions(-) diff --git a/src/control/lib/support/log.go b/src/control/lib/support/log.go index 721fc18fb50..dc22e8d915f 100644 --- a/src/control/lib/support/log.go +++ b/src/control/lib/support/log.go @@ -1,5 +1,6 @@ // // (C) Copyright 2022-2024 Intel Corporation. 
+// (C) Copyright 2025 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -50,8 +51,8 @@ type CollectLogSubCmd struct { TargetFolder string `short:"t" long:"target-folder" description:"Target Folder location where log will be copied"` Archive bool `short:"z" long:"archive" description:"Archive the log/config files"` ExtraLogsDir string `short:"c" long:"extra-logs-dir" description:"Collect the Logs from given directory"` - LogStartDate string `short:"D" long:"start-date" description:"Specify the start date, the day from log will be collected, Format: MM-DD"` - LogEndDate string `short:"F" long:"end-date" description:"Specify the end date, the day till the log will be collected, Format: MM-DD"` + LogStartDate string `short:"D" long:"start-date" description:"Specify the start date, the day from log will be collected, Format: YYYY-MM-DD"` + LogEndDate string `short:"F" long:"end-date" description:"Specify the end date, the day till the log will be collected, Format: YYYY-MM-DD"` LogStartTime string `short:"S" long:"log-start-time" description:"Specify the log collection start time, Format: HH:MM:SS"` LogEndTime string `short:"E" long:"log-end-time" description:"Specify the log collection end time, Format: HH:MM:SS"` FileTransferExecArgs string `short:"T" long:"transfer-args" description:"Extra arguments for alternate file transfer tool"` @@ -62,11 +63,10 @@ type LogTypeSubCmd struct { } const ( - MMDDYYYY = "1-2-2006" - HHMMSS = "15:4:5" - MMDDHHMMSS = "1/2-15:4:5" - MMDDYYYY_HHMMSS = "1-2-2006 15:4:5" - YYYYMMDD_HHMMSS = "2006/1/2 15:4:5" + YYYYMMDD = "2006-01-02" // Date format as it is expected from commandline argument + HHMMSS = "15:04:05" // Time format as it is expected from commandline argument + YYYYMMDD_HHMMSS = "2006-01-02 15:04:05" // Date/Time format as it is defined by ISO 8601 + YYYYMMDD_HHMMSS_LOG = "2006/01/02 15:04:05" // Date/Time format as it is used in DAOS logs ) // Folder names to copy logs and 
configs @@ -164,14 +164,14 @@ type logCopy struct { // Verify if the date and time argument is valid and return error if it's invalid func (cmd *CollectLogSubCmd) DateTimeValidate() error { if cmd.LogStartDate != "" || cmd.LogEndDate != "" { - startDate, err := time.Parse(MMDDYYYY, cmd.LogStartDate) + startDate, err := time.Parse(YYYYMMDD, cmd.LogStartDate) if err != nil { - return errors.New("Invalid date, please provide the startDate in MM-DD-YYYY format") + return errors.New("Invalid date, please provide the startDate in YYYY-MM-DD format") } - endDate, err := time.Parse(MMDDYYYY, cmd.LogEndDate) + endDate, err := time.Parse(YYYYMMDD, cmd.LogEndDate) if err != nil { - return errors.New("Invalid date, please provide the endDate in MM-DD-YYYY format") + return errors.New("Invalid date, please provide the endDate in YYYY-MM-DD format") } if startDate.After(endDate) { @@ -719,12 +719,12 @@ func getDateTime(log logging.Logger, opts ...CollectLogsParams) (time.Time, time startTimeStr := fmt.Sprintf("%s %s", opts[0].LogStartDate, opts[0].LogStartTime) endTimeStr := fmt.Sprintf("%s %s", opts[0].LogEndDate, opts[0].LogEndTime) - actStartTime, err := time.Parse(MMDDYYYY_HHMMSS, startTimeStr) + actStartTime, err := time.Parse(YYYYMMDD_HHMMSS, startTimeStr) if err != nil { return time.Time{}, time.Time{}, err } - actEndTime, err := time.Parse(MMDDYYYY_HHMMSS, endTimeStr) + actEndTime, err := time.Parse(YYYYMMDD_HHMMSS, endTimeStr) if err != nil { return time.Time{}, time.Time{}, err } @@ -766,17 +766,16 @@ func cpLinesFromLog(log logging.Logger, srcFile string, destFile string, opts .. scanner := bufio.NewScanner(readFile) var cpLogLine bool if opts[0].LogCmd == "EngineLog" { - // Remove year as engine log does not store the year information. 
- actStartTime, _ = time.Parse(MMDDHHMMSS, actStartTime.Format(MMDDHHMMSS)) - actEndTime, _ = time.Parse(MMDDHHMMSS, actEndTime.Format(MMDDHHMMSS)) + actStartTime, _ = time.Parse(YYYYMMDD_HHMMSS_LOG, actStartTime.Format(YYYYMMDD_HHMMSS_LOG)) + actEndTime, _ = time.Parse(YYYYMMDD_HHMMSS_LOG, actEndTime.Format(YYYYMMDD_HHMMSS_LOG)) - var validDateTime = regexp.MustCompile(`^\d\d\/\d\d-\d\d:\d\d:\d\d.\d\d`) + var validDateTime = regexp.MustCompile(`^\d\d\d\d\/\d\d\/\d\d \d\d:\d\d:\d\d.\d\d\d\d\d\d`) for scanner.Scan() { lineData := scanner.Text() lineDataSlice := strings.Split(lineData, " ") // Verify if log line has date/time stamp and copy line if it's in range. - if validDateTime.MatchString(lineData) == false { + if !validDateTime.MatchString(lineData) { if cpLogLine { _, err = writeFile.WriteString(lineData + "\n") if err != nil { @@ -786,10 +785,10 @@ func cpLinesFromLog(log logging.Logger, srcFile string, destFile string, opts .. continue } - dateTime := strings.Split(lineDataSlice[0], "-") - timeOnly := strings.Split(dateTime[1], ".") - expDateTime := fmt.Sprintf("%s-%s", dateTime[0], timeOnly[0]) - expLogTime, _ := time.Parse(MMDDHHMMSS, expDateTime) + dateTime := lineDataSlice[0] + timeOnly := lineDataSlice[1] + expDateTime := fmt.Sprintf("%s %s", dateTime, timeOnly) + expLogTime, _ := time.Parse(YYYYMMDD_HHMMSS_LOG, expDateTime) // Copy line, if the log line has time stamp between the given range of start/end date and time. if expLogTime.After(actStartTime) && expLogTime.Before(actEndTime) { @@ -818,7 +817,7 @@ func cpLinesFromLog(log logging.Logger, srcFile string, destFile string, opts .. lineData := scanner.Text() // Verify if log line has date/time stamp and copy line if it's in range. 
- if validDateTime.MatchString(lineData) == false { + if !validDateTime.MatchString(lineData) { if cpLogLine { _, err = writeFile.WriteString(lineData + "\n") if err != nil { @@ -829,7 +828,7 @@ func cpLinesFromLog(log logging.Logger, srcFile string, destFile string, opts .. } data := validDateTime.FindAllString(lineData, -1) - expLogTime, _ := time.Parse(YYYYMMDD_HHMMSS, data[0]) + expLogTime, _ := time.Parse(YYYYMMDD_HHMMSS_LOG, data[0]) // Copy line, if the log line has time stamp between the given range of start/end date and time. if expLogTime.After(actStartTime) && expLogTime.Before(actEndTime) { cpLogLine = true diff --git a/src/control/lib/support/log_test.go b/src/control/lib/support/log_test.go index db7a5c048eb..1b3ebf8d32f 100644 --- a/src/control/lib/support/log_test.go +++ b/src/control/lib/support/log_test.go @@ -1,5 +1,6 @@ // // (C) Copyright 2022-2024 Intel Corporation. +// (C) Copyright 2025 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -893,30 +894,30 @@ func TestSupport_DateTimeValidate(t *testing.T) { expErr: nil, }, "Valid StartDate No EndDate": { - logStartDate: "12-01-2024", - expErr: errors.New("Invalid date, please provide the endDate in MM-DD-YYYY format"), + logStartDate: "2024-12-01", + expErr: errors.New("Invalid date, please provide the endDate in YYYY-MM-DD format"), }, "No StartDate Valid EndDate": { - logEndDate: "12-31-2024", - expErr: errors.New("Invalid date, please provide the startDate in MM-DD-YYYY format"), + logEndDate: "2024-12-31", + expErr: errors.New("Invalid date, please provide the startDate in YYYY-MM-DD format"), }, "Invalid StartDate No EndDate": { - logStartDate: "44-22-2024", - expErr: errors.New("Invalid date, please provide the startDate in MM-DD-YYYY format"), + logStartDate: "2024-44-22", + expErr: errors.New("Invalid date, please provide the startDate in YYYY-MM-DD format"), }, "Invalid EndDate": { - logStartDate: "12-01-2024", - logEndDate: "44-22-2024", 
- expErr: errors.New("Invalid date, please provide the endDate in MM-DD-YYYY format"), + logStartDate: "2024-12-01", + logEndDate: "2024-44-22", + expErr: errors.New("Invalid date, please provide the endDate in YYYY-MM-DD format"), }, "StartDate after EndDate": { - logStartDate: "10-01-2024", - logEndDate: "05-06-2024", + logStartDate: "2024-10-01", + logEndDate: "2024-05-06", expErr: errors.New("start-date can not be after end-date"), }, "Valid StartDate and EndDate": { - logStartDate: "12-01-2024", - logEndDate: "12-31-2024", + logStartDate: "2024-12-01", + logEndDate: "2024-12-31", expErr: nil, }, "Valid StartTime No EndTime": { @@ -942,8 +943,8 @@ func TestSupport_DateTimeValidate(t *testing.T) { expErr: nil, }, "Valid Date Time": { - logStartDate: "12-01-2024", - logEndDate: "12-31-2024", + logStartDate: "2024-12-01", + logEndDate: "2024-12-31", logStartTime: "13:15:59", logEndTime: "20:30:50", expErr: nil, @@ -1034,19 +1035,19 @@ func TestSupport_cpLinesFromLog(t *testing.T) { collLogParams := CollectLogsParams{} - DummyEngineLog := `01/01-01:01:01.90 system-01 LOG LINE 1 -02/02-04:04:04.90 system-02 LOG LINE 2 -03/03-06:06:06.90 system-02 LOG LINE 3 -04/04-08:08:08.90 system-02 LOG LINE 4 -05/05-10:10:10.90 system-02 LOG LINE 5 -06/06-12:12:12.90 system-02 LOG LINE 6 -07/07-14:14:14.90 system-02 LOG LINE 7 + DummyEngineLog := `2023/01/01-01:01:01.90 system-01 LOG LINE 1 +2023/02/02 04:04:04.908070 system-02 LOG LINE 2 +2023/03/03 06:06:06.907060 system-02 LOG LINE 3 +2023/04/04 08:08:08.905040 system-02 LOG LINE 4 +2023/05/05 10:10:10.904030 system-02 LOG LINE 5 +2023/06/06 12:12:12.903020 system-02 LOG LINE 6 +2023/07/07 14:14:14.902010 system-02 LOG LINE 7 LINE WITHOUT DATE AND TIME -08/08-16:16:16.90 system-02 LOG LINE 8 -09/09-18:18:18.90 system-02 LOG LINE 9 -10/10-20:20:20.90 system-02 LOG LINE 10 -11/11-22:22:22.90 system-02 LOG LINE 11 -12/12-23:59:59.90 system-02 LOG LINE 12 +2023/08/08 16:16:16.901090 system-02 LOG LINE 8 +2023/09/09 
18:18:18.909080 system-02 LOG LINE 9 +2023/10/10 20:20:20.908070 system-02 LOG LINE 10 +2023/11/11 22:22:22.907060 system-02 LOG LINE 11 +2023/12/12 23:59:59.906050 system-02 LOG LINE 12 ` MockEngineLogFile := test.CreateTestFile(t, targetTestDir, DummyEngineLog) @@ -1108,42 +1109,42 @@ INFO 2023/12/12 23:59:59.441241 LOG LINE 12 expErr: errors.New("unable to Copy File"), }, "Invalid Source File": { - logStartDate: "01-01-2023", - logEndDate: "12-31-2023", + logStartDate: "2023-01-01", + logEndDate: "2023-12-31", srcFile: srcPath + "unknownFile", destFile: dstTestDir, expErr: errors.New("no such file or directory"), }, "Valid date without any time": { - logStartDate: "01-01-2023", - logEndDate: "12-31-2023", + logStartDate: "2023-01-01", + logEndDate: "2023-12-31", srcFile: srcPath, destFile: dstTestDir, expErr: nil, }, "Verify the content of Engine log line based on date": { - logStartDate: "04-01-2023", - logEndDate: "08-08-2023", + logStartDate: "2023-04-01", + logEndDate: "2023-08-08", srcFile: MockEngineLogFile, destFile: dstTestDir, logCmd: "EngineLog", expErr: nil, - verifyLog: "08/08-16:16:16.90 system-02 LOG LINE 8", + verifyLog: "2023/08/08 16:16:16.901090 system-02 LOG LINE 8", }, "Verify the content of Engine log line based on date and time": { - logStartDate: "09-09-2023", - logEndDate: "11-11-2023", + logStartDate: "2023-09-09", + logEndDate: "2023-11-11", logStartTime: "12:00:00", logEndTime: "23:23:23", srcFile: MockEngineLogFile, destFile: dstTestDir, logCmd: "EngineLog", expErr: nil, - verifyLog: "11/11-22:22:22.90 system-02 LOG LINE 11", + verifyLog: "2023/11/11 22:22:22.907060 system-02 LOG LINE 11", }, "Verify the content of Control log line based on date": { - logStartDate: "04-01-2023", - logEndDate: "08-08-2023", + logStartDate: "2023-04-01", + logEndDate: "2023-08-08", srcFile: MockControlLogFile, destFile: dstTestDir, logCmd: "ControlLog", @@ -1151,8 +1152,8 @@ INFO 2023/12/12 23:59:59.441241 LOG LINE 12 verifyLog: "hostname INFO 
2023/08/08 16:16:16 LOG LINE 8", }, "Verify the content of Control log line based on date and time": { - logStartDate: "09-09-2023", - logEndDate: "11-11-2023", + logStartDate: "2023-09-09", + logEndDate: "2023-11-11", logStartTime: "12:00:00", logEndTime: "23:23:23", srcFile: MockControlLogFile, @@ -1162,8 +1163,8 @@ INFO 2023/12/12 23:59:59.441241 LOG LINE 12 verifyLog: "hostname INFO 2023/11/11 22:22:22 LOG LINE 11", }, "Verify the content of Admin log line based on date": { - logStartDate: "04-01-2023", - logEndDate: "08-08-2023", + logStartDate: "2023-04-01", + logEndDate: "2023-08-08", srcFile: MockAdminLogFile, destFile: dstTestDir, logCmd: "HelperLog", @@ -1171,8 +1172,8 @@ INFO 2023/12/12 23:59:59.441241 LOG LINE 12 verifyLog: "INFO 2023/08/08 16:16:16.441237 LOG LINE 8", }, "Verify the content of Admin log line based on date and time": { - logStartDate: "09-09-2023", - logEndDate: "11-11-2023", + logStartDate: "2023-09-09", + logEndDate: "2023-11-11", logStartTime: "12:00:00", logEndTime: "23:23:23", srcFile: MockAdminLogFile, @@ -1223,39 +1224,39 @@ func TestSupport_getDateTime(t *testing.T) { expErr error }{ "No StartTime": { - logStartDate: "1-2-2023", - logEndDate: "1-3-2023", + logStartDate: "2023-01-02", + logEndDate: "2023-01-03", expErr: nil, }, "No EndTime": { - logStartDate: "1-2-2023", - logEndDate: "1-3-2023", + logStartDate: "2023-01-02", + logEndDate: "2023-01-03", logStartTime: "10:10:10", - expStartTime: "01-02-2023 10:10:10", - expEndTime: "01-03-2023 23:59:59", + expStartTime: "2023-01-02 10:10:10", + expEndTime: "2023-01-03 23:59:59", expErr: nil, }, "Valid Date and Invalid Start Time": { - logStartDate: "1-2-2023", - logEndDate: "1-3-2023", + logStartDate: "2023-01-02", + logEndDate: "2023-01-03", logStartTime: "99:99:99", logEndTime: "12:12:12", - expErr: errors.New("parsing time \"1-2-2023 99:99:99\": hour out of range"), + expErr: errors.New("parsing time \"2023-01-02 99:99:99\": hour out of range"), }, "Valid Date and Invalid End 
Time": { - logStartDate: "1-2-2023", - logEndDate: "1-3-2023", + logStartDate: "2023-01-02", + logEndDate: "2023-01-03", logStartTime: "10:10:10", logEndTime: "99:99:99", - expErr: errors.New("parsing time \"1-3-2023 99:99:99\": hour out of range"), + expErr: errors.New("parsing time \"2023-01-03 99:99:99\": hour out of range"), }, "Valid Date and Time": { - logStartDate: "1-2-2023", - logEndDate: "1-3-2023", + logStartDate: "2023-01-02", + logEndDate: "2023-01-03", logStartTime: "10:10:10", logEndTime: "12:12:12", - expStartTime: "01-02-2023 10:10:10", - expEndTime: "01-03-2023 12:12:12", + expStartTime: "2023-01-02 10:10:10", + expEndTime: "2023-01-03 12:12:12", expErr: nil, }, } { @@ -1267,13 +1268,13 @@ func TestSupport_getDateTime(t *testing.T) { startTime, endTime, gotErr := getDateTime(log, collLogParams) test.CmpErr(t, tc.expErr, gotErr) if tc.expStartTime != "" { - tmpStartTime, _ := time.Parse(MMDDYYYY_HHMMSS, tc.expStartTime) + tmpStartTime, _ := time.Parse(YYYYMMDD_HHMMSS, tc.expStartTime) if tmpStartTime.Equal(startTime) == false { t.Fatalf("Expected StartTime:=%s But Got :=%s", tmpStartTime, startTime) } } if tc.expEndTime != "" { - tmpEndTime, _ := time.Parse(MMDDYYYY_HHMMSS, tc.expEndTime) + tmpEndTime, _ := time.Parse(YYYYMMDD_HHMMSS, tc.expEndTime) if tmpEndTime.Equal(endTime) == false { t.Fatalf("Expected EndTime:=%s But Got :=%s", tmpEndTime, endTime) } From 9a4fb1b1d28704357d1e19a5b094960386d0c0ce Mon Sep 17 00:00:00 2001 From: Nasf-Fan Date: Thu, 20 Nov 2025 01:23:59 +0800 Subject: [PATCH 025/253] DAOS-18170 object: more delay before retry obj rpc (#17130) If an update RPC is timeout, it is unnecessary to retry the update RPC immediately. Because the original one maybe blocked on some of targets under some heave load cases. Under such case, the retried RPC will get -DER_INPROGRESS and cause RPC retry again and again. That will further increase server load and make the situation to be worse. 
This patch introduces more delay before retrying RPC under such cases. It also add more delay for collective object RPC. Signed-off-by: Fan Yong --- src/object/cli_obj.c | 75 +++++++++++++++++++++++---------------- src/object/obj_internal.h | 47 +++++++----------------- src/object/obj_tx.c | 11 +++--- src/object/srv_obj.c | 15 ++++---- 4 files changed, 68 insertions(+), 80 deletions(-) diff --git a/src/object/cli_obj.c b/src/object/cli_obj.c index 7ba74731f62..d22cf109e41 100644 --- a/src/object/cli_obj.c +++ b/src/object/cli_obj.c @@ -1717,35 +1717,43 @@ dc_obj_layout_refresh(daos_handle_t oh) } uint32_t -dc_obj_retry_delay(tse_task_t *task, uint32_t opc, int err, uint16_t *retry_cnt, - uint16_t *inprogress_cnt, uint32_t timeout_sec) +dc_obj_retry_delay(tse_task_t *task, uint32_t opc, int err, uint32_t *retry_cnt, + uint32_t timeout_sec, bool long_delay) { uint32_t delay = 0; - if (err == -DER_INPROGRESS || err == -DER_UPDATE_AGAIN) - ++(*inprogress_cnt); - - if (++(*retry_cnt) > 1) { + /* Randomly delay [1, max_delay - 5] for DER_OVERLOAD_RETRY case. */ + if (err == -DER_OVERLOAD_RETRY) { + delay = daos_rpc_rand_delay(timeout_sec) << 20; + } else if (++(*retry_cnt) > 1) { /* Randomly delay [31 ~ 1023] us if it is not the first retried object RPC. */ delay = (d_rand() | ((1 << 5) - 1)) & ((1 << 10) - 1); /* Rebuild is being established on the server side, wait a bit longer */ - if (err == -DER_UPDATE_AGAIN) + if (err == -DER_UPDATE_AGAIN || long_delay) { delay <<= 10; - else if (opc == DAOS_OBJ_RPC_COLL_PUNCH) - /* 128 times of the delay for collective object RPC. */ - delay <<= 7; - else if (opc == DAOS_OBJ_RPC_CPD) - /* 8 times of the delay for compounded RPC. */ - delay <<= 3; - D_DEBUG(DB_IO, "Try to re-sched task %p (%u) for %u/%u times with %u us delay\n", - task, opc, *inprogress_cnt, *retry_cnt, delay); + } else { + switch (opc) { + case DAOS_OBJ_RPC_COLL_PUNCH: + case DAOS_OBJ_RPC_COLL_QUERY: + /* 256 times of the delay for collective object RPC. 
*/ + delay <<= 8; + break; + case DAOS_OBJ_RPC_CPD: + /* 8 times of the delay for compounded RPC. */ + delay <<= 3; + break; + default: + break; + } + + /* Increase delay after multiple times retry. */ + if (*retry_cnt >= 5) + delay <<= 1; + } } - /* - * Randomly delay [1, max_delay - 5] for DER_OVERLOAD_RETRY case. - */ - if (err == -DER_OVERLOAD_RETRY) - delay = daos_rpc_rand_delay(timeout_sec) << 20; + D_DEBUG(DB_IO, "Try to re-sched task %p (%u) for %u times with %u us delay\n", task, opc, + *retry_cnt, delay); return delay; } @@ -1755,10 +1763,12 @@ obj_retry_cb(tse_task_t *task, struct dc_object *obj, struct obj_auxi_args *obj_auxi, bool pmap_stale, bool *io_task_reinited) { - tse_sched_t *sched = tse_task2sched(task); - tse_task_t *pool_task = NULL; - int result = task->dt_result; - int rc; + tse_sched_t *sched = tse_task2sched(task); + tse_task_t *pool_task = NULL; + uint32_t delay = 0; + uint32_t opc = obj_auxi->opc; + int result = task->dt_result; + int rc; if (pmap_stale) { rc = obj_pool_query_task(sched, obj, 0, &pool_task); @@ -1767,8 +1777,6 @@ obj_retry_cb(tse_task_t *task, struct dc_object *obj, } if (obj_auxi->io_retry) { - uint32_t delay = 0; - if (pool_task != NULL) { rc = dc_task_depend(task, 1, &pool_task); if (rc != 0) { @@ -1778,19 +1786,24 @@ obj_retry_cb(tse_task_t *task, struct dc_object *obj, } } + if (obj_is_modification_opc(opc) && result == -DER_TIMEDOUT) + obj_auxi->long_retry_delay = 1; + else if (result != -DER_INPROGRESS) + obj_auxi->long_retry_delay = 0; + if (!pmap_stale) { uint32_t now = daos_gettime_coarse(); - delay = - dc_obj_retry_delay(task, obj_auxi->opc, result, &obj_auxi->retry_cnt, - &obj_auxi->inprogress_cnt, obj_auxi->max_delay); + delay = dc_obj_retry_delay(task, opc, result, &obj_auxi->retry_cnt, + obj_auxi->max_delay, + obj_auxi->long_retry_delay == 1 ? 
true : false); if (result == -DER_INPROGRESS && - ((obj_auxi->retry_warn_ts == 0 && obj_auxi->inprogress_cnt >= 10) || + ((obj_auxi->retry_warn_ts == 0 && obj_auxi->retry_cnt >= 10) || (obj_auxi->retry_warn_ts > 0 && obj_auxi->retry_warn_ts + 10 < now))) { obj_auxi->retry_warn_ts = now; obj_auxi->flags |= ORF_MAYBE_STARVE; D_WARN("The task %p has been retried for %u times, maybe starve\n", - task, obj_auxi->inprogress_cnt); + task, obj_auxi->retry_cnt); } } diff --git a/src/object/obj_internal.h b/src/object/obj_internal.h index 0710c3ad59d..598c37644ee 100644 --- a/src/object/obj_internal.h +++ b/src/object/obj_internal.h @@ -464,40 +464,17 @@ struct obj_auxi_args { * ec_wait_recov -- obj fetch wait another EC recovery task, * ec_in_recov -- a EC recovery task */ - uint32_t io_retry:1, - args_initialized:1, - to_leader:1, - spec_shard:1, - spec_group:1, - req_reasbed:1, - is_ec_obj:1, - csum_retry:1, - csum_report:1, - tx_uncertain:1, - nvme_io_err:1, - no_retry:1, - ec_wait_recov:1, - ec_in_recov:1, - new_shard_tasks:1, - reset_param:1, - force_degraded:1, - shards_scheded:1, - sub_anchors:1, - ec_degrade_fetch:1, - tx_convert:1, - cond_modify:1, - /* conf_fetch split to multiple sub-tasks */ - cond_fetch_split:1, - reintegrating:1, - tx_renew:1, - rebuilding:1, - for_migrate:1, - req_dup_sgl:1; + uint32_t new_shard_tasks : 1, reset_param : 1, force_degraded : 1, shards_scheded : 1, + io_retry : 1, args_initialized : 1, to_leader : 1, spec_shard : 1, spec_group : 1, + req_reasbed : 1, is_ec_obj : 1, csum_retry : 1, csum_report : 1, tx_uncertain : 1, + nvme_io_err : 1, no_retry : 1, ec_wait_recov : 1, ec_in_recov : 1, rebuilding : 1, + sub_anchors : 1, ec_degrade_fetch : 1, long_retry_delay : 1, cond_fetch_split : 1, + cond_modify : 1, reintegrating : 1, tx_renew : 1, tx_convert : 1, req_dup_sgl : 1, + for_migrate : 1; /* request flags. 
currently only: ORF_RESEND */ - uint32_t specified_shard; - uint32_t flags; - uint16_t retry_cnt; - uint16_t inprogress_cnt; + uint32_t specified_shard; + uint32_t flags; + uint32_t retry_cnt; /* Last timestamp (in second) when report retry warning message. */ uint32_t retry_warn_ts; struct obj_req_tgts req_tgts; @@ -925,8 +902,8 @@ void obj_decref(struct dc_object *obj); int obj_get_grp_size(struct dc_object *obj); struct dc_object *obj_hdl2ptr(daos_handle_t oh); uint32_t -dc_obj_retry_delay(tse_task_t *task, uint32_t opc, int err, uint16_t *retry_cnt, - uint16_t *inprogress_cnt, uint32_t timeout_secs); +dc_obj_retry_delay(tse_task_t *task, uint32_t opc, int err, uint32_t *retry_cnt, + uint32_t timeout_secs, bool long_delay); /* handles, pointers for handling I/O */ struct obj_io_context { diff --git a/src/object/obj_tx.c b/src/object/obj_tx.c index 7d37a091f4d..dfe3461eff0 100644 --- a/src/object/obj_tx.c +++ b/src/object/obj_tx.c @@ -101,8 +101,7 @@ struct dc_tx { /** The read requests count */ uint32_t tx_read_cnt; - uint16_t tx_retry_cnt; - uint16_t tx_inprogress_cnt; + uint32_t tx_retry_cnt; /* Last timestamp (in second) when report retry warning message. */ uint32_t tx_retry_warn_ts; /** Pool map version when trigger first IO. 
*/ @@ -1083,15 +1082,14 @@ dc_tx_commit_cb(tse_task_t *task, void *data) if (rc != -DER_TX_RESTART) { uint32_t now = daos_gettime_coarse(); - delay = dc_obj_retry_delay(task, DAOS_OBJ_RPC_CPD, rc, &tx->tx_retry_cnt, - &tx->tx_inprogress_cnt, 0); + delay = dc_obj_retry_delay(task, DAOS_OBJ_RPC_CPD, rc, &tx->tx_retry_cnt, 0, false); if (rc == -DER_INPROGRESS && - ((tx->tx_retry_warn_ts == 0 && tx->tx_inprogress_cnt >= 10) || + ((tx->tx_retry_warn_ts == 0 && tx->tx_retry_cnt >= 10) || (tx->tx_retry_warn_ts > 0 && tx->tx_retry_warn_ts + 10 < now))) { tx->tx_retry_warn_ts = now; tx->tx_maybe_starve = 1; D_WARN("The dist TX task %p has been retried for %u times, maybe starve\n", - task, tx->tx_inprogress_cnt); + task, tx->tx_retry_cnt); } rc1 = tse_task_reinit_with_delay(task, delay); @@ -2588,7 +2586,6 @@ dc_tx_restart_begin(struct dc_tx *tx, uint32_t *backoff) */ tx->tx_status = TX_RESTARTING; tx->tx_retry_cnt = 0; - tx->tx_inprogress_cnt = 0; *backoff = d_backoff_seq_next(&tx->tx_backoff_seq); } diff --git a/src/object/srv_obj.c b/src/object/srv_obj.c index f94a0f9db21..258cc1f3fe8 100644 --- a/src/object/srv_obj.c +++ b/src/object/srv_obj.c @@ -5983,13 +5983,14 @@ ds_obj_coll_query_handler(crt_rpc_t *rpc) rc = dtx_leader_end(dlh, ioc.ioc_coc, rc); out: - D_DEBUG(DB_IO, "Handled collective query RPC %p %s forwarding for obj "DF_UOID - " on rank %u XS %u/%u epc "DF_X64" pmv %u, with dti "DF_DTI", dct_nr %u, " - "forward width %u, forward depth %u\n: "DF_RC"\n", rpc, - ocqi->ocqi_tgts.ca_count <= 1 ? 
"without" : "with", DP_UOID(ocqi->ocqi_oid), - myrank, dmi->dmi_xs_id, tgt_id, ocqi->ocqi_epoch, ocqi->ocqi_map_ver, - DP_DTI(&ocqi->ocqi_xid), (unsigned int)ocqi->ocqi_tgts.ca_count, - ocqi->ocqi_disp_width, ocqi->ocqi_disp_depth, DP_RC(rc)); + DL_CDEBUG(rc != 0 && rc != -DER_INPROGRESS, DLOG_ERR, DB_IO, rc, + "Handled collective query RPC %p %s forwarding for obj " DF_UOID " on rank %u XS " + "%u/%u epc " DF_X64 " pmv %u, with dti " DF_DTI ", dct_nr %u, forward width %u, " + "forward depth %u", + rpc, ocqi->ocqi_tgts.ca_count <= 1 ? "without" : "with", DP_UOID(ocqi->ocqi_oid), + myrank, dmi->dmi_xs_id, tgt_id, ocqi->ocqi_epoch, ocqi->ocqi_map_ver, + DP_DTI(&ocqi->ocqi_xid), (unsigned int)ocqi->ocqi_tgts.ca_count, + ocqi->ocqi_disp_width, ocqi->ocqi_disp_depth); obj_reply_set_status(rpc, rc); obj_reply_map_version_set(rpc, version); From 7b7ed996c5cbb3daec58413584f24ff1c3649365 Mon Sep 17 00:00:00 2001 From: Liu Xuezhao Date: Thu, 20 Nov 2025 23:19:21 +0800 Subject: [PATCH 026/253] DAOS-18161 object: refine EC aggregation processing (#17112) 1. After restart take the sc_ec_agg_eph_boundary as ec aggregation's min epoch to avoid scan from epoch 0. 2. Consume more credits for layout calculate in EC agg. 3. Don't bump sc_ec_agg_eph after it reset during EC agg, to avoid data corruption. 
Signed-off-by: Xuezhao Liu --- src/container/srv_container.c | 3 +- src/container/srv_target.c | 15 +++++++-- src/object/srv_ec_aggregate.c | 59 ++++++++++++++++++++++++++--------- 3 files changed, 59 insertions(+), 18 deletions(-) diff --git a/src/container/srv_container.c b/src/container/srv_container.c index e9bbda10d74..e671162db5f 100644 --- a/src/container/srv_container.c +++ b/src/container/srv_container.c @@ -2110,7 +2110,8 @@ cont_agg_eph_sync(struct ds_pool *pool, struct cont_svc *svc) min_ec_agg_eph = eph_ldr->cte_rdb_ec_agg_eph; if (min_ec_agg_eph == eph_ldr->cte_current_ec_agg_eph && - min_stable_eph == eph_ldr->cte_current_stable_eph) + min_stable_eph == eph_ldr->cte_current_stable_eph && + eph_ldr->cte_current_ec_agg_eph != 0) continue; /** diff --git a/src/container/srv_target.c b/src/container/srv_target.c index b7dd3ce9b4f..fdf3a915f04 100644 --- a/src/container/srv_target.c +++ b/src/container/srv_target.c @@ -260,6 +260,7 @@ get_hae(struct ds_cont_child *cont, bool vos_agg) /* EC aggregation */ if (!vos_agg) return cont->sc_ec_agg_eph; + /* * Query the 'Highest Aggregated Epoch', the HAE will be bumped * in vos_aggregate() @@ -2795,9 +2796,17 @@ ds_cont_eph_report(struct ds_pool *pool) } } - if (min_ec_agg_eph == 0 || min_ec_agg_eph == DAOS_EPOCH_MAX || - min_stable_eph == 0 || min_stable_eph == DAOS_EPOCH_MAX || - (min_ec_agg_eph <= ec_eph->cte_last_ec_agg_epoch && + if (min_ec_agg_eph <= ec_eph->cte_last_ec_agg_epoch && + min_stable_eph <= ec_eph->cte_last_stable_epoch && + pool->sp_reclaim == DAOS_RECLAIM_DISABLED) + continue; + + /* if aggregation enabled, make sure to report ec_agg_eph at the start phase + * when min_ec_agg_eph and cte_last_ec_agg_epoch are both zero. 
+ */ + if (min_ec_agg_eph == DAOS_EPOCH_MAX || min_stable_eph == DAOS_EPOCH_MAX || + (ec_eph->cte_last_ec_agg_epoch != 0 && + min_ec_agg_eph <= ec_eph->cte_last_ec_agg_epoch && min_stable_eph <= ec_eph->cte_last_stable_epoch)) { if (min_ec_agg_eph > 0 && min_stable_eph > 0 && (min_ec_agg_eph < ec_eph->cte_last_ec_agg_epoch || diff --git a/src/object/srv_ec_aggregate.c b/src/object/srv_ec_aggregate.c index 1390eb656b4..982969e17c3 100644 --- a/src/object/srv_ec_aggregate.c +++ b/src/object/srv_ec_aggregate.c @@ -115,7 +115,7 @@ struct ec_agg_entry { struct ec_agg_par_extent ae_par_extent; /* Parity extent */ daos_handle_t ae_obj_hdl; /* Object handle for cur obj */ struct pl_obj_layout *ae_obj_layout; - struct daos_shard_loc ae_peer_pshards[OBJ_EC_MAX_P]; + struct daos_shard_loc ae_peer_pshards[OBJ_EC_MAX_P]; uint32_t ae_grp_idx; uint32_t ae_is_leader:1, ae_process_partial:1; @@ -2219,7 +2219,7 @@ agg_shard_is_parity(struct ds_pool *pool, struct ec_agg_entry *agg_entry) /* Initializes the struct holding the iteration state (ec_agg_entry). */ static void -agg_reset_dkey_entry(struct ec_agg_entry *agg_entry, vos_iter_entry_t *entry) +agg_reset_dkey_entry(struct ec_agg_entry *agg_entry) { agg_clear_extents(agg_entry); agg_reset_pos(VOS_ITER_AKEY, agg_entry); @@ -2257,7 +2257,7 @@ agg_dkey(daos_handle_t ih, vos_iter_entry_t *entry, D_DEBUG(DB_EPC, "oid:"DF_UOID":"DF_KEY" ec agg starting leader %s\n", DP_UOID(agg_entry->ae_oid), DP_KEY(&agg_entry->ae_dkey), agg_entry->ae_is_leader ? 
"yes" : "no"); - agg_reset_dkey_entry(&agg_param->ap_agg_entry, entry); + agg_reset_dkey_entry(&agg_param->ap_agg_entry); rc = agg_get_obj_handle(agg_entry, true); } else { *acts |= VOS_ITER_CB_SKIP; @@ -2346,8 +2346,8 @@ agg_reset_entry(struct ec_agg_entry *agg_entry, vos_iter_entry_t *entry, agg_entry->ae_rsize = 0UL; if (entry) { - agg_entry->ae_oid = entry->ie_oid; - agg_entry->ae_codec = obj_id2ec_codec(entry->ie_oid.id_pub); + agg_entry->ae_oid = entry->ie_oid; + agg_entry->ae_codec = obj_id2ec_codec(entry->ie_oid.id_pub); D_ASSERT(agg_entry->ae_codec); } else { agg_entry->ae_codec = NULL; @@ -2366,12 +2366,12 @@ agg_reset_entry(struct ec_agg_entry *agg_entry, vos_iter_entry_t *entry, } for (i = 0; i < OBJ_EC_MAX_P; i++) { - agg_entry->ae_peer_pshards[i].sd_rank = DAOS_TGT_IGNORE; + agg_entry->ae_peer_pshards[i].sd_rank = DAOS_TGT_IGNORE; agg_entry->ae_peer_pshards[i].sd_tgt_idx = DAOS_TGT_IGNORE; } agg_reset_pos(VOS_ITER_DKEY, agg_entry); - agg_reset_dkey_entry(agg_entry, entry); + agg_reset_dkey_entry(agg_entry); } static int @@ -2473,7 +2473,7 @@ ec_agg_object(daos_handle_t ih, vos_iter_entry_t *entry, struct ec_agg_param *ag md.omd_pdom_lvl = props.dcp_perf_domain; md.omd_pda = props.dcp_ec_pda; shard_nr = daos_oclass_grp_size(&oca) * daos_obj_id2grp_nr(md.omd_id); - agg_param->ap_credits += roundup(shard_nr, 128) / 128; + agg_param->ap_credits += min(512, roundup(shard_nr, 32) / 32); rc = pl_obj_place(map, agg_entry->ae_oid.id_layout_ver, &md, DAOS_OO_RO, NULL, &agg_entry->ae_obj_layout); @@ -2683,6 +2683,7 @@ cont_ec_aggregate_cb(struct ds_cont_child *cont, daos_epoch_range_t *epr, struct dtx_id dti = { 0 }; struct dtx_epoch epoch = { 0 }; daos_unit_oid_t oid = { 0 }; + uint64_t ec_agg_eph; int blocks = 0; int rc = 0; @@ -2700,6 +2701,20 @@ cont_ec_aggregate_cb(struct ds_cont_child *cont, daos_epoch_range_t *epr, return rc; } + if (likely(cont->sc_ec_agg_eph_valid)) { + if (cont->sc_ec_agg_eph == 0) { + D_INFO(DF_CONT ": update cont->sc_ec_agg_eph to " 
DF_X64, + DP_CONT(cont->sc_pool->spc_uuid, cont->sc_uuid), + cont->sc_ec_agg_eph_boundary); + cont->sc_ec_agg_eph = cont->sc_ec_agg_eph_boundary; + } + } else { + D_DEBUG(DB_EPC, DF_CONT ": pause EC aggregation for sc_ec_agg_eph_boundary.\n", + DP_CONT(cont->sc_pool->spc_uuid, cont->sc_uuid)); + return 0; + } + + ec_agg_eph = cont->sc_ec_agg_eph; ec_agg_param->ap_min_unagg_eph = DAOS_EPOCH_MAX; if (flags & VOS_AGG_FL_FORCE_SCAN) { /** We don't want to use the latest container aggregation epoch for the filter @@ -2710,9 +2725,12 @@ cont_ec_aggregate_cb(struct ds_cont_child *cont, daos_epoch_range_t *epr, ec_agg_param->ap_filter_eph = MAX(epr->epr_lo, cont->sc_ec_agg_eph); } - if (ec_agg_param->ap_filter_eph != 0 && + /* Currently cont->sc_ec_update_timestamp is in memory so this optimization won't be helpful + * when there is no container update since restart. + */ + if (ec_agg_param->ap_filter_eph != 0 && cont->sc_ec_update_timestamp != 0 && ec_agg_param->ap_filter_eph >= cont->sc_ec_update_timestamp) { - D_DEBUG(DB_EPC, DF_CONT" skip EC agg "DF_U64">= "DF_U64"\n", + D_DEBUG(DB_EPC, DF_CONT " skip EC agg " DF_U64 ">= " DF_U64 "\n", DP_CONT(cont->sc_pool_uuid, cont->sc_uuid), ec_agg_param->ap_filter_eph, cont->sc_ec_update_timestamp); goto update_hae; @@ -2785,20 +2803,33 @@ cont_ec_aggregate_cb(struct ds_cont_child *cont, daos_epoch_range_t *epr, cont->sc_ec_agg_active = 0; if (rc == 0) { + /* If pool map updated during this round of aggregation, the sc_ec_agg_eph + * possibly be reset by ds_cont_child_reset_ec_agg_eph_all(). + * For that case should not bump local sc_ec_agg_eph and rescan from the reset + * value (sc_ec_agg_eph_boundary). 
+ */ + if (cont->sc_ec_agg_eph != ec_agg_eph) { + D_INFO(DF_CONT " sc_ec_agg_eph changed from " DF_X64 " to " DF_X64 + " don't bump EC aggregation epoch", + DP_CONT(cont->sc_pool_uuid, cont->sc_uuid), ec_agg_eph, + cont->sc_ec_agg_eph); + return rc; + } + cont->sc_ec_agg_eph = max(cont->sc_ec_agg_eph, epr->epr_hi); if (!cont->sc_stopping && cont->sc_query_ec_agg_eph) { - uint64_t orig, cur; + uint64_t orig, cur, cur_eph; + cur_eph = min(ec_agg_param->ap_min_unagg_eph, cont->sc_ec_agg_eph); orig = d_hlc2sec(*cont->sc_query_ec_agg_eph); - cur = d_hlc2sec(cont->sc_ec_agg_eph); + cur = d_hlc2sec(cur_eph); if (orig && cur > orig && (cur - orig) >= 600) D_WARN(DF_CONT" Sluggish EC boundary bumping: " ""DF_U64" -> "DF_U64", gap:"DF_U64"\n", DP_CONT(cont->sc_pool_uuid, cont->sc_uuid), orig, cur, cur - orig); - *cont->sc_query_ec_agg_eph = min(ec_agg_param->ap_min_unagg_eph, - cont->sc_ec_agg_eph); + *cont->sc_query_ec_agg_eph = cur_eph; } } From 7f6343cb2f4af04ef2b935e1e6bf750d840259bd Mon Sep 17 00:00:00 2001 From: Alexander Oganezov Date: Thu, 20 Nov 2025 10:48:50 -0800 Subject: [PATCH 027/253] DAOS-18167 client: Fail proto query after all engines tried (#17049) - Fail protocol query after all the engines have been tried. This avoids infinite flood of errors if app is started while engines are offline or just starting up. 
Signed-off-by: Alexander A Oganezov --- src/client/api/rpc.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/client/api/rpc.c b/src/client/api/rpc.c index 189d34197a9..7e75daec705 100644 --- a/src/client/api/rpc.c +++ b/src/client/api/rpc.c @@ -98,6 +98,7 @@ daos_rpc_send_wait(crt_rpc_t *rpc) struct rpc_proto { int rank_idx; + int num_retries_left; crt_endpoint_t ep; int version; int rc; @@ -122,6 +123,17 @@ query_cb(struct crt_proto_query_cb_info *cb_info) nr_ranks = dc_mgmt_net_get_num_srv_ranks(); D_ASSERT(nr_ranks > 0); rproto->rank_idx = (rproto->rank_idx + 1) % nr_ranks; + rproto->num_retries_left--; + + /** We tried all engines and found none alive */ + if (rproto->num_retries_left <= 0) { + D_ERROR("crt_proto_query_with_ctx() failed -- All %d targets tried\n", + nr_ranks); + rproto->rc = cb_info->pq_rc; + rproto->completed = true; + return; + } + rank = dc_mgmt_net_get_srv_rank(rproto->rank_idx); D_ASSERT(rank != CRT_NO_RANK); rproto->ep.ep_rank = rank; @@ -170,6 +182,7 @@ daos_rpc_proto_query(crt_opcode_t base_opc, uint32_t *ver_array, int count, int D_GOTO(out_free, -DER_NONEXIST); } rproto->rank_idx = d_rand() % nr_ranks; + rproto->num_retries_left = nr_ranks; rank = dc_mgmt_net_get_srv_rank(rproto->rank_idx); D_ASSERT(rank != CRT_NO_RANK); rproto->ep.ep_rank = rank; From 137f84a0712bf13f590db152327843a2bb383583 Mon Sep 17 00:00:00 2001 From: Nasf-Fan Date: Fri, 21 Nov 2025 16:29:38 +0800 Subject: [PATCH 028/253] DAOS-18221 vos: handle race between DTX aggregation and DTX reindex (#17142) DTX aggregation maybe scheduled before committed DTX table has been reindexed. Then vos_container::vc_dtx_committed_count maybe smaller than the count of removed DTX entries. Need to filter out those DTX entries that have not been handled by reindex before re-calculating vc_dtx_committed_count to avoid negative overflow. 
Signed-off-by: Fan Yong --- src/vos/vos_dtx.c | 74 +++++++++++++++++++----------------------- src/vos/vos_internal.h | 7 ++-- 2 files changed, 35 insertions(+), 46 deletions(-) diff --git a/src/vos/vos_dtx.c b/src/vos/vos_dtx.c index f3cd477239b..86005bca3bc 100644 --- a/src/vos/vos_dtx.c +++ b/src/vos/vos_dtx.c @@ -391,8 +391,9 @@ static int dtx_cmt_ent_update(struct btr_instance *tins, struct btr_record *rec, d_iov_t *key, d_iov_t *val, d_iov_t *val_out) { - struct vos_dtx_cmt_ent *dce_new = val->iov_buf; - struct vos_dtx_cmt_ent *dce_old; + struct vos_dtx_cmt_ent *dce_new = val->iov_buf; + struct vos_dtx_cmt_ent *dce_old; + int rc = 0; dce_old = umem_off2ptr(&tins->ti_umm, rec->rec_off); @@ -418,20 +419,11 @@ dtx_cmt_ent_update(struct btr_instance *tins, struct btr_record *rec, if (dce_old->dce_invalid) { rec->rec_off = umem_ptr2off(&tins->ti_umm, dce_new); D_FREE(dce_old); - } else if (!dce_old->dce_reindex) { - /* If two client threads (such as non-initialized context after fork) use the same - * DTX ID (by chance), then it is possible to arrive here. But once comes here, we - * have no chance to require related client/application to restart the transaction - * since related RPC may has already completed. 
- * */ - if (unlikely(dce_new->dce_reindex == 0)) - D_WARN("Commit DTX " DF_DTI " for more than once, maybe reused\n", - DP_DTI(&DCE_XID(dce_new))); - else - dce_new->dce_exist = 1; + } else { + rc = -DER_EXIST; } - return 0; + return rc; } static btr_ops_t dtx_committed_btr_ops = { @@ -944,12 +936,15 @@ vos_dtx_commit_one(struct vos_container *cont, struct dtx_id *dti, daos_epoch_t *dae_p = dae; out: - if (rc != -DER_ALREADY && rc != -DER_NONEXIST) - DL_CDEBUG(rc != 0, DLOG_ERR, DB_IO, rc, "Commit the DTX " DF_DTI, DP_DTI(dti)); - if (rc != 0) D_FREE(dce); + if (rc == -DER_EXIST) + rc = 0; + + if (rc != -DER_ALREADY && rc != -DER_NONEXIST) + DL_CDEBUG(rc != 0, DLOG_ERR, DB_IO, rc, "Commit the DTX " DF_DTI, DP_DTI(dti)); + if (rm_cos != NULL && ((rc == 0 && !keep_act) || rc == -DER_NONEXIST || (rc == -DER_ALREADY && dae == NULL))) *rm_cos = true; @@ -3058,10 +3053,11 @@ dtx_blob_aggregate(struct umem_instance *umm, struct vos_tls *tls, struct vos_co struct vos_cont_df *cont_df, umem_off_t dbd_off, const uint64_t *cmt_time) { struct vos_dtx_blob_df *dbd; - umem_off_t dbd_next_off = UMOFF_NULL; - uint64_t epoch; - int dtx_aggr_count; - bool is_dbd_freed = false; + umem_off_t dbd_next_off = UMOFF_NULL; + uint64_t epoch = cont_df->cd_newest_aggregated; + int dtx_aggr_count = 0; + int cached_count = 0; + bool is_dbd_freed = false; int i; int rc; @@ -3078,8 +3074,6 @@ dtx_blob_aggregate(struct umem_instance *umm, struct vos_tls *tls, struct vos_co goto out; } - dtx_aggr_count = 0; - epoch = cont_df->cd_newest_aggregated; for (i = 0; i < dbd->dbd_count; i++) { struct vos_dtx_cmt_ent_df *dce_df; d_iov_t kiov; @@ -3091,6 +3085,8 @@ dtx_blob_aggregate(struct umem_instance *umm, struct vos_tls *tls, struct vos_co d_iov_set(&kiov, &dce_df->dce_xid, sizeof(dce_df->dce_xid)); rc = dbtree_delete(cont->vc_dtx_committed_hdl, BTR_PROBE_EQ, &kiov, NULL); + if (rc == 0) + cached_count++; if (rc == -DER_NONEXIST) rc = 0; if (unlikely(rc != 0)) { @@ -3199,20 +3195,23 @@ 
dtx_blob_aggregate(struct umem_instance *umm, struct vos_tls *tls, struct vos_co goto out; } - if (dtx_aggr_count > 0) { - cont->vc_dtx_committed_count -= dtx_aggr_count; - cont->vc_pool->vp_dtx_committed_count -= dtx_aggr_count; - d_tm_dec_gauge(tls->vtl_committed, dtx_aggr_count); + if (cached_count > 0) { + D_ASSERTF(cont->vc_dtx_committed_count >= cached_count, + "Unexpected committed DTX entries count during aggregation: %u vs %u\n", + cont->vc_dtx_committed_count, cached_count); - D_DEBUG(DB_IO, - "Release %i DTX committed entries of blob %p (" UMOFF_PF - ") of cont " DF_UUID, - dtx_aggr_count, dbd, UMOFF_P(dbd_off), DP_UUID(cont->vc_id)); + cont->vc_dtx_committed_count -= cached_count; + cont->vc_pool->vp_dtx_committed_count -= cached_count; + d_tm_dec_gauge(tls->vtl_committed, cached_count); } + D_DEBUG(DB_TRACE, + "Release %d/%d DTX committed entries of blob %p (" UMOFF_PF ") of cont " DF_UUID, + cached_count, dtx_aggr_count, dbd, UMOFF_P(dbd_off), DP_UUID(cont->vc_id)); + if (is_dbd_freed) { cont->vc_cmt_dtx_reindex_pos = dbd_next_off; - D_DEBUG(DB_IO, + D_DEBUG(DB_TRACE, "Removed blob of DTX committed entries %p (" UMOFF_PF ") of cont " DF_UUID, dbd, UMOFF_P(dbd_off), DP_UUID(cont->vc_id)); rc = 1; @@ -3593,25 +3592,18 @@ vos_dtx_cmt_reindex(daos_handle_t coh) D_GOTO(out, rc = -DER_NOMEM); memcpy(&dce->dce_base, dce_df, sizeof(dce->dce_base)); - dce->dce_reindex = 1; d_iov_set(&kiov, &DCE_XID(dce), sizeof(DCE_XID(dce))); d_iov_set(&riov, dce, sizeof(*dce)); rc = dbtree_upsert(cont->vc_dtx_committed_hdl, BTR_PROBE_EQ, DAOS_INTENT_UPDATE, &kiov, &riov, NULL); if (rc != 0) { + if (rc == -DER_EXIST) + rc = 1; D_FREE(dce); goto out; } - /* The committed DTX entry is already in the index. - * Related re-index logic can stop. 
- */ - if (dce->dce_exist) { - D_FREE(dce); - D_GOTO(out, rc = 1); - } - cnt++; } diff --git a/src/vos/vos_internal.h b/src/vos/vos_internal.h index 832624eba6a..18e6438ce6e 100644 --- a/src/vos/vos_internal.h +++ b/src/vos/vos_internal.h @@ -517,11 +517,8 @@ struct vos_dtx_act_ent { #define DAE_MBS_OFF(dae) ((dae)->dae_base.dae_mbs_off) struct vos_dtx_cmt_ent { - struct vos_dtx_cmt_ent_df dce_base; - - uint32_t dce_reindex:1, - dce_exist:1, - dce_invalid:1; + struct vos_dtx_cmt_ent_df dce_base; + uint32_t dce_invalid : 1; }; #define DCE_XID(dce) ((dce)->dce_base.dce_xid) From eaed9023ddb5a9a18135f3e188f6d02881f841fe Mon Sep 17 00:00:00 2001 From: wiliamhuang Date: Fri, 21 Nov 2025 09:18:46 -0600 Subject: [PATCH 029/253] DAOS-17495 client: intercept getcwd with trampoline (#16398) realpath() calls getcwd() in libc. Intercepting getcwd() with trampoline can work around getcwd() issue in dfuse due to evicting dentry cache. Also get_current_dir_name() does not resolve symbolic link. This is fixed by calling libc getcwd instead. Signed-off-by: Lei Huang --- src/client/dfuse/inval.c | 19 ++++++-- src/client/dfuse/pil4dfs/int_dfs.c | 76 ++++++++++++++++-------------- src/tests/ftest/daos_test/dfuse.py | 2 + src/tests/suite/dfuse_test.c | 73 +++++++++++++++++++++++++++- 4 files changed, 129 insertions(+), 41 deletions(-) diff --git a/src/client/dfuse/inval.c b/src/client/dfuse/inval.c index 3ddcc052d86..b668928e5c7 100644 --- a/src/client/dfuse/inval.c +++ b/src/client/dfuse/inval.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2016-2024 Intel Corporation. + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP * (C) Copyright 2025 Google LLC * * SPDX-License-Identifier: BSD-2-Clause-Patent @@ -78,6 +79,8 @@ #define INVAL_DIRECTORY_GRACE (60 * 60 * 24 * 365 * 20) /* 20 years to avoid getcwd failures */ #define INVAL_FILE_GRACE 2 +static double expiration_time_dir = INVAL_DIRECTORY_GRACE; + /* Represents one timeout value (time). 
Maintains a ordered list of dentries that are using * this timeout. */ @@ -262,7 +265,13 @@ ival_bucket_add(d_list_t *list, double timeout) int ival_init(struct dfuse_info *dfuse_info) { - int rc; + int rc; + uint64_t expiration_time_dir_env; + + /* this env is only used for testing */ + rc = d_getenv_uint64_t("D_EXPIRATION_TIME_DIR", &expiration_time_dir_env); + if (rc != -DER_NONEXIST) + expiration_time_dir = 1.0 * expiration_time_dir_env; DFUSE_TRA_UP(&ival_data, dfuse_info, "invalidator"); @@ -343,7 +352,7 @@ ival_update_inode(struct dfuse_inode_entry *inode, double timeout) bool wake = false; if (S_ISDIR(inode->ie_stat.st_mode)) - timeout += INVAL_DIRECTORY_GRACE; + timeout += expiration_time_dir; else timeout += INVAL_FILE_GRACE; @@ -454,13 +463,13 @@ ival_add_cont_buckets(struct dfuse_cont *dfc) D_MUTEX_LOCK(&ival_lock); - rc = ival_bucket_add_value(dfc->dfc_dentry_dir_timeout + INVAL_DIRECTORY_GRACE); + rc = ival_bucket_add_value(dfc->dfc_dentry_dir_timeout + expiration_time_dir); if (rc != 0) goto out; if (dfc->dfc_dentry_timeout != 0) { rc = ival_bucket_add_value(dfc->dfc_dentry_timeout + INVAL_FILE_GRACE); if (rc != 0) - ival_bucket_dec_value(dfc->dfc_dentry_dir_timeout + INVAL_DIRECTORY_GRACE); + ival_bucket_dec_value(dfc->dfc_dentry_dir_timeout + expiration_time_dir); } out: @@ -475,7 +484,7 @@ ival_dec_cont_buckets(struct dfuse_cont *dfc) D_MUTEX_LOCK(&ival_lock); if (dfc->dfc_dentry_timeout != 0) ival_bucket_dec_value(dfc->dfc_dentry_timeout + INVAL_FILE_GRACE); - ival_bucket_dec_value(dfc->dfc_dentry_dir_timeout + INVAL_DIRECTORY_GRACE); + ival_bucket_dec_value(dfc->dfc_dentry_dir_timeout + expiration_time_dir); D_MUTEX_UNLOCK(&ival_lock); } diff --git a/src/client/dfuse/pil4dfs/int_dfs.c b/src/client/dfuse/pil4dfs/int_dfs.c index d55d88128a3..3eee719c4ae 100644 --- a/src/client/dfuse/pil4dfs/int_dfs.c +++ b/src/client/dfuse/pil4dfs/int_dfs.c @@ -414,7 +414,7 @@ static int (*next_rename)(const char *old_name, const char *new_name); static int 
(*next_renameat)(int olddirfd, const char *oldpath, int newdirfd, const char *newpath); -static char *(*next_getcwd)(char *buf, size_t size); +static char *(*libc_getcwd)(char *buf, size_t size); static int (*libc_unlink)(const char *path); @@ -495,12 +495,6 @@ static int (*next_mpi_init)(int *argc, char ***argv); static int (*next_pmpi_init)(int *argc, char ***argv); static void *(*next_dlopen)(const char *filename, int flags); -/* to do!! */ -/** - * static char * (*org_realpath)(const char *pathname, char *resolved_path); - * org_realpath real_realpath=NULL; - */ - static int remove_dot_dot(char path[], int *len); static int @@ -5094,36 +5088,57 @@ renameat(int olddirfd, const char *oldpath, int newdirfd, const char *newpath) } char * -getcwd(char *buf, size_t size) +new_getcwd(char *buf, size_t size) { - if (next_getcwd == NULL) { - next_getcwd = dlsym(RTLD_NEXT, "getcwd"); - D_ASSERT(next_getcwd != NULL); - } + char *cwd; + size_t len; + int rc; + int idx; + struct duns_attr_t attr = {0}; if (!d_hook_enabled) - return next_getcwd(buf, size); + return libc_getcwd(buf, size); - if (cur_dir[0] != '/') - update_cwd(); + if (cur_dir[0] != '/') { + /* cur_dir is not initialized yet */ + cwd = libc_getcwd(cur_dir, DFS_MAX_PATH); + if (cwd == NULL) + return NULL; + } - if (query_dfs_mount(cur_dir) < 0) - return next_getcwd(buf, size); + idx = query_dfs_mount(cur_dir); + if (idx < 0) + return libc_getcwd(buf, size); if (buf == NULL) { - size_t len; - if (size == 0) size = PATH_MAX; len = strnlen(cur_dir, size); if (len >= size) { - errno = ERANGE; + errno = ENAMETOOLONG; return NULL; } return strdup(cur_dir); } - strncpy(buf, cur_dir, size); + rc = duns_resolve_path(cur_dir, &attr); + if (rc) { + errno = rc; + return NULL; + } + + rc = snprintf(buf, size, "%s%s", dfs_list[idx].fs_root, attr.da_rel_path); + if (rc == size) { + /* buffer size is not large enough */ + errno = ENAMETOOLONG; + D_FREE(attr.da_rel_path); + return NULL; + } else if (rc < 0) { + 
D_FREE(attr.da_rel_path); + return NULL; + } + + D_FREE(attr.da_rel_path); return buf; } @@ -6813,23 +6828,12 @@ static void update_cwd(void) { char *cwd = NULL; - char *pt_end = NULL; /* daos_init() may be not called yet. */ - cwd = get_current_dir_name(); - + cwd = libc_getcwd(cur_dir, DFS_MAX_PATH); if (cwd == NULL) { - D_FATAL("fatal error to get CWD with get_current_dir_name(): %d (%s)\n", errno, - strerror(errno)); + D_FATAL("fatal error to get CWD with getcwd(): %d (%s)\n", errno, strerror(errno)); abort(); - } else { - pt_end = stpncpy(cur_dir, cwd, DFS_MAX_PATH - 1); - if ((long int)(pt_end - cur_dir) >= DFS_MAX_PATH - 1) { - D_FATAL("fatal error, cwd path is too long: %d (%s)\n", ENAMETOOLONG, - strerror(ENAMETOOLONG)); - abort(); - } - free(cwd); } } @@ -7240,7 +7244,6 @@ init_myhook(void) return; } - update_cwd(); rc = D_MUTEX_INIT(&lock_reserve_fd, NULL); if (rc) return; @@ -7331,6 +7334,7 @@ init_myhook(void) register_a_hook("libc", "exit", (void *)new_exit, (long int *)(&next_exit)); register_a_hook("libc", "dup3", (void *)new_dup3, (long int *)(&libc_dup3)); register_a_hook("libc", "readlink", (void *)new_readlink, (long int *)(&libc_readlink)); + register_a_hook("libc", "getcwd", (void *)new_getcwd, (long int *)(&libc_getcwd)); libc_version = query_libc_version(); if (libc_ver_cmp(libc_version, 2.34) < 0) @@ -7348,6 +7352,8 @@ init_myhook(void) install_hook(); + update_cwd(); + d_hook_enabled = 1; hook_enabled_bak = d_hook_enabled; } diff --git a/src/tests/ftest/daos_test/dfuse.py b/src/tests/ftest/daos_test/dfuse.py index 0e8b8d998a3..09fce4008f4 100644 --- a/src/tests/ftest/daos_test/dfuse.py +++ b/src/tests/ftest/daos_test/dfuse.py @@ -74,6 +74,8 @@ def run_test(self, il_lib=None): container.set_attr(attrs=cont_attrs) dfuse = get_dfuse(self, self.hostlist_clients) + # Only for test. Set directory expiration time 1 second. 
+ dfuse.env['D_EXPIRATION_TIME_DIR'] = '1' start_dfuse(self, dfuse, pool, container) mount_dir = dfuse.mount_dir.value diff --git a/src/tests/suite/dfuse_test.c b/src/tests/suite/dfuse_test.c index 9f14659651d..b963bb0ec9b 100644 --- a/src/tests/suite/dfuse_test.c +++ b/src/tests/suite/dfuse_test.c @@ -629,7 +629,23 @@ do_directory(void **state) DIR *dirp; struct dirent **namelist; long pos; - int entry_count = 100; + int entry_count = 100; + bool with_pil4dfs = false; + bool use_dfuse = true; + char *env_ldpreload; + /* "/tmp/dfuse-test" is assigned in src/tests/ftest/daos_test/dfuse.py */ + char native_mount_dir[] = "/tmp/dfuse-test"; + char cwd[1024]; + char cwd_saved[1024]; + char *resolved_path; + char *path_ret; + + if (strstr(test_dir, native_mount_dir)) + use_dfuse = false; + + env_ldpreload = getenv("LD_PRELOAD"); + if (env_ldpreload != NULL && strstr(env_ldpreload, "libpil4dfs.so") != NULL) + with_pil4dfs = true; printf("Creating dir and files\n"); root = open(test_dir, O_PATH | O_DIRECTORY); @@ -724,6 +740,61 @@ do_directory(void **state) rc = close(root); assert_return_code(rc, errno); + + if (!with_pil4dfs || !use_dfuse) + return; + + /* start testing getcwd() and realpath() */ + resolved_path = malloc(PATH_MAX); + assert_true(resolved_path != NULL); + + path_ret = getcwd(cwd_saved, sizeof(cwd_saved)); + assert_true(path_ret != NULL); + + rc = chdir(test_dir); + assert_return_code(rc, errno); + + rc = mkdir("dir_test", 0755); + assert_return_code(rc, errno); + + rc = symlink("dir_test", "link_test"); + assert_return_code(rc, errno); + + rc = chdir("link_test"); + assert_return_code(rc, errno); + + path_ret = getcwd(cwd, sizeof(cwd)); + assert_true(path_ret != NULL); + assert_true(strstr(cwd, "dir_test") != NULL); + + path_ret = realpath(".", resolved_path); + assert_true(path_ret != NULL); + assert_true(strstr(resolved_path, "dir_test") != NULL); + + sleep(2); + + path_ret = getcwd(cwd, sizeof(cwd)); + assert_true(path_ret != NULL); + 
assert_true(strstr(cwd, "dir_test") != NULL); + + path_ret = realpath(".", resolved_path); + assert_true(path_ret != NULL); + assert_true(strstr(resolved_path, "dir_test") != NULL); + + rc = chdir(".."); + assert_return_code(rc, errno); + + rc = unlink("link_test"); + assert_return_code(rc, errno); + + rc = rmdir("dir_test"); + assert_return_code(rc, errno); + + rc = chdir(cwd_saved); + assert_return_code(rc, errno); + + free(resolved_path); + /* end testing getcwd() and realpath() */ } void From 1bcdeba91b3208c799599de51cfeebca70f71d5a Mon Sep 17 00:00:00 2001 From: Tomasz Gromadzki Date: Fri, 21 Nov 2025 18:26:17 +0100 Subject: [PATCH 030/253] DAOS-18204 packaging: restore libfabric, isa-l and argobots changelog (#17150) Provide a changelog for libfabric, isal and argobots RPMs. Signed-off-by: Tomasz Gromadzki --- utils/rpms/argobots.changelog | 55 +++++++ utils/rpms/argobots.sh | 1 + utils/rpms/isa-l.changelog | 41 ++++++ utils/rpms/isa-l.sh | 1 + utils/rpms/libfabric.changelog | 258 +++++++++++++++++++++++++++++++++ utils/rpms/libfabric.sh | 1 + utils/rpms/package_info.sh | 6 +- 7 files changed, 360 insertions(+), 3 deletions(-) create mode 100644 utils/rpms/argobots.changelog create mode 100644 utils/rpms/isa-l.changelog create mode 100644 utils/rpms/libfabric.changelog diff --git a/utils/rpms/argobots.changelog b/utils/rpms/argobots.changelog new file mode 100644 index 00000000000..4864f0fa8e3 --- /dev/null +++ b/utils/rpms/argobots.changelog @@ -0,0 +1,55 @@ +%changelog +* Wed Nov 19 2025 Tomasz Gromadzki - 1.2-4 +- Restore the RPM changelog, which has not been available since version 1.2-1 + +* Fri Sep 12 2025 Jeff Olivier - 1.2-3 +- Fix leap package name + +* Mon Aug 11 2025 Jeff Olivier - 1.2-2 +- Switch to fpm build for RPMs + +* Wed Oct 02 2024 Cedric Koch-Hofer - 1.2-1 +- Update to 1.2 +- Add patch 411e5b3 Fix DAOS-14248: ULTs stacks dump works only once +- Add patch bb0c908 Restore the libunwind support + +* Tue Jun 06 2023 Brian J. 
Murrell - 1.1-3 +- Update to build on EL9 + +* Wed Oct 19 2022 Brian J. Murrell - 1.1-2 +- Create debuginfo packages for SUSE +- Fix up some issues found by rpmlint + +* Thu Apr 01 2021 Brian J. Murrell - 1.1-1 +- Update to 1.1 + +* Wed Mar 17 2021 Brian J. Murrell - 1.1~rc1-1 +- Update to 1.1rc1 + +* Tue Feb 23 2021 B.Faccini - 1.1~b1-1 +- Update to 1.1b1 +- Build with unwinding enabled + +* Mon Aug 17 2020 Brian J. Murrell - 1.0-1 +- Update to 1.0 final + +* Mon Jun 22 2020 Brian J. Murrell - 1.0rc-5 +- Update License: + +* Sat Sep 21 2019 Brian J. Murrell - 1.0rc-4 +- Add BR: pkgconfig + +* Sat Sep 21 2019 Brian J. Murrell - 1.0rc-3 +- Revert libabt0 packaging for EL7; RH just doesn't do that + +* Fri Sep 20 2019 Brian J. Murrell - 1.0rc-2 +- Add patch to bring up to 89507c1f8c +- Create a libabt0 subpackage +- Force autogen.sh since we add a patch that modifies a Makefile.am + +* Wed Apr 17 2019 Brian J. Murrell - 1.0rc-1 +- Update to 1.0rc1 +- Add patch to bring up to 9d48af08 + +* Wed Apr 03 2019 Brian J. Murrell - 0.99-1 +- Initial package diff --git a/utils/rpms/argobots.sh b/utils/rpms/argobots.sh index 65a3dc937f8..71726148e93 100755 --- a/utils/rpms/argobots.sh +++ b/utils/rpms/argobots.sh @@ -16,6 +16,7 @@ DESCRIPTION="Argobots is a lightweight, low-level threading and tasking framewor This release is an experimental version of Argobots that contains features related to user-level threads, tasklets, and some schedulers." 
URL="https://argobots.org" +RPM_CHANGELOG="argobots.changelog" files=() TARGET_PATH="${libdir}" diff --git a/utils/rpms/isa-l.changelog b/utils/rpms/isa-l.changelog new file mode 100644 index 00000000000..0d86547362a --- /dev/null +++ b/utils/rpms/isa-l.changelog @@ -0,0 +1,41 @@ +%changelog +* Wed Nov 19 2025 Tomasz Gromadzki - 2.31.1-8 +- Restore the RPM changelog, which has not been available since version 2.30.0-2 + +* Fri Sep 12 2025 Jeff Olivier - 2.31.1-7 +- Fix leap package name + +* Mon Aug 11 2025 Jeff Olivier - 2.31.1-6 +- Switch to fpm build for RPMs +- Update isa-l to 2.31.1 + +* Fri May 19 2023 Brian J. Murrell - 2.30.0-2 +- Disable static library build +- Add debuginfo generation for Leap 15 +- Add hardened build flags for CentOS 7 and Leap 15 + +* Thu Jan 28 2021 Brian J. Murrell - 2.30.0-1 +- Update to latest +- Add %%{_libdir}/pkgconfig/libisal.pc to -devel package + +* Tue Jun 16 2020 Brian J. Murrell - 2.26.0-3 +- Add %%license files + +* Wed Oct 02 2019 John E. Malmberg - 2.26.0-2 +- Fix some SUSE rpmlint packaging complaints + +* Wed May 15 2019 Brian J. Murrell - 2.26.0-1 +- Update to latest +- Split into a man utilities package with igizp and a library + package + - Obsoletes: the older isa-l packages accordingly + +* Tue May 07 2019 Brian J. Murrell - 2.21.0-3 +- Bump release for RPM cache coherency + +* Fri May 03 2019 Brian J. Murrell - 2.21.0-2 +- Use the more stable "archive" URL for the source +- Define a make_build macro for SLES 12.3 + +* Fri Apr 05 2019 Brian J. Murrell - 2.21.0-1 +- initial package \ No newline at end of file diff --git a/utils/rpms/isa-l.sh b/utils/rpms/isa-l.sh index 96580340f56..d258c42d6c3 100755 --- a/utils/rpms/isa-l.sh +++ b/utils/rpms/isa-l.sh @@ -17,6 +17,7 @@ DESCRIPTION="Intelligent Storage Acceleration Library. 
Provides various algorithms for erasure coding, crc, raid, compression and decompression" URL="https://github.com/intel/isa-l" +RPM_CHANGELOG="isa-l.changelog" files=() TARGET_PATH="${bindir}" diff --git a/utils/rpms/libfabric.changelog b/utils/rpms/libfabric.changelog new file mode 100644 index 00000000000..b3e780be7bd --- /dev/null +++ b/utils/rpms/libfabric.changelog @@ -0,0 +1,258 @@ +%changelog +* Wed Nov 19 2025 Tomasz Gromadzki <tomasz.gromadzki@intel.com> - 1.22.0-5 +- Restore the RPM changelog, which has not been available since version 1.22.0-2 + +* Fri Sep 12 2025 Jeff Olivier <jeffolivier@google.com> - 1.22.0-4 +- Fix leap package name + +* Mon Aug 11 2025 Jeff Olivier <jeffolivier@google.com> - 1.22.0-3 +- Switch to fpm build for RPMs + +* Thu Feb 06 2025 Jerome Soumagne <jsoumagne@hpe.com> - 1.22.0-2 +- Re-enable psm2 provider for other applications depending on libfabric + +* Fri Oct 25 2024 Jerome Soumagne <jsoumagne@hpe.com> - 1.22.0-1 +- Update to 1.22.0 +- Drop prov/verbs patch merged upstream + +* Thu Mar 14 2024 Jerome Soumagne <jsoumagne@hpe.com> - 1.19.1-1 +- Update to 1.19.1 +- Drop prov/tcp multi-recv patch merged upstream +- Add prov/verbs assert patch + +* Mon Oct 30 2023 Jerome Soumagne <jsoumagne@hpe.com> - 1.19.0-1 +- Update to 1.19.0 +- Drop prov/tcp patches that were merged in 1.19.0 +- Drop prov/opx patch that was merged in 1.19.0 +- Add prov/tcp multi-recv patch +- Drop support for CentOS7 + +* Fri Jul 21 2023 Jerome Soumagne <jsoumagne@hpe.com> - 1.18.1-1 +- Update to 1.18.1 +- Drop patches that have been merged to 1.18.1 +- Add additional prov/tcp patches + +* Fri Jun 23 2023 Brian J. 
Murrell - 1.18.0-4 +- Rebuild for EL9 + +* Wed Jun 14 2023 Jerome Soumagne - 1.18.0-3 +- Add prov/tcp patch to fix registration lock issue +- Add prov/opx patch to fix 32-bit conversion issue +- Fix build_opx macro logic + +* Thu Jun 1 2023 Jerome Soumagne - 1.18.0-2 +- Add prov/tcp patch to fix busy spin issue + +* Wed May 3 2023 Jerome Soumagne - 1.18.0-1 +- Update to 1.18.0 +- Enable opx provider and add libuuid-devel dependency +- Add libnuma/numactl-devel dependency +- Clean up spec file and disable unused / deprecated providers +- Use tar.bz2 archive instead of tar.gz to skip autogen process +- Add prov/verbs patch to recover from qp error state + +* Thu Apr 13 2023 Alexander Oganezov - 1.17.1-1 +- Update to v1.17.1 +- Apply DAOS-12407 workaround to ofi + +* Thu Jan 26 2023 Brian J. Murrell - 1.15.1-4 +- Remove libpsm2[-devel] dependencies + +* Mon Aug 1 2022 Jerome Soumagne - 1.15.1-3 +- Drop CXI compat patch that is no longer needed + +* Tue Jul 5 2022 Jerome Soumagne - 1.15.1-2 +- Add patch to keep backward compatibility with CXI provider using v1.14.x + +* Wed May 18 2022 Lei Huang - 1.15.1-1 +- Update to v1.15.1 + +* Wed May 4 2022 Brian J. 
Murrell - 1.15.0~rc3-2 +- Add _hardened_build flag to build PIE binaries on CentOS 7 +- Add options to C*FLAGS to build PIE binaries on Leap 15 + +* Tue Apr 19 2022 Lei Huang - 1.15.0~rc3-1 +- Update to v1.15.0rc3 +- Remove patches already landed + +* Mon Apr 04 2022 Dmitry Eremin - 1.14.0-2 +- Apply patch for TCP provider +- Revert patch with performance degradation + +* Mon Jan 17 2022 Johann Lombardi - 1.14.0-1 +- Upgrade to 1.14.0 GA +- Apply patch for DAOS-9376 + +* Fri Dec 17 2021 Phillip Henderson - 1.14.0~rc3-3 +- Enable building debuginfo package on SUSE platforms + +* Wed Dec 8 2021 Alexander Oganezov - 1.14.0~rc3-2 +- Apply patch for DAOS-9173 + +* Sat Nov 13 2021 Alexander Oganezov - 1.14.0~rc3-1 +- Update to v1.14.0rc3 + +* Fri Oct 8 2021 Alexander Oganezov - 1.13.2~rc1-1 +- Update to v1.13.2rc1 + +* Wed Mar 10 2021 Alexander Oganezov - 1.12.0-1 +- Update to v1.12.0 + +* Tue Feb 16 2021 Alexander Oganezov - 1.12.0~rc1-1 +- Update to v1.12.0rc1 + +* Tue Nov 24 2020 Brian J. Murrell - 1.11.1-1 +- Update to 1.11.1 GA +- Make the use of %%{dl_verison} more automatic + +* Thu Oct 15 2020 Alexander Oganezov - 1.11.1~rc1-2 +- Fix to include DL_VERSION in Makefile + +* Fri Oct 9 2020 Alexander Oganezov - 1.11.1~rc1-1 +- Update to libfabric v1.11.1rc1 + +* Thu Oct 1 2020 Alexander Oganezov - 1.11.0-2 +- Disable EFA provider + +* Mon Sep 14 2020 Alexander Oganezov - 1.11.0-1 +- Update to libfabric v1.11.0 + +* Thu Aug 20 2020 Li Wei - 1.9.0-8 +- Update sockets_provider.patch to report the original connect errors + +* Wed Jul 1 2020 Alexander Oganezov - 1.9.0-7 +- Commented out infinipath from BuildRequires +- Removed --enable-psm from configuration flags + +* Mon May 18 2020 Alexander Oganezov - 1.9.0-6 +- update to 8fa7c5bbbfee7df5194b65d9294929a893eb4093 +- apply custom patch for sockets provider + +* Wed Mar 25 2020 Alexander Oganezov - 1.9.0-5 +- update to 62f6c937601776dac8a1f97c8bb1b1a6acfbc3c0 + +* Tue Mar 17 2020 Alexander Oganezov - 1.9.0-4 +- update to 
15ce5c62e2f87715b32bc546d33bb132b97aea4c + +* Fri Mar 6 2020 Alexander Oganezov - 1.9.0-3 +- update to 8af3c112bfce155eb04218bef656f58f3609ce19 + +* Thu Feb 6 2020 Alexander Oganezov - 1.9.0-2 +- update to 955f3a07dd011fb1dbfa6b6c772ada03d5af320e to pick configure.ac fix + +* Wed Feb 5 2020 Brian J. Murrell - 1.9.0-1 +- Update to 1b8ed7876204692fd95b07df8cba21683707e5dc + +* Sat Nov 9 2019 Alexander Oganezov - 1.8.0-6 +- Update to 863407 + +* Wed Sep 25 2019 Brian J. Murrell - 1.8.0-5 +- Update BR: for psm2 to 11.2.78 +- Accordingly, devel subpackage should Requires: psm2-devel + +* Mon Sep 23 2019 Brian J. Murrell - 1.8.0-4 +- %%setup -> %%autosetup +- Add patch to bring up to 3712eb0 +- Set _default_patch_fuzz 1 due to GitHub's dirty compare/ patches +- Once again create the libfabric1 subpackage for SLES + +* Thu Aug 22 2019 Brian J. Murrell - 1.8.0-3 +- Revert previous change as it was causing (on SLES 12.3): +/usr/lib64/libfabric.so.1: undefined reference to `psm2_epaddr_to_epid@PSM2_1.0' +/usr/lib64/libfabric.so.1: undefined reference to `psm2_ep_disconnect2@PSM2_1.0' +/usr/lib64/libfabric.so.1: undefined reference to `psm2_am_register_handlers_2@PSM2_1.0' +/usr/lib64/libfabric.so.1: undefined reference to `psm2_info_query@PSM2_1.0' +/usr/lib64/libfabric.so.1: undefined reference to `psm2_get_capability_mask@PSM2_1.0' +/usr/lib64/libfabric.so.1: undefined reference to `psm2_ep_epid_lookup2@PSM2_1.0' + +* Tue Aug 20 2019 Brian J. Murrell - 1.8.0-2 +- Install libnl3-devel on all platforms +- Create a libfabric1 subpackage with the shared library +- Clean up much of SUSE's post build linting errors/warnings + +* Thu Jul 25 2019 Alexander A. Oganezov - 1.8.0-1 +- Update to 1.8.0 + +* Wed Jun 26 2019 Brian J. Murrell - 1.7.1rc1-4 +- Add BuildRequires: libpsm2-devel >= 10.3.58 + - needed for psm2_am_register_handlers_2@PSM2_1.0 + +* Tue May 14 2019 Brian J. Murrell - 1.7.1rc1-3 +- Fix SLES 12.3 OS conditionals >= 1315 + +* Wed May 01 2019 Brian J. 
Murrell - 1.7.1rc1-2 +- Disable psm2 on SLES 12.3 as the psm2 library there is too old + +* Tue Mar 19 2019 Brian J. Murrell - 1.7.1rc1-1 +- Update to 1.7.1 RC1 + +* Mon Mar 11 2019 Brian J. Murrell - 1.7.0rc3-1 +- Rebase to latest release 1.7.0rc3 + +* Wed Aug 15 2018 Brian J. Murrell - 1.6.0-1 +- Rebase to latest release 1.6.0 +- Remove obsolete patch +- Strip out local libtool Rpathing per + https://fedoraproject.org/wiki/RPath_Packaging_Draft#Removing_Rpath + +* Wed Jan 10 2018 Honggang Li - 1.5.3-1 +- Rebase to latest release 1.5.3 +- Resolves: bz1533293 + +* Thu Jan 4 2018 Honggang Li - 1.5.1-3 +- Add support of different CQ formats for the verbs/RDM +- Resolves: bz1530715 + +* Fri Oct 20 2017 Honggang Li - 1.5.1-2 +- Fix PPC32 compiling issue +- Resolves: bz1504395 + +* Tue Oct 17 2017 Honggang Li - 1.5.1-1 +- Rebase to v1.5.1 +- Resolves: bz1452791 + +* Tue May 16 2017 Honggang Li - 1.4.2-1 +- Update to upstream v1.4.2 release +- Related: bz1451100 + +* Wed Mar 01 2017 Jarod Wilson - 1.4.1-1 +- Update to upstream v1.4.1 release +- Related: bz1382827 + +* Mon May 30 2016 Honggang Li - 1.3.0-3 +- Rebuild against latest infinipath-psm. +- Related: bz1280143 + +* Mon May 30 2016 Honggang Li - 1.3.0-2 +- Rebuild libfabric to support Intel OPA PSM2. 
+- Related: bz1280143 + +* Wed May 4 2016 Honggang Li - 1.3.0-1 +- Update to latest upstream release +- Related: bz1280143 + +* Wed Sep 30 2015 Doug Ledford - 1.1.0-2 +- Rebuild against libnl3 now that the UD RoCE bug is fixed +- Related: bz1261028 + +* Fri Aug 14 2015 Honggang Li - 1.1.0-1 +- Rebase to upstream 1.1.0 +- Resolves: bz1253381 + +* Fri Aug 07 2015 Michal Schmidt - 1.1.0-0.2.rc4 +- Packaging Guidelines conformance fixes and spec file cleanups +- Related: bz1235266 + +* Thu Aug 6 2015 Honggang Li - 1.1.0-0.1.rc4 +- fix N-V-R issue and disable static library +- Related: bz1235266 + +* Tue Aug 4 2015 Honggang Li - 1.1.0rc4 +- Initial build for RHEL-7.2 +- Related: bz1235266 + +* Fri Jun 26 2015 Open Fabrics Interfaces Working Group 1.1.0rc1 +- Release 1.1.0rc1 + +* Sun May 3 2015 Open Fabrics Interfaces Working Group 1.0.0 +- Release 1.0.0 diff --git a/utils/rpms/libfabric.sh b/utils/rpms/libfabric.sh index 5b3077c410e..10853a4e5f3 100755 --- a/utils/rpms/libfabric.sh +++ b/utils/rpms/libfabric.sh @@ -16,6 +16,7 @@ ARCH=${isa} DESCRIPTION="Provides a user-space API to access high-performance fabric services, such as RDMA. This package contains the runtime library." 
URL="https://github.com/ofiwg/libfabric" +RPM_CHANGELOG="libfabric.changelog" files=() TARGET_PATH="${bindir}" diff --git a/utils/rpms/package_info.sh b/utils/rpms/package_info.sh index 60c8caa0e58..d4c5eba3e3d 100644 --- a/utils/rpms/package_info.sh +++ b/utils/rpms/package_info.sh @@ -35,19 +35,19 @@ daos_release="$(grep "^Release: " "${root}/utils/rpms/daos.spec" | \ export daos_release export libfabric_version="1.22.0" -export libfabric_release="4${distro_name}" +export libfabric_release="5${distro_name}" export libfabric_full="${libfabric_version}-${libfabric_release}" export mercury_version="2.4.0" export mercury_release="8${distro_name}" export mercury_full="${mercury_version}-${mercury_release}" export argobots_version="1.2" -export argobots_release="3${distro_name}" +export argobots_release="4${distro_name}" export argobots_full="${argobots_version}-${argobots_release}" export pmdk_version="2.1.2" export pmdk_release="1${distro_name}" export pmdk_full="${pmdk_version}-${pmdk_release}" export isal_version="2.31.1" -export isal_release="7${distro_name}" +export isal_release="8${distro_name}" export isal_full="${isal_version}-${isal_release}" export isal_crypto_version="2.24.0" export isal_crypto_release="3${distro_name}" From f7a7981d19acbd6bd6efaa67e5e9bc6a4b18bf96 Mon Sep 17 00:00:00 2001 From: Nasf-Fan Date: Sat, 22 Nov 2025 01:27:21 +0800 Subject: [PATCH 031/253] DAOS-18200 chk: use deep stack for collective check query task (#17143) Otherwise related ULT stack maybe overflow when query bad pool. 
Signed-off-by: Fan Yong --- src/chk/chk_engine.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/chk/chk_engine.c b/src/chk/chk_engine.c index a0ab820d9bd..8f4a56fe50a 100644 --- a/src/chk/chk_engine.c +++ b/src/chk/chk_engine.c @@ -2577,7 +2577,7 @@ chk_engine_query_pool(uuid_t uuid, void *args) coll_ops.co_func = chk_engine_query_one; coll_args.ca_func_args = shard; - rc = dss_thread_collective_reduce(&coll_ops, &coll_args, 0); + rc = dss_thread_collective_reduce(&coll_ops, &coll_args, DSS_ULT_DEEP_STACK); out: D_CDEBUG(rc != 0, DLOG_ERR, DLOG_DBG, From 2e560d8f2b3d24812f529fc879f326302238d760 Mon Sep 17 00:00:00 2001 From: Ding Date: Fri, 21 Nov 2025 12:29:49 -0500 Subject: [PATCH 032/253] DAOS-17796 test: move rebuild/basic.py to HW (#16993) move rebuild/basic.py to HW since VMs are slow. Signed-off-by: Ding-Hwa Ho --- src/tests/ftest/rebuild/basic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tests/ftest/rebuild/basic.py b/src/tests/ftest/rebuild/basic.py index 8a8f8148f3c..a6b0801775a 100644 --- a/src/tests/ftest/rebuild/basic.py +++ b/src/tests/ftest/rebuild/basic.py @@ -25,7 +25,7 @@ def test_rebuild_basic(self): Multiple pool rebuild, single client, various record/object counts :avocado: tags=all,daily_regression - :avocado: tags=vm + :avocado: tags=hw,large :avocado: tags=rebuild,pool,daos_cmd :avocado: tags=RbldBasic,test_rebuild_basic """ From 6796a3260ef22046bc60f5a3b8d7558842b1da8b Mon Sep 17 00:00:00 2001 From: Niu Yawei Date: Mon, 24 Nov 2025 22:58:47 +0800 Subject: [PATCH 033/253] DAOS-17607 bio: SPDK I/O monitor (#17071) Monitor inflight SPDK I/Os, if any I/O isn't completed within certain amount of time (120 seconds, configurable through env var DAOS_SPDK_IO_TIMEOUT), we assume the SPDK I/O is stalled due to hardware issue (or software bug), RAS event will be raised and the corresponding device will be marked as faulty. 
Signed-off-by: Niu Yawei --- src/bio/bio_buffer.c | 46 +++++++++++++++++++-- src/bio/bio_context.c | 8 ++-- src/bio/bio_internal.h | 64 +++++++++++++++++++++++++++++- src/bio/bio_monitor.c | 9 ++++- src/bio/bio_recovery.c | 11 ++++- src/bio/bio_xstream.c | 21 +++++++++- src/engine/sched.c | 8 +++- src/include/daos_srv/daos_engine.h | 9 +++-- 8 files changed, 160 insertions(+), 16 deletions(-) diff --git a/src/bio/bio_buffer.c b/src/bio/bio_buffer.c index b043de2f00b..55b67988cff 100644 --- a/src/bio/bio_buffer.c +++ b/src/bio/bio_buffer.c @@ -280,6 +280,7 @@ bio_iod_alloc(struct bio_io_context *ctxt, struct umem_instance *umem, return NULL; D_ASSERT(type < BIO_IOD_TYPE_MAX); + bio_io_lug_init(&biod->bd_io_lug); biod->bd_umem = umem; biod->bd_ctxt = ctxt; biod->bd_type = type; @@ -336,6 +337,7 @@ bio_iod_free(struct bio_desc *biod) bio_sgl_fini(&biod->bd_sgls[i]); D_FREE(biod->bd_bulk_hdls); + bio_io_lug_fini(&biod->bd_io_lug); D_FREE(biod); } @@ -1041,8 +1043,7 @@ rw_completion(void *cb_arg, int err) bxb = biod->bd_ctxt->bic_xs_blobstore; D_ASSERT(bxb != NULL); - D_ASSERT(bxb->bxb_blob_rw > 0); - bxb->bxb_blob_rw--; + bio_io_lug_dequeue(bxb, &biod->bd_io_lug); io_ctxt = biod->bd_ctxt; D_ASSERT(io_ctxt != NULL); @@ -1184,7 +1185,7 @@ nvme_rw(struct bio_desc *biod, struct bio_rsrvd_region *rg) biod->bd_dma_issued = 1; biod->bd_inflights++; - bxb->bxb_blob_rw++; + bio_io_lug_enqueue(xs_ctxt, bxb, &biod->bd_io_lug); biod->bd_ctxt->bic_inflight_dmas++; rw_cnt = (pg_cnt > bio_chk_sz) ? 
bio_chk_sz : pg_cnt; @@ -1982,3 +1983,42 @@ bio_copy(struct bio_io_context *ioctxt, struct umem_instance *umem, return rc; } + +#define IO_MONITOR_INTVL 1000000 /* us, 1 second */ + +void +bio_io_monitor(struct bio_xs_context *xs_ctxt, uint64_t now) +{ + enum smd_dev_type st; + struct bio_xs_blobstore *bxb; + struct bio_io_lug *io_lug; + struct media_error_msg *mem; + + if ((xs_ctxt->bxc_io_monitor_ts + IO_MONITOR_INTVL) > now) + return; + + xs_ctxt->bxc_io_monitor_ts = now; + + for (st = SMD_DEV_TYPE_DATA; st < SMD_DEV_TYPE_MAX; st++) { + bxb = xs_ctxt->bxc_xs_blobstores[st]; + + if (!bxb || d_list_empty(&bxb->bxb_pending_ios)) + continue; + + io_lug = d_list_entry(bxb->bxb_pending_ios.next, struct bio_io_lug, bil_link); + D_ASSERT(io_lug->bil_submit_ts != 0); + + if ((io_lug->bil_submit_ts + bio_io_timeout) >= now) + continue; + + D_ALLOC_PTR(mem); + if (mem == NULL) { + D_ERROR("Out of memory: NVMe stalled I/O report is skipped\n"); + continue; + } + mem->mem_err_type = MET_IO_STALLED; + mem->mem_bs = bxb->bxb_blobstore; + mem->mem_tgt_id = xs_ctxt->bxc_tgt_id; + spdk_thread_send_msg(owner_thread(mem->mem_bs), bio_media_error, mem); + } +} diff --git a/src/bio/bio_context.c b/src/bio/bio_context.c index 989e20e9501..7c41a1abdcd 100644 --- a/src/bio/bio_context.c +++ b/src/bio/bio_context.c @@ -13,6 +13,7 @@ struct blob_cp_arg { spdk_blob_id bca_id; struct spdk_blob *bca_blob; + struct bio_io_lug bca_io_lug; /* * Completion could run on different xstream when NVMe * device is shared by multiple xstreams. 
@@ -36,6 +37,7 @@ blob_cp_arg_init(struct blob_cp_arg *ba) { int rc; + bio_io_lug_init(&ba->bca_io_lug); rc = ABT_eventual_create(0, &ba->bca_eventual); if (rc != ABT_SUCCESS) return dss_abterr2der(rc); @@ -46,6 +48,7 @@ blob_cp_arg_init(struct blob_cp_arg *ba) static inline void blob_cp_arg_fini(struct blob_cp_arg *ba) { + bio_io_lug_fini(&ba->bca_io_lug); ABT_eventual_free(&ba->bca_eventual); } @@ -164,8 +167,7 @@ blob_unmap_cb(void *arg, int rc) bxb = bma->bma_ioc->bic_xs_blobstore; D_ASSERT(bxb != NULL); - D_ASSERT(bxb->bxb_blob_rw > 0); - bxb->bxb_blob_rw--; + bio_io_lug_dequeue(bxb, &ba->bca_io_lug); blob_common_cb(ba, rc); } @@ -1232,7 +1234,7 @@ blob_unmap_sgl(struct bio_io_context *ioctxt, d_sg_list_t *unmap_sgl, uint32_t b drain_inflight_ios(xs_ctxt, bxb); ba->bca_inflights++; - bxb->bxb_blob_rw++; + bio_io_lug_enqueue(xs_ctxt, bxb, &ba->bca_io_lug); pg_off = (uint64_t)unmap_iov->iov_buf; pg_cnt = unmap_iov->iov_len; diff --git a/src/bio/bio_internal.h b/src/bio/bio_internal.h index 408c02fa297..5b97582d18c 100644 --- a/src/bio/bio_internal.h +++ b/src/bio/bio_internal.h @@ -278,7 +278,7 @@ struct bio_dev_health { void *bdh_intel_smart_buf; /*Intel SMART attributes*/ uint64_t bdh_stat_age; unsigned int bdh_inflights; - unsigned int bdh_stopping:1; + unsigned int bdh_stopping : 1, bdh_io_stalled : 1; uint16_t bdh_vendor_id; /* PCI vendor ID */ /** @@ -365,10 +365,21 @@ struct bio_blobstore { bb_faulty_done:1; /* Faulty reaction is done */ }; +struct bio_io_lug { + /* Link to bio_xs_blobstore::bxb_pending_ios */ + d_list_t bil_link; + /* When the I/O is submitted */ + uint64_t bil_submit_ts; + /* Reference count */ + uint32_t bil_ref; +}; + /* Per-xstream blobstore */ struct bio_xs_blobstore { /* In-flight blob read/write */ unsigned int bxb_blob_rw; + /* Pending I/Os */ + d_list_t bxb_pending_ios; /* spdk io channel */ struct spdk_io_channel *bxb_io_channel; /* per bio blobstore */ @@ -381,6 +392,7 @@ struct bio_xs_blobstore { /* Per-xstream NVMe context 
*/ struct bio_xs_context { int bxc_tgt_id; + uint64_t bxc_io_monitor_ts; struct spdk_thread *bxc_thread; struct bio_xs_blobstore *bxc_xs_blobstores[SMD_DEV_TYPE_MAX]; struct bio_dma_buffer *bxc_dma_buf; @@ -388,6 +400,52 @@ struct bio_xs_context { unsigned int bxc_skip_draining : 1; }; +static inline void +bio_io_lug_init(struct bio_io_lug *io_lug) +{ + D_INIT_LIST_HEAD(&io_lug->bil_link); + io_lug->bil_submit_ts = 0; + io_lug->bil_ref = 0; +} + +static inline void +bio_io_lug_fini(struct bio_io_lug *io_lug) +{ + D_ASSERT(io_lug->bil_ref == 0); + D_ASSERT(d_list_empty(&io_lug->bil_link)); +} + +static inline void +bio_io_lug_dequeue(struct bio_xs_blobstore *bxb, struct bio_io_lug *io_lug) +{ + D_ASSERT(bxb->bxb_blob_rw > 0); + bxb->bxb_blob_rw--; + + D_ASSERT(!d_list_empty(&io_lug->bil_link)); + D_ASSERT(io_lug->bil_submit_ts != 0); + D_ASSERT(io_lug->bil_ref > 0); + io_lug->bil_ref--; + if (io_lug->bil_ref == 0) + d_list_del_init(&io_lug->bil_link); +} + +static inline void +bio_io_lug_enqueue(struct bio_xs_context *xs_ctxt, struct bio_xs_blobstore *bxb, + struct bio_io_lug *io_lug) +{ + bxb->bxb_blob_rw++; + if (io_lug->bil_ref == 0) { + if (xs_ctxt->bxc_io_monitor_ts) + io_lug->bil_submit_ts = xs_ctxt->bxc_io_monitor_ts; + else + io_lug->bil_submit_ts = d_timeus_secdiff(0); + + D_ASSERT(d_list_empty(&io_lug->bil_link)); + d_list_add_tail(&io_lug->bil_link, &bxb->bxb_pending_ios); + } + io_lug->bil_ref++; +} + /* Per VOS instance I/O context */ struct bio_io_context { d_list_t bic_link; /* link to bxb_io_ctxts */ @@ -437,6 +495,7 @@ struct bio_rsrvd_dma { /* I/O descriptor */ struct bio_desc { + struct bio_io_lug bd_io_lug; struct umem_instance *bd_umem; struct bio_io_context *bd_ctxt; /* DMA buffers reserved by this io descriptor */ @@ -546,6 +605,7 @@ extern unsigned int bio_chk_cnt_max; extern unsigned int bio_numa_node; extern unsigned int bio_spdk_max_unmap_cnt; extern unsigned int bio_max_async_sz; +extern unsigned int bio_io_timeout; int 
xs_poll_completion(struct bio_xs_context *ctxt, unsigned int *inflights, uint64_t timeout); @@ -583,6 +643,8 @@ int iod_add_region(struct bio_desc *biod, struct bio_dma_chunk *chk, uint64_t end, uint8_t media); int dma_buffer_grow(struct bio_dma_buffer *buf, unsigned int cnt); void iod_dma_wait(struct bio_desc *biod); +void +bio_io_monitor(struct bio_xs_context *xs_ctxt, uint64_t now); static inline struct bio_dma_buffer * iod_dma_buf(struct bio_desc *biod) diff --git a/src/bio/bio_monitor.c b/src/bio/bio_monitor.c index 22e20a4f9f5..272e0dc80c5 100644 --- a/src/bio/bio_monitor.c +++ b/src/bio/bio_monitor.c @@ -680,7 +680,8 @@ get_spdk_health_info_completion(struct spdk_bdev_io *bdev_io, bool success, static bool is_bbs_faulty(struct bio_blobstore *bbs) { - struct nvme_stats *dev_stats = &bbs->bb_dev_health.bdh_health_state; + struct bio_dev_health *bdh = &bbs->bb_dev_health; + struct nvme_stats *dev_stats = &bdh->bdh_health_state; /* * Used for DAOS NVMe Recovery Tests. Will trigger bs faulty reaction @@ -708,6 +709,12 @@ is_bbs_faulty(struct bio_blobstore *bbs) } } + /* Auto-faulty for stalled I/O is always enabled */ + if (bdh->bdh_io_stalled) { + D_ERROR("I/O stalled on NVMe device " DF_UUID "\n", DP_UUID(bbs->bb_dev->bb_uuid)); + return true; + } + if (!glb_criteria.fc_enabled) return false; diff --git a/src/bio/bio_recovery.c b/src/bio/bio_recovery.c index 4c2506aee23..c717301abe7 100644 --- a/src/bio/bio_recovery.c +++ b/src/bio/bio_recovery.c @@ -697,11 +697,20 @@ bio_media_error(void *msg_arg) "Device: "DF_UUID" csum error logged from tgt_id:%d\n", DP_UUID(mem->mem_bs->bb_dev->bb_uuid), mem->mem_tgt_id); break; + case MET_IO_STALLED: + /* I/O stalling has been reported for this device */ + if (bdh->bdh_io_stalled) + goto out; + bdh->bdh_io_stalled = 1; + snprintf(err_str, DAOS_RAS_STR_FIELD_SIZE, + "Device: " DF_UUID " stalled I/O logged from tgt_id:%d\n", + DP_UUID(mem->mem_bs->bb_dev->bb_uuid), mem->mem_tgt_id); + break; + }
ras_notify_event(RAS_DEVICE_MEDIA_ERROR, err_str, RAS_TYPE_INFO, RAS_SEV_ERROR, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL); +out: auto_faulty_detect(mem->mem_bs); - D_FREE(mem); } diff --git a/src/bio/bio_xstream.c b/src/bio/bio_xstream.c index 9785e038d31..966eadf1c42 100644 --- a/src/bio/bio_xstream.c +++ b/src/bio/bio_xstream.c @@ -62,6 +62,7 @@ unsigned int bio_spdk_subsys_timeout = 25000; /* ms */ /* How many blob unmap calls can be called in a row */ unsigned int bio_spdk_max_unmap_cnt = 32; unsigned int bio_max_async_sz = (1UL << 15) /* 32k */; +unsigned int bio_io_timeout = 120000000; /* us, 120 seconds */ struct bio_nvme_data { ABT_mutex bd_mutex; @@ -219,7 +220,7 @@ bio_nvme_init_ext(const char *nvme_conf, int numa_node, unsigned int mem_size, { char *env; int rc, fd; - unsigned int size_mb = BIO_DMA_CHUNK_MB; + unsigned int size_mb = BIO_DMA_CHUNK_MB, io_timeout_secs = 0; if (tgt_nr <= 0) { D_ERROR("tgt_nr: %u should be > 0\n", tgt_nr); @@ -277,6 +278,16 @@ bio_nvme_init_ext(const char *nvme_conf, int numa_node, unsigned int mem_size, d_getenv_uint("DAOS_MAX_ASYNC_SZ", &bio_max_async_sz); D_INFO("Max async data size is set to %u bytes\n", bio_max_async_sz); + d_getenv_uint("DAOS_SPDK_IO_TIMEOUT", &io_timeout_secs); + if (io_timeout_secs > 0) { + if (io_timeout_secs < 30 || io_timeout_secs > 300) + D_WARN("DAOS_SPDK_IO_TIMEOUT(%u) is invalid. 
Min:30,Max:300,Default:120\n", + io_timeout_secs); + else + bio_io_timeout = io_timeout_secs * 1000000; /* convert to us */ + } + D_INFO("SPDK IO timeout set to %u us\n", bio_io_timeout); + /* Hugepages disabled */ if (mem_size == 0) { D_INFO("Set per-xstream DMA buffer upper bound to %u %uMB chunks\n", @@ -1241,6 +1252,7 @@ alloc_xs_blobstore(void) if (bxb == NULL) return NULL; + D_INIT_LIST_HEAD(&bxb->bxb_pending_ios); D_INIT_LIST_HEAD(&bxb->bxb_io_ctxts); return bxb; @@ -1761,8 +1773,10 @@ bio_nvme_ctl(unsigned int cmd, void *arg) static inline void reset_media_errors(struct bio_blobstore *bbs) { - struct nvme_stats *dev_stats = &bbs->bb_dev_health.bdh_health_state; + struct bio_dev_health *bdh = &bbs->bb_dev_health; + struct nvme_stats *dev_stats = &bdh->bdh_health_state; + bdh->bdh_io_stalled = 0; dev_stats->bio_read_errs = 0; dev_stats->bio_write_errs = 0; dev_stats->bio_unmap_errs = 0; @@ -1992,5 +2006,8 @@ bio_nvme_poll(struct bio_xs_context *ctxt) bio_led_event_monitor(ctxt, now); } + /* Detect stalled I/Os */ + bio_io_monitor(ctxt, now); + return rc; } diff --git a/src/engine/sched.c b/src/engine/sched.c index 76ee6478810..0cc388c3085 100644 --- a/src/engine/sched.c +++ b/src/engine/sched.c @@ -1928,7 +1928,13 @@ need_nvme_poll(struct dss_xstream *dx, struct sched_cycle *cycle) dmi = dss_get_module_info(); D_ASSERT(dmi != NULL); - return bio_need_nvme_poll(dmi->dmi_nvme_ctxt); + /* + * If SPDK I/O stalls indefinitely due to a hardware fault (or software bug), + * the resulting backlog of undrained I/Os will cause bio_need_nvme_poll() to + * consistently return true. To prevent starvation and ensure system progress, + * schedule the NVMe polling ULT and other ULTs in an interleaved manner. 
+ */ + return !cycle->sc_age_nvme && bio_need_nvme_poll(dmi->dmi_nvme_ctxt); } static ABT_unit diff --git a/src/include/daos_srv/daos_engine.h b/src/include/daos_srv/daos_engine.h index 5c18dfff639..f6c14324155 100644 --- a/src/include/daos_srv/daos_engine.h +++ b/src/include/daos_srv/daos_engine.h @@ -739,10 +739,11 @@ enum dss_init_state { }; enum dss_media_error_type { - MET_WRITE = 0, /* write error */ - MET_READ, /* read error */ - MET_UNMAP, /* unmap error */ - MET_CSUM /* checksum error */ + MET_WRITE = 0, /* NVME write error */ + MET_READ, /* NVME read error */ + MET_UNMAP, /* NVME unmap error */ + MET_CSUM, /* Checksum error */ + MET_IO_STALLED, /* NVMe I/O stalled */ }; void dss_init_state_set(enum dss_init_state state); From 56dda8faa5230ab31f3b8593b0a8ccdbccc11248 Mon Sep 17 00:00:00 2001 From: Jerome Soumagne Date: Tue, 25 Nov 2025 11:38:40 -0600 Subject: [PATCH 034/253] DAOS-18188 build: tweak fpm options (#17087) Ignore vendor/prefix fields and set maintainer Signed-off-by: Jerome Soumagne --- utils/rpms/fpm_common.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/utils/rpms/fpm_common.sh b/utils/rpms/fpm_common.sh index 2e78742a1b2..18cce473275 100644 --- a/utils/rpms/fpm_common.sh +++ b/utils/rpms/fpm_common.sh @@ -179,6 +179,9 @@ build_package() { --architecture "${ARCH}" \ --description "${DESCRIPTION}" \ --url "${URL}" \ + --vendor "" \ + --maintainer "DAOS Foundation " \ + --prefix "" \ "${depends[@]}" \ "${conflicts[@]}" \ "${EXTRA_OPTS[@]}" \ From 75fb15ccca33aefec599fef0d398f685caecec1c Mon Sep 17 00:00:00 2001 From: Tomasz Gromadzki Date: Tue, 25 Nov 2025 18:41:58 +0100 Subject: [PATCH 035/253] SRE-2257 ci: no proxy option for docker (#16551) `DAOS_NO_PROXY` variable is used for the list of hosts/network that should not be accessed via proxy. 
The variable is propagated to Dockers build procedure via [pipeline-lib #472](https://github.com/daos-stack/pipeline-lib/pull/472) Signed-off-by: Tomasz Gromadzki --- utils/docker/Dockerfile.el.8 | 34 ++++++++------------- utils/docker/Dockerfile.el.9 | 10 ++++++ utils/docker/Dockerfile.leap.15 | 26 +++++++--------- utils/docker/Dockerfile.ubuntu | 10 ++++++ utils/scripts/helpers/repo-helper-el8.sh | 15 +++++---- utils/scripts/helpers/repo-helper-el9.sh | 11 +++---- utils/scripts/helpers/repo-helper-leap15.sh | 11 +++---- utils/scripts/helpers/repo-helper-ubuntu.sh | 11 +++---- 8 files changed, 66 insertions(+), 62 deletions(-) diff --git a/utils/docker/Dockerfile.el.8 b/utils/docker/Dockerfile.el.8 index 9108b8aab7d..bf80f3fc122 100644 --- a/utils/docker/Dockerfile.el.8 +++ b/utils/docker/Dockerfile.el.8 @@ -22,6 +22,16 @@ ARG REPO_FILE_URL ARG JENKINS_URL ARG REPOS ARG DAOS_LAB_CA_FILE_URL + +# Accept DAOS_NO_PROXY at build time +ARG DAOS_NO_PROXY +# Propagate into the build environment +ENV no_proxy=${DAOS_NO_PROXY} +ENV NO_PROXY=${DAOS_NO_PROXY} +# Persist into /etc/environment for use by shells and services +RUN echo "no_proxy=${DAOS_NO_PROXY}" >> /etc/environment && \ + echo "NO_PROXY=${DAOS_NO_PROXY}" >> /etc/environment + # script to install OS updates basic tools and daos dependencies COPY ./utils/scripts/install-el8.sh /tmp/install.sh # script to setup local repo if available @@ -37,9 +47,7 @@ FROM basic # with a local repository, yet needing a proxy to reach outside repositories. 
# This needs to be moved to a shell script like above in the future to # properly only remove the proxy variables only when they need to be removed -RUN if [ -n "$REPO_FILE_URL" ]; then direct="${REPO_FILE_URL##*//}; "\ - direct="${direct%%/*}"; export no_proxy="${direct}"; fi; \ - dnf upgrade && \ +RUN dnf upgrade && \ /tmp/install.sh && \ dnf clean all && \ rm -f /tmp/install.sh @@ -59,8 +67,6 @@ RUN mkdir -p /opt/daos /mnt/daos /var/run/daos_server /var/run/daos_agent /home/ chown -R daos_server.daos_server /opt/daos /mnt/daos /var/run/daos_server /home/daos && \ chown daos_agent.daos_agent /var/run/daos_agent -ARG JENKINS_URL - USER daos_server:daos_server # Setup a python venv so that python packages can be installed locally. @@ -96,8 +102,6 @@ ARG DAOS_PACKAGES_BUILD=yes # ensure that latest dependencies are used. USER root:root RUN [ "$DAOS_DEPS_BUILD" != "yes" ] || { \ - if [ -n "$REPO_FILE_URL" ]; then direct="${REPO_FILE_URL##*//}; " \ - direct="${direct%%/*}"; export no_proxy="${direct}"; fi; \ dnf upgrade --exclude=spdk,spdk-devel,dpdk-devel,dpdk,mercury-devel,mercury && \ dnf clean all; \ } @@ -106,8 +110,6 @@ USER daos_server:daos_server ARG DEPS_JOBS=1 RUN [ "$DAOS_DEPS_BUILD" != "yes" ] || { \ - if [ -n "$REPO_FILE_URL" ]; then direct="${REPO_FILE_URL##*//}; "\ - direct="${direct%%/*}"; export no_proxy="${direct}"; fi; \ scons --build-deps=only --jobs $DEPS_JOBS PREFIX=/opt/daos \ TARGET_TYPE=$DAOS_TARGET_TYPE && \ ([ "$DAOS_KEEP_BUILD" != "no" ] || /bin/rm -rf build *.gz); \ @@ -118,8 +120,7 @@ COPY --chown=daos_server:daos_server utils/sl utils/sl # Build third party RPMs RUN [ "$DAOS_PACKAGES_BUILD" != "yes" ] || [ "$DAOS_DEPS_BUILD" != "yes" ] || { \ - if [ -n "$REPO_FILE_URL" ]; then direct="${REPO_FILE_URL##*//}; "\ - direct="${direct%%/*}"; export no_proxy="${direct}"; fi; \ + export DISTRO="el8" && \ utils/rpms/build_packages.sh deps && \ mkdir -p /home/daos/rpms && \ mv *.rpm /home/daos/rpms; \ @@ -129,10 +130,7 @@ USER root:root # force an 
upgrade to get any newly built RPMs, but only if CB1 is set. ARG CB1 RUN [ -z "$CB1" ] || { \ - if [ -n "$REPO_FILE_URL" ]; then direct="${REPO_FILE_URL##*//}; " \ - direct="${direct%%/*}"; export no_proxy="${direct}"; fi; \ - dnf upgrade \ - --exclude=spdk,spdk-devel,dpdk-devel,dpdk,mercury-devel,mercury && \ + dnf upgrade --exclude=spdk,spdk-devel,dpdk-devel,dpdk,mercury-devel,mercury && \ dnf clean all; \ } USER daos_server:daos_server @@ -158,8 +156,6 @@ ARG DAOS_BUILD=$DAOS_DEPS_BUILD # Build DAOS RUN [ "$DAOS_BUILD" != "yes" ] || { \ - if [ -n "$REPO_FILE_URL" ]; then direct="${REPO_FILE_URL##*//}; " \ - direct="${direct%%/*}"; export no_proxy="${direct}"; fi; \ scons --jobs $JOBS install PREFIX=/opt/daos COMPILER=$COMPILER \ FIRMWARE_MGMT=1 BUILD_TYPE=$DAOS_BUILD_TYPE TARGET_TYPE=$DAOS_TARGET_TYPE && \ ([ "$DAOS_KEEP_BUILD" != "no" ] || /bin/rm -rf build) && \ @@ -171,8 +167,6 @@ COPY --chown=daos_server:daos_server utils utils # Build DAOS RPMs RUN [ "$DAOS_PACKAGES_BUILD" != "yes" ] || [ "$DAOS_BUILD" != "yes" ] || { \ - if [ -n "$REPO_FILE_URL" ]; then direct="${REPO_FILE_URL##*//}; " \ - direct="${direct%%/*}"; export no_proxy="${direct}"; fi; \ utils/rpms/build_packages.sh daos && \ mkdir -p /home/daos/rpms && \ cp *.rpm /home/daos/rpms; \ @@ -188,8 +182,6 @@ WORKDIR /home/daos/daos/src/client/java ARG DAOS_JAVA_BUILD=$DAOS_BUILD RUN [ "$DAOS_JAVA_BUILD" != "yes" ] || { \ - if [ -n "$REPO_FILE_URL" ]; then direct="${REPO_FILE_URL##*//}; " \ - direct="${direct%%/*}"; export no_proxy="${direct}"; fi; \ mkdir /home/daos/.m2 && \ cp /home/daos/daos/utils/scripts/helpers/maven-settings.xml.in /home/daos/.m2/settings.xml && \ export JAVA_HOME=$(daos-java/find_java_home.sh) && \ diff --git a/utils/docker/Dockerfile.el.9 b/utils/docker/Dockerfile.el.9 index 5187d0d4407..82c5287ff26 100644 --- a/utils/docker/Dockerfile.el.9 +++ b/utils/docker/Dockerfile.el.9 @@ -22,6 +22,16 @@ ARG REPO_FILE_URL ARG JENKINS_URL ARG REPOS ARG DAOS_LAB_CA_FILE_URL + +# Accept 
DAOS_NO_PROXY at build time +ARG DAOS_NO_PROXY +# Propagate into the build environment +ENV no_proxy=${DAOS_NO_PROXY} +ENV NO_PROXY=${DAOS_NO_PROXY} +# Persist into /etc/environment for use by shells and services +RUN echo "no_proxy=${DAOS_NO_PROXY}" >> /etc/environment && \ + echo "NO_PROXY=${DAOS_NO_PROXY}" >> /etc/environment + # script to install OS updates basic tools and daos dependencies COPY ./utils/scripts/install-el9.sh /tmp/install.sh # script to setup local repo if available diff --git a/utils/docker/Dockerfile.leap.15 b/utils/docker/Dockerfile.leap.15 index 7ea185fb60c..93b40c12630 100644 --- a/utils/docker/Dockerfile.leap.15 +++ b/utils/docker/Dockerfile.leap.15 @@ -22,6 +22,16 @@ ARG REPO_FILE_URL ARG JENKINS_URL ARG REPOS ARG DAOS_LAB_CA_FILE_URL + +# Accept DAOS_NO_PROXY at build time +ARG DAOS_NO_PROXY +# Propagate into the build environment +ENV no_proxy=${DAOS_NO_PROXY} +ENV NO_PROXY=${DAOS_NO_PROXY} +# Persist into /etc/environment for use by shells and services +RUN echo "no_proxy=${DAOS_NO_PROXY}" >> /etc/environment && \ + echo "NO_PROXY=${DAOS_NO_PROXY}" >> /etc/environment + # script to install OS updates basic tools and daos dependencies COPY ./utils/scripts/install-leap15.sh /tmp/install.sh # script to setup local repo if available @@ -38,9 +48,7 @@ FROM basic # with a local repository, yet needing a proxy to reach outside repositories. # This needs to be moved to a shell script like above in the future to # properly only remove the proxy variables only when they need to be removed -RUN if [ -n "$REPO_FILE_URL" ]; then direct="${REPO_FILE_URL##*//}; "\ - direct="${direct%%/*}"; export no_proxy="${direct}"; fi; \ - dnf upgrade && \ +RUN dnf upgrade && \ /tmp/install.sh && \ dnf clean all && \ rm -f /tmp/install.sh @@ -96,8 +104,6 @@ ARG DAOS_PACKAGES_BUILD=yes # The dnf upgrade can add or re-enable distro repositories. 
USER root:root RUN [ "$DAOS_DEPS_BUILD" != "yes" ] || { \ - if [ -n "$REPO_FILE_URL" ]; then direct="${REPO_FILE_URL##*//}; " \ - direct="${direct%%/*}"; export no_proxy="${direct}"; fi; \ dnf upgrade --exclude=fuse,fuse-libs,fuse-devel,libraft0,raft-devel,mercury,mercury-devel && \ dnf clean all; \ } @@ -106,8 +112,6 @@ USER daos_server:daos_server ARG DEPS_JOBS=1 RUN [ "$DAOS_DEPS_BUILD" != "yes" ] || { \ - if [ -n "$REPO_FILE_URL" ]; then direct="${REPO_FILE_URL##*//}; "\ - direct="${direct%%/*}"; export no_proxy="${direct}"; fi; \ scons --build-deps=only --jobs $DEPS_JOBS PREFIX=/opt/daos \ TARGET_TYPE=$DAOS_TARGET_TYPE && \ ([ "$DAOS_KEEP_BUILD" != "no" ] || /bin/rm -rf build *.gz); \ @@ -140,8 +144,6 @@ RUN if [ "$COMPILER" = "icc" ]; then rpm --import https://yum.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB && \ dnf config-manager --add-repo https://yum.repos.intel.com/oneapi oneAPI; \ fi; \ - if [ -n "$REPO_FILE_URL" ]; then direct="${REPO_FILE_URL##*//}; "\ - direct="${direct%%/*}"; export no_proxy="${direct}"; fi; \ dnf install intel-oneapi-compiler-dpcpp-cpp && \ dnf clean all; \ fi @@ -149,8 +151,6 @@ RUN if [ "$COMPILER" = "icc" ]; then # force an upgrade to get any newly built RPMs, but only if CB1 is set. 
ARG CB1 RUN [ -z "$CB1" ] || { \ - if [ -n "$REPO_FILE_URL" ]; then direct="${REPO_FILE_URL##*//}; " \ - direct="${direct%%/*}"; export no_proxy="${direct}"; fi; \ dnf upgrade --exclude=fuse,fuse-libs,fuse-devel,libraft0,raft-devel,mercury,mercury-devel && \ dnf clean all; \ } @@ -175,8 +175,6 @@ ARG DAOS_BUILD=$DAOS_DEPS_BUILD # Build DAOS RUN [ "$DAOS_BUILD" != "yes" ] || { \ - if [ -n "$REPO_FILE_URL" ]; then direct="${REPO_FILE_URL##*//}; " \ - direct="${direct%%/*}"; export no_proxy="${direct}"; fi; \ scons --jobs $JOBS install PREFIX=/opt/daos COMPILER=$COMPILER \ BUILD_TYPE=$DAOS_BUILD_TYPE TARGET_TYPE=$DAOS_TARGET_TYPE \ FIRMWARE_MGMT=1 && \ @@ -205,8 +203,6 @@ WORKDIR /home/daos/daos/src/client/java ARG DAOS_JAVA_BUILD=$DAOS_BUILD RUN [ "$DAOS_JAVA_BUILD" != "yes" ] || { \ - if [ -n "$REPO_FILE_URL" ]; then direct="${REPO_FILE_URL##*//}; " \ - direct="${direct%%/*}"; export no_proxy="${direct}"; fi; \ mkdir /home/daos/.m2 && \ cp /home/daos/daos/utils/scripts/helpers/maven-settings.xml.in /home/daos/.m2/settings.xml && \ mvn clean install -ntp -T 1C -DskipITs -Dgpg.skip -Ddaos.install.path=/opt/daos; \ diff --git a/utils/docker/Dockerfile.ubuntu b/utils/docker/Dockerfile.ubuntu index b3790b5f980..589c3f3dd9d 100644 --- a/utils/docker/Dockerfile.ubuntu +++ b/utils/docker/Dockerfile.ubuntu @@ -22,6 +22,16 @@ ENV DEBIAN_FRONTEND=noninteractive # Install basic tools ARG DAOS_LAB_CA_FILE_URL + +# Accept DAOS_NO_PROXY at build time +ARG DAOS_NO_PROXY +# Propagate into the build environment +ENV no_proxy=${DAOS_NO_PROXY} +ENV NO_PROXY=${DAOS_NO_PROXY} +# Persist into /etc/environment for use by shells and services +RUN echo "no_proxy=${DAOS_NO_PROXY}" >> /etc/environment && \ + echo "NO_PROXY=${DAOS_NO_PROXY}" >> /etc/environment + # script to setup local repo if available and install packages COPY ./utils/scripts/helpers/repo-helper-ubuntu.sh /tmp/repo-helper.sh COPY ./utils/scripts/install-ubuntu.sh /tmp/install.sh diff --git 
a/utils/scripts/helpers/repo-helper-el8.sh b/utils/scripts/helpers/repo-helper-el8.sh index 88bcf6654aa..3572699d634 100755 --- a/utils/scripts/helpers/repo-helper-el8.sh +++ b/utils/scripts/helpers/repo-helper-el8.sh @@ -74,8 +74,8 @@ if [ -n "$REPO_FILE_URL" ]; then popd # These may have been created in the Dockerfile must be removed # when using a local repository. - unset HTTPS_PROXY - unset https_proxy + # unset HTTPS_PROXY + # unset https_proxy fi dnf -y --disablerepo \*epel\* install dnf-plugins-core dnf -y config-manager --save --setopt=assumeyes=True @@ -130,10 +130,9 @@ disable_repos /etc/yum.repos.d/ "${save_repos[@]}" if [ -n "$REPO_FILE_URL" ]; then trusted_host="${REPO_FILE_URL##*//}" trusted_host="${trusted_host%%/*}"; \ - { - echo "[global]" - echo "trusted-host = ${trusted_host}" - echo "index-url = https://${trusted_host}/artifactory/api/pypi/pypi-proxy/simple" - echo "proxy = \"\"" - } > /etc/pip.conf + cat < /etc/pip.conf +[global] + trusted-host = ${trusted_host} + index-url = https://${trusted_host}/artifactory/api/pypi/pypi-proxy/simple +EOF fi diff --git a/utils/scripts/helpers/repo-helper-el9.sh b/utils/scripts/helpers/repo-helper-el9.sh index f96a144b674..b595d5d5c15 100644 --- a/utils/scripts/helpers/repo-helper-el9.sh +++ b/utils/scripts/helpers/repo-helper-el9.sh @@ -126,10 +126,9 @@ disable_repos /etc/yum.repos.d/ "${save_repos[@]}" if [ -n "$REPO_FILE_URL" ]; then trusted_host="${REPO_FILE_URL##*//}" trusted_host="${trusted_host%%/*}"; \ - { - echo "[global]" - echo "trusted-host = ${trusted_host}" - echo "index-url = https://${trusted_host}/artifactory/api/pypi/pypi-proxy/simple" - echo "proxy = " - } > /etc/pip.conf + cat < /etc/pip.conf +[global] + trusted-host = ${trusted_host} + index-url = https://${trusted_host}/artifactory/api/pypi/pypi-proxy/simple +EOF fi diff --git a/utils/scripts/helpers/repo-helper-leap15.sh b/utils/scripts/helpers/repo-helper-leap15.sh index ab01f2cda51..98ced4e82ea 100755 --- 
a/utils/scripts/helpers/repo-helper-leap15.sh +++ b/utils/scripts/helpers/repo-helper-leap15.sh @@ -174,10 +174,9 @@ update-ca-certificates if [ -n "$REPO_FILE_URL" ]; then trusted_host="${REPO_FILE_URL##*//}" trusted_host="${trusted_host%%/*}"; \ - { - echo "[global]" - echo "trusted-host = ${trusted_host}" - echo "index-url = https://${trusted_host}/artifactory/api/pypi/pypi-proxy/simple" - echo "proxy = \"\"" - } > /etc/pip.conf + cat < /etc/pip.conf +[global] + trusted-host = ${trusted_host} + index-url = https://${trusted_host}/artifactory/api/pypi/pypi-proxy/simple +EOF fi diff --git a/utils/scripts/helpers/repo-helper-ubuntu.sh b/utils/scripts/helpers/repo-helper-ubuntu.sh index 32cf3663065..c7738a982de 100644 --- a/utils/scripts/helpers/repo-helper-ubuntu.sh +++ b/utils/scripts/helpers/repo-helper-ubuntu.sh @@ -111,10 +111,9 @@ apt-get clean all if [ -n "$REPO_FILE_URL" ]; then trusted_host="${REPO_FILE_URL##*//}" trusted_host="${trusted_host%%/*}"; \ - { - echo "[global]" - echo "trusted-host = ${trusted_host}" - echo "index-url = https://${trusted_host}/artifactory/api/pypi/pypi-proxy/simple" - echo "proxy = \"\"" - } > /etc/pip.conf + cat < /etc/pip.conf +[global] + trusted-host = ${trusted_host} + index-url = https://${trusted_host}/artifactory/api/pypi/pypi-proxy/simple +EOF fi From 57d09544ee608838e25b4cc30ed31db6bf8e6ada Mon Sep 17 00:00:00 2001 From: Phil Henderson Date: Tue, 25 Nov 2025 14:47:13 -0500 Subject: [PATCH 036/253] DAOS-18116 test: Fix pool/mem_ratio.py test (#17065) Search the server control log file for requested NVMe capacity too small allowable errors instead of the server log files. 
Signed-off-by: Phil Henderson --- src/tests/ftest/pool/mem_ratio.py | 23 +++++++++++------- src/tests/ftest/util/server_utils.py | 36 ++++++++++++++++++++++------ 2 files changed, 43 insertions(+), 16 deletions(-) diff --git a/src/tests/ftest/pool/mem_ratio.py b/src/tests/ftest/pool/mem_ratio.py index 822e393ea2b..64b86358951 100644 --- a/src/tests/ftest/pool/mem_ratio.py +++ b/src/tests/ftest/pool/mem_ratio.py @@ -22,16 +22,21 @@ def check_insufficient_size(self, error): Args: error (Exception): the error raised during pool creation """ - allowed_errors = [ - "Insufficient scm size", - "No space on storage target", - "requested NVMe capacity too small"] - pattern = f"({'|'.join(allowed_errors)})" self.log.debug("Verifying Pool creation failure: %s", error) - result = self.server_managers[0].search_engine_logs(pattern) - if not result.passed: - raise error - self.log.debug("Pool create failure expected due to: '%s'", pattern) + pattern_methods = ( + ("(Insufficient scm size|No space on storage target)", + self.server_managers[0].search_engine_logs), + ("requested NVMe capacity too small", + self.server_managers[0].search_control_logs) + ) + for pattern, method in pattern_methods: + result = method(pattern) + for data in result.output: + if data.passed and not data.timeout: + # Expected failure detected in at least one of the logs + self.log.debug("Pool create failure expected due to: '%s'", pattern) + return + raise error @staticmethod def readable_bytes(size): diff --git a/src/tests/ftest/util/server_utils.py b/src/tests/ftest/util/server_utils.py index f6d3223257a..42581f0d43e 100644 --- a/src/tests/ftest/util/server_utils.py +++ b/src/tests/ftest/util/server_utils.py @@ -1198,18 +1198,40 @@ def get_vos_files(self, pool, pattern="vos"): return vos_files def search_engine_logs(self, pattern): - """Search the server logs for a specific pattern. + """Search the server log files for a specific pattern. Args: - pattern (str): The pattern to search for in the logs. 
+ pattern (str): The pattern to search for in the log files. Returns: - CommandResult: Result of the grep command run against each server log. + CommandResult: Result of the grep command run against each server log file. """ - # Get the path of one of the server log files log_dir = os.path.dirname(self.get_config_value("log_file")) - command = (f"find {log_dir} -type f -regextype egrep " - r"-regex '.*/daos_server[[:digit:]]?\.log\.[[:digit:]]+' -print0 " - f"| xargs -0 -r grep -E -e '{pattern}'") + find_args = (f"{log_dir} -type f -regextype egrep -regex " + r"'.*/daos_server[[:digit:]]?\.log\.[[:digit:]]+'") + return self._search_logs(find_args, pattern) + + def search_control_logs(self, pattern): + """Search the control log files for a specific pattern. + + Args: + pattern (str): The pattern to search for in the log files + + Returns: + CommandResult: Result of the grep command run against each control log file. + """ + return self._search_logs(f"{self.get_config_value('control_log_file')}", pattern) + + def _search_logs(self, find_args, pattern): + """Search the log files for a specific pattern. + + Args: + find_args (str): arguments used with the find command to locate the log files + pattern (str): The pattern to search for in the log files + + Returns: + CommandResult: Result of the grep command run against each log file. 
+ """ + command = f"find {find_args} -print0 | xargs -0 -r grep -E -e '{pattern}'" result = run_remote(self.log, self.hosts, command_as_user(command, "root")) return result From 0b303732b715f23297a486e7b697c79a5593b3e3 Mon Sep 17 00:00:00 2001 From: Jeff Olivier Date: Tue, 25 Nov 2025 17:25:47 -0700 Subject: [PATCH 037/253] DAOS-17207 build: Upgrade isa-l_crypto to 2.25.0 (#17090) * SPDK 24.09 uses this version * Avoid deprecated APIs as it causes compiler warnings * Add required `nasm` package to install scripts * Add isa-l_crypto.changelog Signed-off-by: Jeff Olivier --- src/common/multihash_isal.c | 119 +++++++++++------- .../dlck/tests/fault_injection_dlck.yaml | 1 + utils/build.config | 2 +- utils/rpms/daos.changelog | 3 + utils/rpms/daos.sh | 2 + utils/rpms/daos.spec | 2 +- utils/rpms/isa-l_crypto.changelog | 24 ++++ utils/rpms/isa-l_crypto.sh | 1 + utils/rpms/package_info.sh | 4 +- utils/scripts/install-el8.sh | 1 + utils/scripts/install-el9.sh | 1 + utils/scripts/install-leap15.sh | 1 + utils/scripts/install-ubuntu.sh | 1 + 13 files changed, 115 insertions(+), 47 deletions(-) create mode 100644 utils/rpms/isa-l_crypto.changelog diff --git a/src/common/multihash_isal.c b/src/common/multihash_isal.c index 6b858e23c9c..cf293507bce 100644 --- a/src/common/multihash_isal.c +++ b/src/common/multihash_isal.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2020-2021 Intel Corporation. + * (C) Copyright 2025 Google LLC * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -237,6 +238,13 @@ crc64_finish(void *daos_mhash_ctx, uint8_t *buf, size_t buf_len) return 0; } +#define HANDLE_ERROR(statement) \ + ({ \ + int __rc__ = statement; \ + D_ASSERTF(__rc__ == 0, #statement " = %d\n", __rc__); \ + __rc__ == 0 ? 
0 : -DER_INVAL; \ + }) + struct hash_ft crc64_algo = { .cf_update = crc64_update, .cf_init = crc64_init, @@ -250,7 +258,7 @@ struct hash_ft crc64_algo = { /** SHA1 */ struct sha1_ctx { - struct mh_sha1_ctx s1_ctx; + struct isal_mh_sha1_ctx s1_ctx; bool s1_updated; }; @@ -264,10 +272,14 @@ sha1_init(void **daos_mhash_ctx) if (ctx == NULL) return -DER_NOMEM; - rc = mh_sha1_init(&ctx->s1_ctx); - if (rc == 0) - *daos_mhash_ctx = ctx; - return rc; + rc = isal_mh_sha1_init(&ctx->s1_ctx); + if (rc != 0) { + D_FREE(ctx); + return HANDLE_ERROR(rc); + } + *daos_mhash_ctx = ctx; + + return 0; } static int @@ -276,7 +288,7 @@ sha1_reset(void *daos_mhash_ctx) struct sha1_ctx *ctx = daos_mhash_ctx; ctx->s1_updated = false; - return mh_sha1_init(&ctx->s1_ctx); + return HANDLE_ERROR(isal_mh_sha1_init(&ctx->s1_ctx)); } static void @@ -291,7 +303,7 @@ sha1_update(void *daos_mhash_ctx, uint8_t *buf, size_t buf_len) struct sha1_ctx *ctx = daos_mhash_ctx; ctx->s1_updated = true; - return mh_sha1_update(&ctx->s1_ctx, buf, buf_len); + return HANDLE_ERROR(isal_mh_sha1_update(&ctx->s1_ctx, buf, buf_len)); } static int @@ -300,7 +312,7 @@ sha1_finish(void *daos_mhash_ctx, uint8_t *buf, size_t buf_len) struct sha1_ctx *ctx = daos_mhash_ctx; if (ctx->s1_updated) - return mh_sha1_finalize(&ctx->s1_ctx, buf); + return HANDLE_ERROR(isal_mh_sha1_finalize(&ctx->s1_ctx, buf)); return 0; } @@ -317,8 +329,8 @@ struct hash_ft sha1_algo = { /** SHA256 */ struct sha256_ctx { - struct mh_sha256_ctx s2_ctx; - bool s2_updated; + struct isal_mh_sha256_ctx s2_ctx; + bool s2_updated; }; static int @@ -331,10 +343,16 @@ sha256_init(void **daos_mhash_ctx) if (ctx == NULL) return -DER_NOMEM; - rc = mh_sha256_init(&ctx->s2_ctx); - if (rc == 0) - *daos_mhash_ctx = ctx; - return rc; + rc = isal_mh_sha256_init(&ctx->s2_ctx); + if (rc != 0) { + D_FREE(ctx); + return HANDLE_ERROR(rc); + } + + *daos_mhash_ctx = ctx; + ctx->s2_updated = false; + + return 0; } static int @@ -343,7 +361,7 @@ sha256_reset(void 
*daos_mhash_ctx) struct sha256_ctx *ctx = daos_mhash_ctx; ctx->s2_updated = false; - return mh_sha256_init(&ctx->s2_ctx); + return HANDLE_ERROR(isal_mh_sha256_init(&ctx->s2_ctx)); } static void @@ -358,7 +376,7 @@ sha256_update(void *daos_mhash_ctx, uint8_t *buf, size_t buf_len) struct sha256_ctx *ctx = daos_mhash_ctx; ctx->s2_updated = true; - return mh_sha256_update(&ctx->s2_ctx, buf, buf_len); + return HANDLE_ERROR(isal_mh_sha256_update(&ctx->s2_ctx, buf, buf_len)); } static int @@ -367,7 +385,7 @@ sha256_finish(void *daos_mhash_ctx, uint8_t *buf, size_t buf_len) struct sha256_ctx *ctx = daos_mhash_ctx; if (ctx->s2_updated) - return mh_sha256_finalize(&ctx->s2_ctx, buf); + return HANDLE_ERROR(isal_mh_sha256_finalize(&ctx->s2_ctx, buf)); return 0; } @@ -384,22 +402,28 @@ struct hash_ft sha256_algo = { /** SHA512 */ struct sha512_ctx { - SHA512_HASH_CTX_MGR s5_mgr; - SHA512_HASH_CTX s5_ctx; - bool s5_updated; + ISAL_SHA512_HASH_CTX_MGR s5_mgr; + ISAL_SHA512_HASH_CTX s5_ctx; + bool s5_updated; }; static int sha512_init(void **daos_mhash_ctx) { struct sha512_ctx *ctx; + int rc; D_ALLOC_PTR(ctx); if (ctx == NULL) return -DER_NOMEM; - sha512_ctx_mgr_init(&ctx->s5_mgr); - hash_ctx_init(&ctx->s5_ctx); + rc = isal_sha512_ctx_mgr_init(&ctx->s5_mgr); + if (rc != 0) { + D_FREE(ctx); + return HANDLE_ERROR(rc); + } + isal_hash_ctx_init(&ctx->s5_ctx); + ctx->s5_updated = false; *daos_mhash_ctx = ctx; return 0; @@ -417,6 +441,8 @@ sha512_reset(void *daos_mhash_ctx) struct sha512_ctx *ctx = daos_mhash_ctx; ctx->s5_updated = false; + isal_hash_ctx_init(&ctx->s5_ctx); + return 0; } @@ -424,48 +450,55 @@ static int sha512_update(void *daos_mhash_ctx, uint8_t *buf, size_t buf_len) { struct sha512_ctx *ctx = daos_mhash_ctx; - SHA512_HASH_CTX *tmp; + ISAL_SHA512_HASH_CTX *tmp = NULL; + int rc; if (!ctx->s5_updated) - tmp = sha512_ctx_mgr_submit(&ctx->s5_mgr, - &ctx->s5_ctx, buf, - buf_len, - HASH_FIRST); + rc = isal_sha512_ctx_mgr_submit(&ctx->s5_mgr, &ctx->s5_ctx, &tmp, buf, buf_len, 
+ ISAL_HASH_FIRST); else - tmp = sha512_ctx_mgr_submit(&ctx->s5_mgr, - &ctx->s5_ctx, buf, - buf_len, - HASH_UPDATE); + rc = isal_sha512_ctx_mgr_submit(&ctx->s5_mgr, &ctx->s5_ctx, &tmp, buf, buf_len, + ISAL_HASH_UPDATE); + + if (rc != 0) + return HANDLE_ERROR(rc); - if (tmp == NULL) - sha512_ctx_mgr_flush(&ctx->s5_mgr); + if (tmp == NULL) { + rc = isal_sha512_ctx_mgr_flush(&ctx->s5_mgr, &tmp); + if (rc != 0) + return HANDLE_ERROR(rc); + } ctx->s5_updated = true; - return ctx->s5_ctx.error; + return HANDLE_ERROR(ctx->s5_ctx.error); } static int sha512_finish(void *daos_mhash_ctx, uint8_t *buf, size_t buf_len) { struct sha512_ctx *ctx = daos_mhash_ctx; + int rc = 0; if (ctx->s5_updated) { - SHA512_HASH_CTX *tmp; + ISAL_SHA512_HASH_CTX *tmp = NULL; - tmp = sha512_ctx_mgr_submit(&ctx->s5_mgr, - &ctx->s5_ctx, NULL, - 0, - HASH_LAST); + rc = isal_sha512_ctx_mgr_submit(&ctx->s5_mgr, &ctx->s5_ctx, &tmp, NULL, 0, + ISAL_HASH_LAST); + if (rc != 0) + return HANDLE_ERROR(rc); - if (tmp == NULL) - sha512_ctx_mgr_flush(&ctx->s5_mgr); + if (tmp == NULL) { + rc = isal_sha512_ctx_mgr_flush(&ctx->s5_mgr, &tmp); + if (rc != 0) + return HANDLE_ERROR(rc); + } memcpy(buf, ctx->s5_ctx.job.result_digest, buf_len); - return ctx->s5_ctx.error; + rc = ctx->s5_ctx.error; } - return 0; + return HANDLE_ERROR(rc); } struct hash_ft sha512_algo = { diff --git a/src/utils/dlck/tests/fault_injection_dlck.yaml b/src/utils/dlck/tests/fault_injection_dlck.yaml index 36cb8095976..8dd036f3ce0 100644 --- a/src/utils/dlck/tests/fault_injection_dlck.yaml +++ b/src/utils/dlck/tests/fault_injection_dlck.yaml @@ -1,4 +1,5 @@ # Uncomment a fault you would like to trigger +# yamllint disable rule:comments-indentation fault_config: # - id: 131328 # DLCK_FAULT_CREATE_LOG_DIR # - id: 131329 # DLCK_FAULT_CREATE_POOL_DIR diff --git a/utils/build.config b/utils/build.config index 7aa44484c0f..735a160be05 100644 --- a/utils/build.config +++ b/utils/build.config @@ -6,7 +6,7 @@ argobots=v1.2 fused=v1.0.0 pmdk=2.1.2 
isal=v2.31.1 -isal_crypto=v2.24.0 +isal_crypto=v2.25.0 spdk=v22.01.2 ofi=v1.22.0 mercury=v2.4.0 diff --git a/utils/rpms/daos.changelog b/utils/rpms/daos.changelog index 59172d6c460..312526fc063 100644 --- a/utils/rpms/daos.changelog +++ b/utils/rpms/daos.changelog @@ -1,4 +1,7 @@ %changelog +* Mon Nov 24 2025 Jeff Olivier 2.7.102-2 +- Require isal_crypto 2.25.0 due to API deprecation warnings + * Fri Nov 17 2025 Phillip Henderson 2.7.102-1 - Bump version to 2.7.102 diff --git a/utils/rpms/daos.sh b/utils/rpms/daos.sh index f6d4a97df42..6a79e0ccfaa 100755 --- a/utils/rpms/daos.sh +++ b/utils/rpms/daos.sh @@ -65,6 +65,7 @@ install_list+=("${tmp}${sysconfdir}/daos/certs=${sysconfdir}/daos") EXTRA_OPTS+=("--rpm-attr" "0755,root,root:${sysconfdir}/daos/certs") DEPENDS=( "mercury >= ${mercury_full}" "${libfabric_lib} >= ${libfabric_full}" ) +DEPENDS+=( "${isal_crypto_lib} >= ${isal_crypto_version}" ) build_package "daos" # Only build server RPMs if we built the server @@ -180,6 +181,7 @@ EOF DEPENDS=( "daos = ${VERSION}-${RELEASE}" "daos-spdk = ${daos_spdk_full}" ) DEPENDS+=( "${pmemobj_lib} >= ${pmdk_full}" "${argobots_lib} >= ${argobots_full}" ) + DEPENDS+=( "${isal_crypto_lib} >= ${isal_crypto_version}" ) build_package "daos-server" TARGET_PATH="${bindir}" diff --git a/utils/rpms/daos.spec b/utils/rpms/daos.spec index ec834343eda..2f54a5d9624 100644 --- a/utils/rpms/daos.spec +++ b/utils/rpms/daos.spec @@ -25,7 +25,7 @@ Name: daos Version: 2.7.102 -Release: 1%{?relval}%{?dist} +Release: 2%{?relval}%{?dist} Summary: DAOS Storage Engine License: BSD-2-Clause-Patent diff --git a/utils/rpms/isa-l_crypto.changelog b/utils/rpms/isa-l_crypto.changelog new file mode 100644 index 00000000000..fa390eeb012 --- /dev/null +++ b/utils/rpms/isa-l_crypto.changelog @@ -0,0 +1,24 @@ +* Thu Nov 6 2025 Jeff Olivier - 2.25.0-1 +- Update DAOS to 2.25.0 release +- Restore change log missing since 2.24.0-1 + +* Wed Sep 10 2025 Jeff Olivier - 2.24.0-2 +- Use fpm to build isa-l_crypto + +* Thu 
Jun 22 2023 Brian J. Murrell - 2.24.0-1 +- Update to new version +- Disable static library build +- Add debuginfo generation for Leap 15 + +* Mon Feb 01 2021 Brian J. Murrell - 2.23.0-1 +- Update to new version +- Add %%{_libdir}/pkgconfig/libisal_crypto.pc to -devel package + +* Wed Oct 02 2019 John E. Malmberg - 2.21.0-3 +- Fix the Red Hat family devel package name. + +* Wed Oct 02 2019 John E. Malmberg - 2.21.0-2 +- Fix some SUSE rpmlint packaging complaints + +* Fri Aug 16 2019 Ryon Jensen - 2.21.0-1 +- initial package diff --git a/utils/rpms/isa-l_crypto.sh b/utils/rpms/isa-l_crypto.sh index 5fbe9a7eee3..32c6910cf45 100755 --- a/utils/rpms/isa-l_crypto.sh +++ b/utils/rpms/isa-l_crypto.sh @@ -26,6 +26,7 @@ SHA1, SHA256, SHA512, MD5) Provides various algorithms for erasure coding, crc, raid, compression and decompression" URL="https://github.com/intel/isa-l_crypto" +RPM_CHANGELOG="isa-l_crypto.changelog" files=() TARGET_PATH="${libdir}" diff --git a/utils/rpms/package_info.sh b/utils/rpms/package_info.sh index d4c5eba3e3d..0d31aa6ec60 100644 --- a/utils/rpms/package_info.sh +++ b/utils/rpms/package_info.sh @@ -49,8 +49,8 @@ export pmdk_full="${pmdk_version}-${pmdk_release}" export isal_version="2.31.1" export isal_release="8${distro_name}" export isal_full="${isal_version}-${isal_release}" -export isal_crypto_version="2.24.0" -export isal_crypto_release="3${distro_name}" +export isal_crypto_version="2.25.0" +export isal_crypto_release="1${distro_name}" export isal_crypto_full="${isal_crypto_version}-${isal_crypto_release}" export daos_spdk_version="1.0.0" export daos_spdk_release="4${distro_name}" diff --git a/utils/scripts/install-el8.sh b/utils/scripts/install-el8.sh index cb51c8a7f65..5b1239dc094 100755 --- a/utils/scripts/install-el8.sh +++ b/utils/scripts/install-el8.sh @@ -59,6 +59,7 @@ dnf --nodocs install ${dnf_install_args} \ Lmod \ lz4-devel \ make \ + nasm \ ndctl \ ndctl-devel \ numactl \ diff --git a/utils/scripts/install-el9.sh 
b/utils/scripts/install-el9.sh index 268f1c109ca..355272f1c63 100755 --- a/utils/scripts/install-el9.sh +++ b/utils/scripts/install-el9.sh @@ -59,6 +59,7 @@ dnf --nodocs install ${dnf_install_args} \ lz4-devel \ Lmod \ make \ + nasm \ ndctl \ ndctl-devel \ numactl \ diff --git a/utils/scripts/install-leap15.sh b/utils/scripts/install-leap15.sh index ae859e4fffb..87447d05695 100755 --- a/utils/scripts/install-leap15.sh +++ b/utils/scripts/install-leap15.sh @@ -60,6 +60,7 @@ dnf --nodocs install ${dnf_install_args} \ lua-lmod \ make \ maven \ + nasm \ numactl \ openmpi3-devel \ pandoc \ diff --git a/utils/scripts/install-ubuntu.sh b/utils/scripts/install-ubuntu.sh index 8c41006d70b..0c4dbae59e6 100755 --- a/utils/scripts/install-ubuntu.sh +++ b/utils/scripts/install-ubuntu.sh @@ -53,6 +53,7 @@ apt-get install ${apt_get_install_args} \ libyaml-dev \ locales \ maven \ + nasm \ numactl \ openjdk-8-jdk \ pandoc \ From f42142f60009149f5eae2d51f7674b3763391193 Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Wed, 26 Nov 2025 15:56:31 +0800 Subject: [PATCH 038/253] DAOS-18280 test: Fix container_rf test failure (#17175) pass correct str arg to verify rd_fac function. 
Signed-off-by: Wang Shilong --- src/tests/ftest/util/container_rf_test_base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tests/ftest/util/container_rf_test_base.py b/src/tests/ftest/util/container_rf_test_base.py index f9af2673faa..045f77d3ef4 100644 --- a/src/tests/ftest/util/container_rf_test_base.py +++ b/src/tests/ftest/util/container_rf_test_base.py @@ -166,9 +166,9 @@ def execute_cont_rf_test(self, create_container=True, mode=None): # Verify the rank to be excluded has at least one object self.verify_rank_has_objects() # Start the rebuild process - self.start_rebuild_cont_rf(rd_fac) + self.start_rebuild_cont_rf(rf_match.group(1)) # Execute the test steps during rebuild - self.execute_during_rebuild_cont_rf(rd_fac, expect_cont_status) + self.execute_during_rebuild_cont_rf(rf_match.group(1), expect_cont_status) # Refresh local pool and container self.log.info("==>(6)Check for pool and container info after rebuild.") self.pool.check_pool_info() From dcf310565633de6879c24087d980e6f0ea2e5822 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 26 Nov 2025 07:44:59 -0800 Subject: [PATCH 039/253] DAOS-18277 cq: bump GHA versions (#17163) Updates `actions/checkout` from 5.0.0 to 6.0.0 Updates `github/codeql-action` from 4.31.2 to 4.31.4 Updates `dorny/test-reporter` from 2.1.1 to 2.2.0 Signed-off-by: dependabot[bot] --- .github/workflows/bash_unit_testing.yml | 4 ++-- .github/workflows/bullseye-coverage.yml | 8 +++---- .github/workflows/ci2.yml | 4 ++-- .github/workflows/create_release.yml | 2 +- .github/workflows/landing-builds.yml | 10 ++++----- .github/workflows/linting.yml | 22 +++++++++---------- .github/workflows/ossf-scorecard.yml | 4 ++-- .github/workflows/pr-metadata.yml | 2 +- .../workflows/rpm-build-and-test-report.yml | 4 ++-- .github/workflows/rpm-build-and-test.yml | 8 +++---- .github/workflows/trivy.yml | 4 ++-- .github/workflows/unit-testing.yml | 2 +- 12 files 
changed, 37 insertions(+), 37 deletions(-) diff --git a/.github/workflows/bash_unit_testing.yml b/.github/workflows/bash_unit_testing.yml index 872d11a1314..6cd7e554a5f 100644 --- a/.github/workflows/bash_unit_testing.yml +++ b/.github/workflows/bash_unit_testing.yml @@ -20,11 +20,11 @@ jobs: runs-on: [self-hosted, light] steps: - name: Checkout code - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 with: ref: ${{ github.event.pull_request.head.sha }} - name: Checkout bash_unit project - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 with: repository: 'pgrange/bash_unit' path: bash_unit diff --git a/.github/workflows/bullseye-coverage.yml b/.github/workflows/bullseye-coverage.yml index 92826fb5b56..e3a4dd5201f 100644 --- a/.github/workflows/bullseye-coverage.yml +++ b/.github/workflows/bullseye-coverage.yml @@ -109,7 +109,7 @@ jobs: matrix: ${{ steps.matrix.outputs.text }} steps: - name: Checkout code - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 with: ref: ${{ github.event.pull_request.head.sha }} - name: Import commit pragmas @@ -235,7 +235,7 @@ jobs: COMMIT_STATUS_DISTRO_VERSION: steps: - name: Checkout code - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 with: submodules: 'recursive' fetch-depth: 500 @@ -409,7 +409,7 @@ jobs: matrix: ${{ steps.matrix.outputs.text }} steps: - name: Checkout code - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 with: ref: ${{ github.event.pull_request.head.sha }} - name: Import commit pragmas @@ -519,7 +519,7 @@ jobs: SIZE: steps: 
- name: Checkout code - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 with: submodules: 'recursive' fetch-depth: 500 diff --git a/.github/workflows/ci2.yml b/.github/workflows/ci2.yml index 3832de6f163..8533cea6fbd 100644 --- a/.github/workflows/ci2.yml +++ b/.github/workflows/ci2.yml @@ -34,7 +34,7 @@ jobs: DOCKER_BASE: ${{ matrix.base }} steps: - name: Checkout code - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 with: submodules: true fetch-depth: 500 @@ -100,7 +100,7 @@ jobs: COMPILER: ${{ matrix.compiler }} steps: - name: Checkout code - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 with: submodules: true fetch-depth: 500 diff --git a/.github/workflows/create_release.yml b/.github/workflows/create_release.yml index 54f0b0e95db..1c04bf70022 100644 --- a/.github/workflows/create_release.yml +++ b/.github/workflows/create_release.yml @@ -18,7 +18,7 @@ jobs: permissions: contents: write steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 with: fetch-depth: 2 - uses: ./.github/actions/make_release diff --git a/.github/workflows/landing-builds.yml b/.github/workflows/landing-builds.yml index b083d8d300a..d9e2ff2aa32 100644 --- a/.github/workflows/landing-builds.yml +++ b/.github/workflows/landing-builds.yml @@ -64,7 +64,7 @@ jobs: DOCKER_BASE: ${{ matrix.base }} steps: - name: Checkout code - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 with: submodules: 'recursive' fetch-depth: 500 @@ -112,7 +112,7 @@ jobs: COMPILER: clang steps: - name: Checkout code - 
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 with: submodules: 'recursive' fetch-depth: 500 @@ -181,7 +181,7 @@ jobs: COMPILER: ${{ matrix.compiler }} steps: - name: Checkout code - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 with: submodules: 'recursive' fetch-depth: 500 @@ -255,7 +255,7 @@ jobs: BASE_DISTRO: ${{ matrix.with }} steps: - name: Checkout code - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 with: submodules: 'recursive' fetch-depth: 500 @@ -344,7 +344,7 @@ jobs: COMPILER: ${{ matrix.compiler }} steps: - name: Checkout code - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 with: submodules: 'recursive' fetch-depth: 500 diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index fe3db350144..4121d0f8653 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -25,7 +25,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout code - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 with: ref: ${{ github.event.pull_request.head.sha }} - name: Set up Python environment @@ -48,7 +48,7 @@ jobs: runs-on: ubuntu-24.04 steps: - name: Checkout code - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 with: ref: ${{ github.event.pull_request.head.sha }} - name: Run @@ -66,7 +66,7 @@ jobs: runs-on: ubuntu-24.04 steps: - name: Checkout code - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: 
actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 with: ref: ${{ github.event.pull_request.head.sha }} - name: Check DAOS logging macro use. @@ -77,7 +77,7 @@ jobs: runs-on: ubuntu-24.04 steps: - name: Checkout code - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 - name: Check DAOS ftest tags. run: \[ ! -x src/tests/ftest/tags.py \] || ./src/tests/ftest/tags.py lint --verbose @@ -86,7 +86,7 @@ jobs: name: Flake8 check steps: - name: Check out source repository - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 with: ref: ${{ github.event.pull_request.head.sha }} - name: Set up Python environment @@ -119,7 +119,7 @@ jobs: runs-on: ubuntu-24.04 steps: - name: Checkout code - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 with: ref: ${{ github.event.pull_request.head.sha }} - name: Install doxygen @@ -140,7 +140,7 @@ jobs: runs-on: ubuntu-24.04 steps: - name: Checkout code - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 with: ref: ${{ github.event.pull_request.head.sha }} - uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0 @@ -160,7 +160,7 @@ jobs: runs-on: ubuntu-24.04 steps: - name: Checkout code - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 - name: Install extra python packages run: python3 -m pip install --requirement utils/cq/requirements.txt - name: Run check @@ -175,7 +175,7 @@ jobs: runs-on: ubuntu-24.04 steps: - name: Check out source repository - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # 
v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 with: ref: ${{ github.event.pull_request.head.sha }} fetch-depth: 0 @@ -202,7 +202,7 @@ jobs: runs-on: ubuntu-24.04 steps: - name: Check out source repository - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 with: ref: ${{ github.event.pull_request.head.sha }} - name: Set up Python environment @@ -219,7 +219,7 @@ jobs: runs-on: ubuntu-24.04 steps: - name: Check out source repository - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 with: ref: ${{ github.event.pull_request.head.sha }} fetch-depth: 0 diff --git a/.github/workflows/ossf-scorecard.yml b/.github/workflows/ossf-scorecard.yml index c33fe62c222..9fcb09cfa87 100644 --- a/.github/workflows/ossf-scorecard.yml +++ b/.github/workflows/ossf-scorecard.yml @@ -33,7 +33,7 @@ jobs: steps: - name: "Checkout code" - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 with: persist-credentials: false @@ -71,6 +71,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard (optional). 
# Commenting out will disable upload of results to your repo's Code Scanning dashboard - name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@0499de31b99561a6d14a36a5f662c2a54f91beee # v4.31.2 + uses: github/codeql-action/upload-sarif@e12f0178983d466f2f6028f5cc7a6d786fd97f4b # v4.31.4 with: sarif_file: results.sarif diff --git a/.github/workflows/pr-metadata.yml b/.github/workflows/pr-metadata.yml index 7a9a1838604..511de31eb15 100644 --- a/.github/workflows/pr-metadata.yml +++ b/.github/workflows/pr-metadata.yml @@ -19,7 +19,7 @@ jobs: name: Report Jira data to PR comment steps: - name: Checkout - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 - name: install jira run: python3 -m pip install jira - name: Load jira metadata diff --git a/.github/workflows/rpm-build-and-test-report.yml b/.github/workflows/rpm-build-and-test-report.yml index 456fbc187eb..6801977ecc4 100644 --- a/.github/workflows/rpm-build-and-test-report.yml +++ b/.github/workflows/rpm-build-and-test-report.yml @@ -93,7 +93,7 @@ jobs: esac echo "STAGE_NAME=Build RPM on $DISTRO_NAME $DISTRO_VERSION" >> $GITHUB_ENV - name: Test Report - uses: dorny/test-reporter@dc3a92680fcc15842eef52e8c4606ea7ce6bd3f3 # v2.1.1 + uses: dorny/test-reporter@7b7927aa7da8b82e81e755810cb51f39941a2cc7 # v2.2.0 with: artifact: ${{ env.STAGE_NAME }} test-results name: ${{ env.STAGE_NAME }} Test Results (dorny) @@ -112,7 +112,7 @@ jobs: - name: Set variables run: echo "STAGE_NAME=Functional Hardware ${{ matrix.stage }}" >> $GITHUB_ENV - name: Test Report - uses: dorny/test-reporter@dc3a92680fcc15842eef52e8c4606ea7ce6bd3f3 # v2.1.1 + uses: dorny/test-reporter@7b7927aa7da8b82e81e755810cb51f39941a2cc7 # v2.2.0 with: artifact: ${{ env.STAGE_NAME }} test-results name: ${{ env.STAGE_NAME }} Test Results (dorny) diff --git a/.github/workflows/rpm-build-and-test.yml b/.github/workflows/rpm-build-and-test.yml index 
3132c34043d..ad3ac5ad2a2 100644 --- a/.github/workflows/rpm-build-and-test.yml +++ b/.github/workflows/rpm-build-and-test.yml @@ -118,7 +118,7 @@ jobs: matrix: ${{ steps.matrix.outputs.text }} steps: - name: Checkout code - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 with: ref: ${{ github.event.pull_request.head.sha }} - name: Import commit pragmas @@ -244,7 +244,7 @@ jobs: COMMIT_STATUS_DISTRO_VERSION: steps: - name: Checkout code - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 with: submodules: recursive fetch-depth: 500 @@ -418,7 +418,7 @@ jobs: matrix: ${{ steps.matrix.outputs.text }} steps: - name: Checkout code - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 with: ref: ${{ github.event.pull_request.head.sha }} - name: Import commit pragmas @@ -528,7 +528,7 @@ jobs: SIZE: steps: - name: Checkout code - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 with: submodules: recursive fetch-depth: 500 diff --git a/.github/workflows/trivy.yml b/.github/workflows/trivy.yml index 4a708cdcbf8..38ce98f92d6 100644 --- a/.github/workflows/trivy.yml +++ b/.github/workflows/trivy.yml @@ -33,7 +33,7 @@ jobs: security-events: write steps: - name: Checkout code - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 - name: Run Trivy vulnerability scanner in filesystem mode (table format) uses: aquasecurity/trivy-action@b6643a29fecd7f34b3597bc6acb0a98b03d33ff8 # 0.33.1 @@ -68,7 +68,7 @@ jobs: trivy-config: 'utils/trivy/trivy.yaml' - name: Upload Trivy scan results to GitHub Security tab 
- uses: github/codeql-action/upload-sarif@0499de31b99561a6d14a36a5f662c2a54f91beee # v4.31.2 + uses: github/codeql-action/upload-sarif@e12f0178983d466f2f6028f5cc7a6d786fd97f4b # v4.31.4 with: sarif_file: 'trivy-results.sarif' diff --git a/.github/workflows/unit-testing.yml b/.github/workflows/unit-testing.yml index ee64db399b9..425fa8079eb 100644 --- a/.github/workflows/unit-testing.yml +++ b/.github/workflows/unit-testing.yml @@ -15,7 +15,7 @@ jobs: runs-on: [self-hosted, docker] steps: - name: Checkout code - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 with: submodules: 'recursive' - name: Build deps in Docker From 390ff59debb8e930e70a2276231736658c8305f9 Mon Sep 17 00:00:00 2001 From: Niu Yawei Date: Thu, 27 Nov 2025 20:31:08 +0800 Subject: [PATCH 040/253] DAOS-18153 vos: re-initialize tcx feats on probe (#17179) The tree context feats needs be re-initialized before probe, since the crucial BTR_FEAT_EMBEDDED bit could be stale. Let's imagine following race: 1. Scrubbing ULT iterates into dkey tree when the tree is non-embedded. (the BTR_FEAT_EMBEDDED bit isn't set in persistent root feats and the context feats for scrubbing ULT) 2. Scrubbing ULT yield. 3. Discarding ULT deletes some dkeys and turns the dkey tree into embedded tree. (the BTR_FEAT_EMBEDDED bit is set to both persistent root feats and the context feats for discarding ULT) 4. Scrubbing ULT resumes and try to revalidate by probe, the feats in it's context is stale now, the btr_probe() will mistakenly perform normal probe procedure on an embedded tree. 
Signed-off-by: Niu Yawei --- src/common/btree.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/common/btree.c b/src/common/btree.c index 7ef46cb1d39..82afe158e60 100644 --- a/src/common/btree.c +++ b/src/common/btree.c @@ -1702,11 +1702,12 @@ btr_probe(struct btr_context *tcx, dbtree_probe_opc_t probe_opc, memset(&tcx->tc_traces[0], 0, sizeof(tcx->tc_traces[0]) * BTR_TRACE_MAX); - /* depth could be changed by dbtree_delete/dbtree_iter_delete from - * a different btr_context, so we always reinitialize both depth - * and start point of trace for the context. + /* depth & feats could be changed by dbtree_delete/dbtree_iter_delete + * from a different btr_context, so we always reinitialize both depth, + * feats and start point of trace for the context. */ btr_context_set_depth(tcx, tcx->tc_tins.ti_root->tr_depth); + tcx->tc_feats = tcx->tc_tins.ti_root->tr_feats; if (btr_root_empty(tcx)) { /* empty tree */ D_DEBUG(DB_TRACE, "Empty tree\n"); From 27c7b589a76619b18cb1b7bdf78f5f5b6496d752 Mon Sep 17 00:00:00 2001 From: Liu Xuezhao Date: Thu, 27 Nov 2025 21:04:12 +0800 Subject: [PATCH 041/253] DAOS-18157 container: treat rank as failed if all tgts failed for cont_agg_eph_sync (#17037) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The “pool exclude” does not set rank’s domain status as DOWN, so in cont_agg_eph_sync() the “map_ranks_init(pool->sp_map, PO_COMP_ST_DOWNOUT | PO_COMP_ST_DOWN, &fail_ranks)” cannot get the excluded ranks and cause the EC aggregation boundary epoch cannot be synced to other engines correctly. This patch treats rank as failed if all its targets failed to fix it. 
Signed-off-by: Xuezhao Liu --- src/container/srv_container.c | 2 +- src/include/daos_srv/pool.h | 3 +- src/pool/srv_util.c | 63 +++++++++++++++++++++++++++++++++++ 3 files changed, 66 insertions(+), 2 deletions(-) diff --git a/src/container/srv_container.c b/src/container/srv_container.c index e671162db5f..a9c5a9fe354 100644 --- a/src/container/srv_container.c +++ b/src/container/srv_container.c @@ -2059,7 +2059,7 @@ cont_agg_eph_sync(struct ds_pool *pool, struct cont_svc *svc) int i; int rc = 0; - rc = map_ranks_init(pool->sp_map, PO_COMP_ST_DOWNOUT | PO_COMP_ST_DOWN, &fail_ranks); + rc = map_ranks_failed(pool->sp_map, &fail_ranks); if (rc) { D_ERROR(DF_UUID ": ranks init failed: %d\n", DP_UUID(pool->sp_uuid), rc); return; diff --git a/src/include/daos_srv/pool.h b/src/include/daos_srv/pool.h index 852cadb71ea..a08c3e702c6 100644 --- a/src/include/daos_srv/pool.h +++ b/src/include/daos_srv/pool.h @@ -385,7 +385,8 @@ ds_pool_child_map_refresh_async(struct ds_pool_child *dpc); int map_ranks_init(const struct pool_map *map, unsigned int status, d_rank_list_t *ranks); - +int +map_ranks_failed(const struct pool_map *map, d_rank_list_t *ranks); void map_ranks_fini(d_rank_list_t *ranks); diff --git a/src/pool/srv_util.c b/src/pool/srv_util.c index 9b4e771dc20..f9faa28bae9 100644 --- a/src/pool/srv_util.c +++ b/src/pool/srv_util.c @@ -65,6 +65,69 @@ map_ranks_init(const struct pool_map *map, unsigned int status, d_rank_list_t *r return 0; } +static bool +all_tgts_match(struct pool_domain *rank_dom, unsigned int status) +{ + int i; + + for (i = 0; i < rank_dom->do_target_nr; i++) { + if ((status & rank_dom->do_targets[i].ta_comp.co_status) == 0) + return false; + } + + return true; +} + +/* Build failed rank list, treats the rank as DOWN if all its targets are DOWN . 
*/ +int +map_ranks_failed(const struct pool_map *map, d_rank_list_t *ranks) +{ + struct pool_domain *domains = NULL; + unsigned int status = PO_COMP_ST_DOWNOUT | PO_COMP_ST_DOWN; + int nranks; + int n = 0; + int i; + d_rank_t *rs; + + nranks = pool_map_find_ranks((struct pool_map *)map, PO_COMP_ID_ALL, &domains); + if (nranks == 0) { + D_ERROR("no nodes in pool map\n"); + return -DER_IO; + } + + for (i = 0; i < nranks; i++) { + if ((status & domains[i].do_comp.co_status) || all_tgts_match(&domains[i], status)) + n++; + } + + if (n == 0) { + ranks->rl_nr = 0; + ranks->rl_ranks = NULL; + return 0; + } + + D_ALLOC_ARRAY(rs, n); + if (rs == NULL) + return -DER_NOMEM; + + ranks->rl_nr = n; + ranks->rl_ranks = rs; + + n = 0; + for (i = 0; i < nranks; i++) { + if ((status & domains[i].do_comp.co_status) || + all_tgts_match(&domains[i], status)) { + D_ASSERT(n < ranks->rl_nr); + ranks->rl_ranks[n] = domains[i].do_comp.co_rank; + n++; + continue; + } + } + D_ASSERTF(n == ranks->rl_nr, "%d != %u\n", n, ranks->rl_nr); + + return 0; +} + void map_ranks_fini(d_rank_list_t *ranks) { From 8f8b9578ce15778aad858de8a4a36e0f0ac2eb90 Mon Sep 17 00:00:00 2001 From: Jeff Olivier Date: Fri, 28 Nov 2025 06:26:33 -0700 Subject: [PATCH 042/253] DAOS-17207 build: upgrade to SPDK 24.09 (#16774) Upgrade SPDK version used by DAOS from v22.01 to v24.09 to take advantage of the numerous improvements. 
In this change: - SPDK API usage in DAOS updated - Build updates including those related to DAOS dependency RPMs - Control-plane go binding NVMe discovery fix for segfault issue - Control-plane go binding NVMe cmocka unit test fix Signed-off-by: Jeff Olivier --- ...8322b812fe31cc3e1d0308a7f5bd4b06b9886.diff | 51 +++ ...ba3fcd5aceceea530a702922153bc75664978.diff | 61 --- ...a4c808badbad3942696ecf16fa60e8129a747.diff | 70 ---- deps/patches/spdk/0002_spdk_rwf_nowait.patch | 78 ++++ deps/patches/spdk/0003_external_isal.patch | 362 ++++++++++++++++++ site_scons/components/__init__.py | 27 +- src/bio/SConscript | 2 +- src/bio/bio_xstream.c | 95 ++++- src/control/SConscript | 8 +- src/control/lib/spdk/ctests/SConscript | 2 +- src/control/lib/spdk/ctests/nvme_control_ut.c | 21 +- .../lib/spdk/include/nvme_control_common.h | 5 +- src/control/lib/spdk/nvme_default.go | 3 +- src/control/lib/spdk/src/nvme_control.c | 10 +- .../lib/spdk/src/nvme_control_common.c | 8 +- src/control/server/ctl_storage_rpc.go | 26 +- src/control/server/engine/exec.go | 8 +- src/control/server/instance_exec.go | 13 +- src/control/server/instance_storage_rpc.go | 8 +- src/control/server/storage/bdev/provider.go | 8 +- src/dtx/tests/SConscript | 6 +- src/engine/SConscript | 2 +- src/mgmt/tests/SConscript | 3 +- src/object/tests/SConscript | 2 +- src/tests/SConscript | 2 +- src/utils/ddb/SConscript | 2 +- src/vos/tests/SConscript | 7 +- utils/build.config | 4 +- utils/rpms/daos.changelog | 4 + utils/rpms/daos.sh | 2 +- utils/rpms/daos.spec | 2 +- utils/rpms/package_info.sh | 5 +- utils/rpms/spdk.changelog | 6 + utils/rpms/spdk.sh | 9 +- utils/run_utest.py | 3 +- utils/scripts/install-el8.sh | 2 + utils/scripts/install-el9.sh | 2 + utils/scripts/install-leap15.sh | 1 + utils/scripts/install-ubuntu.sh | 2 + utils/test_memcheck.supp | 8 + 40 files changed, 728 insertions(+), 212 deletions(-) create mode 100644 deps/patches/spdk/0001_3428322b812fe31cc3e1d0308a7f5bd4b06b9886.diff delete mode 100644 
deps/patches/spdk/0001_b0aba3fcd5aceceea530a702922153bc75664978.diff delete mode 100644 deps/patches/spdk/0002_445a4c808badbad3942696ecf16fa60e8129a747.diff create mode 100644 deps/patches/spdk/0002_spdk_rwf_nowait.patch create mode 100644 deps/patches/spdk/0003_external_isal.patch create mode 100644 utils/rpms/spdk.changelog diff --git a/deps/patches/spdk/0001_3428322b812fe31cc3e1d0308a7f5bd4b06b9886.diff b/deps/patches/spdk/0001_3428322b812fe31cc3e1d0308a7f5bd4b06b9886.diff new file mode 100644 index 00000000000..f427d33d2d8 --- /dev/null +++ b/deps/patches/spdk/0001_3428322b812fe31cc3e1d0308a7f5bd4b06b9886.diff @@ -0,0 +1,51 @@ +diff --git a/module/bdev/aio/bdev_aio.c b/module/bdev/aio/bdev_aio.c +index 075459b1564..b51d6c83a3f 100644 +--- a/module/bdev/aio/bdev_aio.c ++++ b/module/bdev/aio/bdev_aio.c +@@ -64,7 +64,9 @@ struct file_disk { + struct spdk_bdev disk; + char *filename; + int fd; ++#ifdef RWF_NOWAIT + bool use_nowait; ++#endif + TAILQ_ENTRY(file_disk) link; + bool block_size_override; + bool readonly; +@@ -114,7 +116,9 @@ bdev_aio_open(struct file_disk *disk) + { + int fd; + int io_flag = disk->readonly ? O_RDONLY : O_RDWR; ++#ifdef RWF_NOWAIT + struct stat st; ++#endif + + fd = open(disk->filename, io_flag | O_DIRECT); + if (fd < 0) { +@@ -129,11 +133,14 @@ bdev_aio_open(struct file_disk *disk) + } + + disk->fd = fd; ++ ++#ifdef RWF_NOWAIT + /* Some aio operations can block, for example if number outstanding + * I/O exceeds number of block layer tags. But not all files can + * support RWF_NOWAIT flag. So use RWF_NOWAIT on block devices only. 
+ */ + disk->use_nowait = fstat(fd, &st) == 0 && S_ISBLK(st.st_mode); ++#endif + + return 0; + } +@@ -205,9 +212,11 @@ bdev_aio_submit_io(enum spdk_bdev_io_type type, struct file_disk *fdisk, + io_set_eventfd(iocb, aio_ch->group_ch->efd); + } + iocb->data = aio_task; ++#ifdef RWF_NOWAIT + if (fdisk->use_nowait) { + iocb->aio_rw_flags = RWF_NOWAIT; + } ++#endif + aio_task->len = nbytes; + aio_task->ch = aio_ch; + diff --git a/deps/patches/spdk/0001_b0aba3fcd5aceceea530a702922153bc75664978.diff b/deps/patches/spdk/0001_b0aba3fcd5aceceea530a702922153bc75664978.diff deleted file mode 100644 index 9186e715e2b..00000000000 --- a/deps/patches/spdk/0001_b0aba3fcd5aceceea530a702922153bc75664978.diff +++ /dev/null @@ -1,61 +0,0 @@ -diff --git a/scripts/setup.sh b/scripts/setup.sh -index d0c09430a6f..a56c74dd686 100755 ---- a/scripts/setup.sh -+++ b/scripts/setup.sh -@@ -141,6 +141,10 @@ function linux_bind_driver() { - - pci_dev_echo "$bdf" "$old_driver_name -> $driver_name" - -+ if [[ $driver_name == "none" ]]; then -+ return 0 -+ fi -+ - echo "$ven_dev_id" > "/sys/bus/pci/drivers/$driver_name/new_id" 2> /dev/null || true - echo "$bdf" > "/sys/bus/pci/drivers/$driver_name/bind" 2> /dev/null || true - -@@ -248,6 +252,17 @@ function collect_devices() { - if [[ $PCI_ALLOWED != *"$bdf"* ]]; then - pci_dev_echo "$bdf" "Skipping not allowed VMD controller at $bdf" - in_use=1 -+ elif [[ " ${drivers_d[*]} " =~ "nvme" ]]; then -+ if [[ "${DRIVER_OVERRIDE}" != "none" ]]; then -+ if [ "$mode" == "config" ]; then -+ cat <<- MESSAGE -+ Binding new driver to VMD device. If there are NVMe SSDs behind the VMD endpoint -+ which are attached to the kernel NVMe driver,the binding process may go faster -+ if you first run this script with DRIVER_OVERRIDE="none" to unbind only the -+ NVMe SSDs, and then run again to unbind the VMD devices." 
-+ MESSAGE -+ fi -+ fi - fi - fi - fi -@@ -305,7 +320,9 @@ function configure_linux_pci() { - fi - fi - -- if [[ -n "${DRIVER_OVERRIDE}" ]]; then -+ if [[ "${DRIVER_OVERRIDE}" == "none" ]]; then -+ driver_name=none -+ elif [[ -n "${DRIVER_OVERRIDE}" ]]; then - driver_path="$DRIVER_OVERRIDE" - driver_name="${DRIVER_OVERRIDE##*/}" - # modprobe and the sysfs don't use the .ko suffix. -@@ -337,10 +354,12 @@ function configure_linux_pci() { - fi - - # modprobe assumes the directory of the module. If the user passes in a path, we should use insmod -- if [[ -n "$driver_path" ]]; then -- insmod $driver_path || true -- else -- modprobe $driver_name -+ if [[ $driver_name != "none" ]]; then -+ if [[ -n "$driver_path" ]]; then -+ insmod $driver_path || true -+ else -+ modprobe $driver_name -+ fi - fi - - for bdf in "${!all_devices_d[@]}"; do diff --git a/deps/patches/spdk/0002_445a4c808badbad3942696ecf16fa60e8129a747.diff b/deps/patches/spdk/0002_445a4c808badbad3942696ecf16fa60e8129a747.diff deleted file mode 100644 index 11bd483eb89..00000000000 --- a/deps/patches/spdk/0002_445a4c808badbad3942696ecf16fa60e8129a747.diff +++ /dev/null @@ -1,70 +0,0 @@ -diff --git a/CONFIG b/CONFIG -index 5f552fe81df..481643dcc3b 100644 ---- a/CONFIG -+++ b/CONFIG -@@ -195,3 +195,6 @@ CONFIG_USDT=n - # Build with IDXD kernel support. - # In this mode, SPDK shares the DSA device with the kernel. 
- CONFIG_IDXD_KERNEL=n -+ -+# arc4random is available in stdlib.h -+CONFIG_HAVE_ARC4RANDOM=n -diff --git a/configure b/configure -index a18f34a004d..688d72bfbf6 100755 ---- a/configure -+++ b/configure -@@ -850,6 +850,11 @@ if [[ "${CONFIG[TSAN]}" = "y" ]]; then - fi - fi - -+if echo -e '#include \nint main(void) { arc4random(); return 0; }\n' \ -+ | "${BUILD_CMD[@]}" - 2> /dev/null; then -+ CONFIG[HAVE_ARC4RANDOM]="y" -+fi -+ - if [[ "${CONFIG[OCF]}" = "y" ]]; then - # If OCF_PATH is a file, assume it is a library and use it to compile with - if [ -f ${CONFIG[OCF_PATH]} ]; then -diff --git a/lib/iscsi/iscsi.c b/lib/iscsi/iscsi.c -index 00b1d62e26b..3c403b972f3 100644 ---- a/lib/iscsi/iscsi.c -+++ b/lib/iscsi/iscsi.c -@@ -62,7 +62,6 @@ - - #ifdef __FreeBSD__ - #define HAVE_SRANDOMDEV 1 --#define HAVE_ARC4RANDOM 1 - #endif - - struct spdk_iscsi_globals g_iscsi = { -@@ -97,7 +96,7 @@ srandomdev(void) - } - #endif /* HAVE_SRANDOMDEV */ - --#ifndef HAVE_ARC4RANDOM -+#ifndef SPDK_CONFIG_HAVE_ARC4RANDOM - static int g_arc4random_initialized = 0; - - static uint32_t -@@ -115,7 +114,7 @@ arc4random(void) - r = (r1 << 16) | r2; - return r; - } --#endif /* HAVE_ARC4RANDOM */ -+#endif /* SPDK_CONFIG_HAVE_ARC4RANDOM */ - - static void - gen_random(uint8_t *buf, size_t len) -diff --git a/scripts/check_format.sh b/scripts/check_format.sh -index 1dbc25d205e..e2e47131537 100755 ---- a/scripts/check_format.sh -+++ b/scripts/check_format.sh -@@ -270,7 +270,7 @@ function check_posix_includes() { - local rc=0 - - echo -n "Checking for POSIX includes..." 
-- git grep -I -i -f scripts/posix.txt -- './*' ':!include/spdk/stdinc.h' ':!include/linux/**' ':!lib/rte_vhost*/**' ':!scripts/posix.txt' ':!*.patch' > scripts/posix.log || true -+ git grep -I -i -f scripts/posix.txt -- './*' ':!include/spdk/stdinc.h' ':!include/linux/**' ':!lib/rte_vhost*/**' ':!scripts/posix.txt' ':!*.patch' ':!configure' > scripts/posix.log || true - if [ -s scripts/posix.log ]; then - echo "POSIX includes detected. Please include spdk/stdinc.h instead." - cat scripts/posix.log diff --git a/deps/patches/spdk/0002_spdk_rwf_nowait.patch b/deps/patches/spdk/0002_spdk_rwf_nowait.patch new file mode 100644 index 00000000000..e65bb55e32b --- /dev/null +++ b/deps/patches/spdk/0002_spdk_rwf_nowait.patch @@ -0,0 +1,78 @@ +diff --git a/CONFIG b/CONFIG +index 89c34e90b..02ce04692 100644 +--- a/CONFIG ++++ b/CONFIG +@@ -256,3 +256,6 @@ CONFIG_COPY_FILE_RANGE=n + + # liblz4 is available + CONFIG_HAVE_LZ4=n ++ ++# aio_rw_flags are enabled ++CONFIG_HAVE_AIO_RW_FLAGS=n +diff --git a/configure b/configure +index 26c9b0f4d..d8daedc37 100755 +--- a/configure ++++ b/configure +@@ -860,6 +860,22 @@ if [[ $sys_name != "Linux" ]]; then + fi + fi + ++if echo -e '#include \n' \ ++ '#include \n' \ ++ '#include \n' \ ++ '#ifndef RWF_NOWAIT\n' \ ++ '#error "No RWF_NOWAIT is defined"\n' \ ++ '#endif\n' \ ++ 'int main(int argc, char **argv) {\n' \ ++ 'return offsetof(struct iocb, aio_rw_flags);\n}\n' \ ++ | "${BUILD_CMD[@]}" -c - ; then ++ echo HAVE_AIO_RW_FLAGS=YES ++ CONFIG[HAVE_AIO_RW_FLAGS]="y" ++else ++ echo HAVE_AIO_RW_FLAGS=NO ++ CONFIG[HAVE_AIO_RW_FLAGS]="n" ++fi ++ + if [ "${CONFIG[RDMA]}" = "y" ]; then + if [[ ! "${CONFIG[RDMA_PROV]}" == "verbs" ]] && [[ ! 
"${CONFIG[RDMA_PROV]}" == "mlx5_dv" ]]; then + echo "Invalid RDMA provider specified, must be \"verbs\" or \"mlx5_dv\"" +diff --git a/module/bdev/aio/bdev_aio.c b/module/bdev/aio/bdev_aio.c +index b51d6c83a..01914fb9d 100644 +--- a/module/bdev/aio/bdev_aio.c ++++ b/module/bdev/aio/bdev_aio.c +@@ -64,7 +64,7 @@ struct file_disk { + struct spdk_bdev disk; + char *filename; + int fd; +-#ifdef RWF_NOWAIT ++#ifdef SPDK_CONFIG_HAVE_AIO_RW_FLAGS + bool use_nowait; + #endif + TAILQ_ENTRY(file_disk) link; +@@ -116,7 +116,7 @@ bdev_aio_open(struct file_disk *disk) + { + int fd; + int io_flag = disk->readonly ? O_RDONLY : O_RDWR; +-#ifdef RWF_NOWAIT ++#ifdef SPDK_CONFIG_HAVE_AIO_RW_FLAGS + struct stat st; + #endif + +@@ -134,7 +134,7 @@ bdev_aio_open(struct file_disk *disk) + + disk->fd = fd; + +-#ifdef RWF_NOWAIT ++#ifdef SPDK_CONFIG_HAVE_AIO_RW_FLAGS + /* Some aio operations can block, for example if number outstanding + * I/O exceeds number of block layer tags. But not all files can + * support RWF_NOWAIT flag. So use RWF_NOWAIT on block devices only. 
+@@ -212,7 +212,7 @@ bdev_aio_submit_io(enum spdk_bdev_io_type type, struct file_disk *fdisk, + io_set_eventfd(iocb, aio_ch->group_ch->efd); + } + iocb->data = aio_task; +-#ifdef RWF_NOWAIT ++#ifdef SPDK_CONFIG_HAVE_AIO_RW_FLAGS + if (fdisk->use_nowait) { + iocb->aio_rw_flags = RWF_NOWAIT; + } diff --git a/deps/patches/spdk/0003_external_isal.patch b/deps/patches/spdk/0003_external_isal.patch new file mode 100644 index 00000000000..3a4a9be7824 --- /dev/null +++ b/deps/patches/spdk/0003_external_isal.patch @@ -0,0 +1,362 @@ +diff --git a/CONFIG b/CONFIG +index 89c34e90b..086db27a4 100644 +--- a/CONFIG ++++ b/CONFIG +@@ -170,9 +170,11 @@ CONFIG_CUSTOMOCF=n + + # Build ISA-L library + CONFIG_ISAL=y ++CONFIG_ISAL_PATH= + + # Build ISA-L-crypto library + CONFIG_ISAL_CRYPTO=y ++CONFIG_ISAL_CRYPTO_PATH= + + # Build with IO_URING support + CONFIG_URING=n +diff --git a/Makefile b/Makefile +index 3aeae41ad..5e249aaa3 100644 +--- a/Makefile ++++ b/Makefile +@@ -18,8 +18,16 @@ DIRS-$(CONFIG_EXAMPLES) += examples + DIRS-$(CONFIG_APPS) += app + DIRS-y += test + DIRS-$(CONFIG_IPSEC_MB) += ipsecbuild ++ifeq ($(CONFIG_ISAL),y) ++ifeq ($(CONFIG_ISAL_PATH),) + DIRS-$(CONFIG_ISAL) += isalbuild ++endif ++endif ++ifeq ($(CONFIG_ISAL_CRYPTO),y) ++ifeq ($(CONFIG_ISAL_CRYPTO_PATH),) + DIRS-$(CONFIG_ISAL_CRYPTO) += isalcryptobuild ++endif ++endif + DIRS-$(CONFIG_VFIO_USER) += vfiouserbuild + DIRS-$(CONFIG_SMA) += proto + DIRS-$(CONFIG_XNVME) += xnvmebuild +@@ -63,14 +71,18 @@ DPDK_DEPS += ipsecbuild + endif + + ifeq ($(CONFIG_ISAL),y) ++ifeq ($(CONFIG_ISAL_PATH),) + ISALBUILD = isalbuild + LIB += isalbuild + DPDK_DEPS += isalbuild + ifeq ($(CONFIG_ISAL_CRYPTO),y) ++ifeq ($(CONFIG_ISAL_CRYPTO_PATH),) + ISALCRYPTOBUILD = isalcryptobuild + LIB += isalcryptobuild + endif + endif ++endif ++endif + + ifeq ($(CONFIG_VFIO_USER),y) + VFIOUSERBUILD = vfiouserbuild +diff --git a/configure b/configure +index 26c9b0f4d..8ef548fa8 100755 +--- a/configure ++++ b/configure +@@ -62,6 +62,8 @@ function 
usage() { + echo " --without-idxd Disabled while experimental. Only built for x86 when enabled." + echo " --with-crypto Build isa-l-crypto and vbdev crypto module. No path required." + echo " --without-crypto Disable isa-l-crypto and vbdev crypto module." ++ echo " --with-isal[=DIR] Don't build isal, use external library" ++ echo " --with-isal-crypto[=DIR] Don't build isal-crypto, use external library" + echo " --with-fio[=DIR] Build fio_plugin." + echo " --without-fio default: /usr/src/fio" + echo " --with-xnvme Build xNVMe bdev module." +@@ -581,6 +583,26 @@ for i in "$@"; do + --without-fio) + CONFIG[FIO_PLUGIN]=n + ;; ++ --with-isal) ;& ++ --with-isal=*) ++ # if specified, set the default so we don't build it ++ CONFIG[ISAL_PATH]="/usr" ++ if [[ -n ${i#*=} ]] && [[ ${i#*=} != "$i" ]]; then ++ CONFIG[ISAL_PATH]=${i#*=} ++ fi ++ check_dir "--with-isal=${CONFIG[ISAL_PATH]}" ++ CONFIG[ISAL]=y ++ ;; ++ --with-isal-crypto) ;& ++ --with-isal-crypto=*) ++ # if specified, set the default so we don't build it ++ CONFIG[ISAL_CRYPTO_PATH]="/usr" ++ if [[ -n ${i#*=} ]] && [[ ${i#*=} != "$i" ]]; then ++ CONFIG[ISAL_CRYPTO_PATH]=${i#*=} ++ fi ++ check_dir "--with-isal-crypto=${CONFIG[ISAL_CRYPTO_PATH]}" ++ CONFIG[ISAL_CRYPTO]=y ++ ;; + --with-vtune=*) + check_dir "$i" + CONFIG[VTUNE_DIR]="${i#*=}" +@@ -1228,7 +1250,10 @@ if [[ "${CONFIG[FUZZER]}" = "y" && "$CC_TYPE" != "clang" ]]; then + exit 1 + fi + +-if [[ $arch == x86_64* ]] || [[ $arch == aarch64* ]]; then ++if [[ -d "${CONFIG[ISAL_PATH]}" ]]; then ++ echo "Using ISA-L from ${CONFIG[ISAL_PATH]}" ++ CONFIG[ISAL]=y ++elif [[ $arch == x86_64* ]] || [[ $arch == aarch64* ]]; then + CONFIG[ISAL]=y + # make sure the submodule is initialized + if [ ! 
-f "$rootdir"/isa-l/autogen.sh ]; then +@@ -1266,35 +1291,40 @@ else + fi + + # now either configure ISA-L or disable unavailable features +-if [[ "${CONFIG[ISAL]}" = "y" ]]; then +- cd $rootdir/isa-l +- ISAL_LOG=$rootdir/.spdk-isal.log +- if [[ -n "${CONFIG[CROSS_PREFIX]}" ]]; then +- ISAL_OPTS=("--host=${CONFIG[CROSS_PREFIX]}") +- else +- ISAL_OPTS=() +- fi +- if [[ "${CONFIG[SHARED]}" = "y" ]]; then +- ISAL_OPTS+=("--enable-shared=yes") ++if [[ ! -d "${CONFIG[ISAL_PATH]}" ]]; then ++ if [[ "${CONFIG[ISAL]}" = "y" ]]; then ++ cd $rootdir/isa-l ++ ISAL_LOG=$rootdir/.spdk-isal.log ++ if [[ -n "${CONFIG[CROSS_PREFIX]}" ]]; then ++ ISAL_OPTS=("--host=${CONFIG[CROSS_PREFIX]}") ++ else ++ ISAL_OPTS=() ++ fi ++ if [[ "${CONFIG[SHARED]}" = "y" ]]; then ++ ISAL_OPTS+=("--enable-shared=yes") ++ else ++ ISAL_OPTS+=("--enable-shared=no") ++ fi ++ ISAL_OPTS+=("--prefix=${CONFIG[PREFIX]}") ++ echo -n "Configuring ISA-L (logfile: $ISAL_LOG)..." ++ ./autogen.sh &> $ISAL_LOG ++ ./configure CFLAGS="-fPIC -g -O2 -fuse-ld=$LD_TYPE -Wno-unused-command-line-argument" "${ISAL_OPTS[@]}" --enable-shared=no >> $ISAL_LOG 2>&1 ++ echo "done." ++ cd $rootdir + else +- ISAL_OPTS+=("--enable-shared=no") ++ echo "Without ISA-L, there is no software support for crypto or compression," ++ echo "so these features will be disabled." ++ CONFIG[CRYPTO]=n ++ CONFIG[VBDEV_COMPRESS]=n ++ CONFIG[DPDK_COMPRESSDEV]=n + fi +- ISAL_OPTS+=("--prefix=${CONFIG[PREFIX]}") +- echo -n "Configuring ISA-L (logfile: $ISAL_LOG)..." +- ./autogen.sh &> $ISAL_LOG +- ./configure CFLAGS="-fPIC -g -O2 -fuse-ld=$LD_TYPE -Wno-unused-command-line-argument" "${ISAL_OPTS[@]}" --enable-shared=no >> $ISAL_LOG 2>&1 +- echo "done." +- cd $rootdir +-else +- echo "Without ISA-L, there is no software support for crypto or compression," +- echo "so these features will be disabled." 
+- CONFIG[CRYPTO]=n +- CONFIG[VBDEV_COMPRESS]=n +- CONFIG[DPDK_COMPRESSDEV]=n + fi + + # ISA-L-crypto complements ISA-L functionality, it is only enabled together with ISA-L +-if [[ "${CONFIG[ISAL]}" = "y" ]]; then ++if [[ -d "${CONFIG[ISAL_CRYPTO_PATH]}" ]]; then ++ echo "Using isa-l_crypto from ${CONFIG[ISAL_CRYPTO_PATH]}" ++ CONFIG[ISAL_CRYPTO]=y ++elif [[ "${CONFIG[ISAL]}" = "y" ]]; then + if [ ! -f "$rootdir"/isa-l-crypto/autogen.sh ]; then + echo "ISA-L-crypto is required but was not found, please init the submodule with:" + echo " git submodule update --init" +diff --git a/dpdkbuild/Makefile b/dpdkbuild/Makefile +index 64da6cc32..a88c8a6ec 100644 +--- a/dpdkbuild/Makefile ++++ b/dpdkbuild/Makefile +@@ -108,8 +108,8 @@ DPDK_DRIVERS += compress compress/isal + ifeq ($(CONFIG_VBDEV_COMPRESS_MLX5),y) + DPDK_DRIVERS += compress/mlx5 + endif +-DPDK_CFLAGS += -I$(ISAL_DIR) -I$(ISAL_BUILD_DIR) +-DPDK_LDFLAGS += -L$(ISAL_DIR)/.libs -lisal ++DPDK_CFLAGS += -I$(ISAL_DIR) -I$(ISAL_DIR)/include -I$(ISAL_BUILD_DIR) ++DPDK_LDFLAGS += -L$(ISAL_DIR)/.libs -L$(ISAL_DIR)/lib64 -lisal + endif + + DPDK_ENABLED_DRIVERS = $(shell echo $(DPDK_DRIVERS) | sed -E "s/ +/,/g") +diff --git a/lib/accel/Makefile b/lib/accel/Makefile +index 0d4cb1239..840a031a1 100644 +--- a/lib/accel/Makefile ++++ b/lib/accel/Makefile +@@ -18,6 +18,8 @@ ifeq ($(CONFIG_HAVE_LZ4),y) + LOCAL_SYS_LIBS += -llz4 + endif + ++LOCAL_SYS_LIBS += -L$(ISAL_CRYPTO_DIR)/lib64 -lisal_crypto ++ + SPDK_MAP_FILE = $(abspath $(CURDIR)/spdk_accel.map) + + include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk +diff --git a/lib/accel/accel_sw.c b/lib/accel/accel_sw.c +index d7e2dfff9..03b1dcb4c 100644 +--- a/lib/accel/accel_sw.c ++++ b/lib/accel/accel_sw.c +@@ -24,12 +24,21 @@ + #endif + + #ifdef SPDK_CONFIG_ISAL ++#ifdef SPDK_CONFIG_ISAL_PATH ++#include ++#else + #include "../isa-l/include/igzip_lib.h" ++#endif + #ifdef SPDK_CONFIG_ISAL_CRYPTO ++#ifdef SPDK_CONFIG_ISAL_CRYPTO_PATH ++#include "isa-l-crypto/aes_xts.h" ++#include 
"isa-l-crypto/isal_crypto_api.h" ++#else + #include "../isa-l-crypto/include/aes_xts.h" + #include "../isa-l-crypto/include/isal_crypto_api.h" + #endif + #endif ++#endif + + /* Per the AES-XTS spec, the size of data unit cannot be bigger than 2^20 blocks, 128b each block */ + #define ACCEL_AES_XTS_MAX_BLOCK_SIZE (1 << 24) +diff --git a/lib/env_dpdk/env.mk b/lib/env_dpdk/env.mk +index f71de7f48..a45a019df 100644 +--- a/lib/env_dpdk/env.mk ++++ b/lib/env_dpdk/env.mk +@@ -171,7 +171,7 @@ endif + endif + + ifeq ($(CONFIG_VBDEV_COMPRESS),y) +-DPDK_PRIVATE_LINKER_ARGS += -lisal -L$(ISAL_DIR)/.libs ++DPDK_PRIVATE_LINKER_ARGS += -lisal -L$(ISAL_DIR)/.libs -L$(ISAL_DIR)/lib64 + ifeq ($(CONFIG_VBDEV_COMPRESS_MLX5),y) + DPDK_PRIVATE_LINKER_ARGS += -lmlx5 -libverbs + endif +diff --git a/lib/util/Makefile b/lib/util/Makefile +index e9daa2623..c2fa28734 100644 +--- a/lib/util/Makefile ++++ b/lib/util/Makefile +@@ -22,6 +22,8 @@ ifeq ($(CONFIG_HAVE_UUID_GENERATE_SHA1), n) + LOCAL_SYS_LIBS += -lssl + endif + ++LOCAL_SYS_LIBS += -L$(ISAL_DIR)/lib64 -lisal ++ + CFLAGS += -Wpointer-arith + + SPDK_MAP_FILE = $(abspath $(CURDIR)/spdk_util.map) +diff --git a/lib/util/crc16.c b/lib/util/crc16.c +index f085a2851..a5e6937ca 100644 +--- a/lib/util/crc16.c ++++ b/lib/util/crc16.c +@@ -11,7 +11,12 @@ + */ + + #ifdef SPDK_CONFIG_ISAL ++#ifdef SPDK_CONFIG_ISAL_PATH ++#include ++#else + #include "isa-l/include/crc.h" ++#endif ++ + + uint16_t + spdk_crc16_t10dif(uint16_t init_crc, const void *buf, size_t len) +diff --git a/lib/util/crc64.c b/lib/util/crc64.c +index b1a37af35..31bd7bd3c 100644 +--- a/lib/util/crc64.c ++++ b/lib/util/crc64.c +@@ -7,7 +7,11 @@ + #include "spdk/crc64.h" + + #ifdef SPDK_CONFIG_ISAL ++#ifdef SPDK_CONFIG_ISAL_PATH ++#include ++#else + #include "isa-l/include/crc64.h" ++#endif + + uint64_t + spdk_crc64_nvme(const void *buf, size_t len, uint64_t crc) +diff --git a/lib/util/crc_internal.h b/lib/util/crc_internal.h +index b432d0d7b..f9979249f 100644 +--- 
a/lib/util/crc_internal.h ++++ b/lib/util/crc_internal.h +@@ -10,7 +10,11 @@ + + #ifdef SPDK_CONFIG_ISAL + #define SPDK_HAVE_ISAL +-#include ++#ifdef SPDK_CONFIG_ISAL_PATH ++#include ++#else ++#include "isa-l/include/crc.h" ++#endif + #elif defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) + #define SPDK_HAVE_ARM_CRC + #include +diff --git a/lib/util/xor.c b/lib/util/xor.c +index 07eca5f50..2b15aea3b 100644 +--- a/lib/util/xor.c ++++ b/lib/util/xor.c +@@ -85,7 +85,11 @@ xor_gen_basic(void *dest, void **sources, uint32_t n, uint32_t len) + } + + #ifdef SPDK_CONFIG_ISAL ++#ifdef SPDK_CONFIG_ISAL_PATH ++#include ++#else + #include "isa-l/include/raid.h" ++#endif + + #define SPDK_XOR_BUF_ALIGN 32 + +diff --git a/mk/spdk.common.mk b/mk/spdk.common.mk +index 19f0192c2..65ef68f50 100644 +--- a/mk/spdk.common.mk ++++ b/mk/spdk.common.mk +@@ -179,23 +179,31 @@ endif + + IPSEC_MB_DIR=$(CONFIG_IPSEC_MB_DIR) + ++ifeq ($(CONFIG_ISAL_PATH),) + ISAL_DIR=$(SPDK_ROOT_DIR)/isa-l ++else ++ISAL_DIR=$(CONFIG_ISAL_PATH) ++endif ++ifeq ($(CONFIG_ISAL_CRYPTO_PATH),) + ISAL_CRYPTO_DIR=$(SPDK_ROOT_DIR)/isa-l-crypto ++else ++ISAL_CRYPTO_DIR=$(CONFIG_ISAL_CRYPTO_PATH) ++endif + ISAL_BUILD_DIR=$(SPDK_ROOT_DIR)/isalbuild + ISAL_CRYPTO_BUILD_DIR=$(SPDK_ROOT_DIR)/isalcryptobuild +-ifeq ($(CONFIG_ISAL), y) +-COMMON_CFLAGS += -I$(ISAL_DIR)/.. -I$(ISAL_BUILD_DIR) ++ifeq ($(CONFIG_ISAL),y) ++COMMON_CFLAGS += -I$(ISAL_DIR)/.. -I$(ISAL_DIR)/include -I$(ISAL_BUILD_DIR) + ifeq ($(CONFIG_SHARED),y) +-SYS_LIBS += -L$(ISAL_DIR)/.libs -lisal +-LDFLAGS += -Wl,-rpath=$(ISAL_DIR)/.libs ++SYS_LIBS += -L$(ISAL_DIR)/.libs -L$(ISAL_DIR)/lib64 -lisal ++LDFLAGS += -Wl,-rpath=$(ISAL_DIR)/.lib -Wl,-rpath=$(ISAL_DIR)/lib64 + else + SYS_LIBS += $(ISAL_DIR)/.libs/libisal.a + endif +-ifeq ($(CONFIG_ISAL_CRYPTO), y) ++ifeq ($(CONFIG_ISAL_CRYPTO),y) + COMMON_CFLAGS += -I$(ISAL_CRYPTO_DIR)/.. 
-I$(ISAL_CRYPTO_BUILD_DIR) + ifeq ($(CONFIG_SHARED),y) +-SYS_LIBS += -L$(ISAL_CRYPTO_DIR)/.libs -lisal_crypto +-LDFLAGS += -Wl,-rpath=$(ISAL_CRYPTO_DIR)/.libs ++SYS_LIBS += -L$(ISAL_CRYPTO_DIR)/.libs -L$(ISAL_CRYPTO_DIR)/lib64 -lisal_crypto ++LDFLAGS += -Wl,-rpath=$(ISAL_CRYPTO_DIR)/.libs -Wl,-rpath=$(ISAL_CRYPTO_DIR)/lib64 + else + SYS_LIBS += $(ISAL_CRYPTO_DIR)/.libs/libisal_crypto.a + endif diff --git a/site_scons/components/__init__.py b/site_scons/components/__init__.py index fdfd0739f9e..0d074dd3c24 100644 --- a/site_scons/components/__init__.py +++ b/site_scons/components/__init__.py @@ -275,14 +275,15 @@ def define_components(reqs): reqs.define('isal', retriever=GitRepoRetriever(), commands=[['./autogen.sh'], - ['./configure', '--prefix=$ISAL_PREFIX', '--libdir=$ISAL_PREFIX/lib64'], + ['./configure', '--disable-static', '--prefix=$ISAL_PREFIX', + '--libdir=$ISAL_PREFIX/lib64'], ['make'], ['make', 'install']], libs=['isal']) reqs.define('isal_crypto', retriever=GitRepoRetriever(), - commands=[['./autogen.sh'], - ['./configure', + commands=[['./autogen.sh', '--no-oshmem'], + ['./configure', '--disable-static', '--prefix=$ISAL_CRYPTO_PREFIX', '--libdir=$ISAL_CRYPTO_PREFIX/lib64'], ['make'], @@ -359,8 +360,12 @@ def define_components(reqs): # https://gcc.gnu.org/onlinedocs/gcc/x86-Options.html dist = distro.linux_distribution() + spdk_reqs = ['isal', 'isal_crypto'] + spdk_conf = ['--with-isal=$ISAL_PREFIX', '--with-isal-crypto=$ISAL_CRYPTO_PREFIX'] if ARM_PLATFORM: spdk_arch = 'native' + spdk_reqs = [] + spdk_conf = [] elif dist[0] == 'CentOS Linux' and dist[1] == '7': spdk_arch = 'native' elif dist[0] == 'Ubuntu' and dist[1] == '20.04': @@ -376,25 +381,24 @@ def define_components(reqs): '--prefix=$SPDK_PREFIX', '--disable-tests', '--disable-unit-tests', - '--disable-apps', '--without-vhost', - '--without-crypto', - '--without-pmdk', '--without-rbd', '--without-iscsi-initiator', - '--without-isal', '--without-vtune', '--with-shared', - 
f'--target-arch={spdk_arch}'], + '--without-nvme-cuse', + '--without-crypto', + f'--target-arch={spdk_arch}'] + spdk_conf, ['make', f'CONFIG_ARCH={spdk_arch}'], ['make', 'libdir=$SPDK_PREFIX/lib64/daos_srv', 'includedir=$SPDK_PREFIX/include/daos_srv', 'install'], [copy_files, 'dpdk/build/lib', '$SPDK_PREFIX/lib64/daos_srv'], + ['rm', '-rf', '$SPDK_PREFIX/lib'], [copy_files, 'dpdk/build/include', '$SPDK_PREFIX/include/daos_srv/dpdk'], [copy_files, 'include', '$SPDK_PREFIX/share/daos/spdk/include'], [copy_files, 'scripts', '$SPDK_PREFIX/share/daos/spdk/scripts'], - ['mv', '$SPDK_PREFIX/bin/spdk_nvme_discovery_aer', - '$SPDK_PREFIX/bin/daos_spdk_nvme_discovery_aer'], + ['mv', '$SPDK_PREFIX/bin/spdk_nvme_discover', + '$SPDK_PREFIX/bin/daos_spdk_nvme_discover'], ['cp', 'build/examples/lsvmd', '$SPDK_PREFIX/bin/daos_spdk_nvme_lsvmd'], ['cp', 'build/examples/nvme_manage', '$SPDK_PREFIX/bin/daos_spdk_nvme_manage'], @@ -406,7 +410,8 @@ def define_components(reqs): extra_lib_path=['lib64/daos_srv'], headers=['spdk/nvme.h'], pkgconfig='daos_spdk', - patch_rpath=['lib64/daos_srv', 'bin']) + patch_rpath=['lib64/daos_srv', 'bin'], + requires=spdk_reqs) reqs.define('protobufc', retriever=GitRepoRetriever(), diff --git a/src/bio/SConscript b/src/bio/SConscript index 93866e078e8..444a243eb3c 100644 --- a/src/bio/SConscript +++ b/src/bio/SConscript @@ -32,7 +32,7 @@ def scons(): libs += ['spdk_vmd', 'spdk_event_bdev', 'spdk_init'] # Other libs - libs += ['numa', 'dl', 'smd', 'abt'] + libs += ['numa', 'dl', 'smd', 'abt', 'ssl'] tgts = FILES + control_tgts bio = denv.d_library("bio", tgts, install_off="../..", LIBS=libs) diff --git a/src/bio/bio_xstream.c b/src/bio/bio_xstream.c index 966eadf1c42..059556ba89a 100644 --- a/src/bio/bio_xstream.c +++ b/src/bio/bio_xstream.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2018-2024 Intel Corporation. 
+ * (C) Copyright 2025 Google LLC * (C) Copyright 2025 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent @@ -21,6 +22,7 @@ #include #include #include +#include #include #include "bio_internal.h" #include @@ -155,6 +157,7 @@ bio_spdk_env_init(void) /* Only print error and more severe to stderr. */ spdk_log_set_print_level(SPDK_LOG_ERROR); + opts.opts_size = sizeof(opts); spdk_env_opts_init(&opts); opts.name = "daos_engine"; opts.env_context = (char *)dpdk_cli_override_opts; @@ -507,10 +510,30 @@ common_init_cb(void *arg, int rc) cp_arg->cca_rc = daos_errno2der(-rc); } +struct subsystem_init_arg { + struct common_cp_arg *cp_arg; + void *json_data; + ssize_t json_data_size; +}; + static void subsys_init_cb(int rc, void *arg) { - common_init_cb(arg, rc); + struct subsystem_init_arg *init_arg = arg; + + if (init_arg->json_data != NULL) { + free(init_arg->json_data); + init_arg->json_data = NULL; + } + + if (rc) + D_ERROR("subsystem init failed: %d\n", rc); + + common_init_cb(init_arg->cp_arg, rc); + + D_FREE(init_arg); + + return; } static void @@ -1596,6 +1619,63 @@ bio_xsctxt_free(struct bio_xs_context *ctxt) D_FREE(ctxt); } +static void +subsystem_init_cb(int rc, void *arg) +{ + struct subsystem_init_arg *init_arg; + + if (rc) { + subsys_init_cb(rc, arg); + return; + } + + init_arg = arg; + + /* Set RUNTIME state and load config again for RUNTIME methods */ + spdk_rpc_set_state(SPDK_RPC_RUNTIME); + spdk_subsystem_load_config(init_arg->json_data, init_arg->json_data_size, subsys_init_cb, + init_arg, true); +} + +static void +load_config_cb(int rc, void *arg) +{ + if (rc) { + subsys_init_cb(rc, arg); + return; + } + + /* init subsystem */ + spdk_subsystem_init(subsystem_init_cb, arg); +} + +static int +bio_xsctxt_init_by_config(struct common_cp_arg *cp_arg) +{ + struct subsystem_init_arg *init_arg; + void *json_data; + size_t json_data_size; + + json_data = spdk_posix_file_load_from_name(nvme_glb.bd_nvme_conf, &json_data_size); 
+ if (json_data == NULL) { + D_ERROR("failed to load nvme conf %s\n", nvme_glb.bd_nvme_conf); + return -DER_NOMEM; + } + + D_ALLOC_PTR(init_arg); + if (init_arg == NULL) { + free(json_data); + return -DER_NOMEM; + } + + init_arg->cp_arg = cp_arg; + init_arg->json_data = json_data; + init_arg->json_data_size = (ssize_t)json_data_size; + spdk_subsystem_load_config(json_data, (ssize_t)json_data_size, load_config_cb, init_arg, + true); + return 0; +} + int bio_xsctxt_alloc(struct bio_xs_context **pctxt, int tgt_id, bool self_polling) { @@ -1659,13 +1739,14 @@ bio_xsctxt_alloc(struct bio_xs_context **pctxt, int tgt_id, bool self_polling) /* Initialize all registered subsystems: bdev, vmd, copy. */ common_prep_arg(&cp_arg); - spdk_subsystem_init_from_json_config(nvme_glb.bd_nvme_conf, - SPDK_DEFAULT_RPC_ADDR, - subsys_init_cb, &cp_arg, - true); + rc = bio_xsctxt_init_by_config(&cp_arg); + if (rc != 0) { + D_ERROR("failed to load nvme conf %s\n", nvme_glb.bd_nvme_conf); + goto out; + } + rc = xs_poll_completion(ctxt, &cp_arg.cca_inflights, 0); D_ASSERT(rc == 0); - if (cp_arg.cca_rc != 0) { rc = cp_arg.cca_rc; DL_ERROR(rc, "failed to init bdevs"); @@ -1695,7 +1776,7 @@ bio_xsctxt_alloc(struct bio_xs_context **pctxt, int tgt_id, bool self_polling) if ((!nvme_glb.bd_rpc_srv_addr) || (strlen(nvme_glb.bd_rpc_srv_addr) == 0)) nvme_glb.bd_rpc_srv_addr = SPDK_DEFAULT_RPC_ADDR; - rc = spdk_rpc_initialize(nvme_glb.bd_rpc_srv_addr); + rc = spdk_rpc_initialize(nvme_glb.bd_rpc_srv_addr, NULL); if (rc != 0) { D_ERROR("failed to start SPDK JSON-RPC server at %s, "DF_RC"\n", nvme_glb.bd_rpc_srv_addr, DP_RC(daos_errno2der(-rc))); diff --git a/src/control/SConscript b/src/control/SConscript index b64ed23bc0d..1029735f957 100644 --- a/src/control/SConscript +++ b/src/control/SConscript @@ -192,10 +192,12 @@ def scons(): "-L$BUILD_DIR/src/common " "-L$BUILD_DIR/src/utils/ddb " "-L$SPDK_PREFIX/lib " - "-L$OFI_PREFIX/lib $_RPATH") + "-L$OFI_PREFIX/lib " + "-L$ISAL_PREFIX/lib64 " + 
"-L$ISAL_CRYPTO_PREFIX/lib64 $_RPATH") # Explicitly link RTE & SPDK libs for CGO access ldopts = cgolibdirs + " -lspdk_env_dpdk -lspdk_nvme -lspdk_vmd -lrte_mempool" + \ - " -lrte_mempool_ring -lrte_bus_pci -lnvme_control -lnuma -ldl" + " -lrte_mempool_ring -lrte_bus_pci -lnvme_control -lisal -lssl -lnuma -ldl" aenv.AppendENVPath("CGO_LDFLAGS", ldopts, sep=" ") aenv.AppendENVPath("CGO_CFLAGS", aenv.subst("$_CPPINCFLAGS"), sep=" ") @@ -214,7 +216,7 @@ def scons(): # Add vos and dependent libs for ddb ddb_env.AppendENVPath("CGO_LDFLAGS", " -lvos -ldav_v2 -ldaos_common_pmem -lpmem " - "-labt -lgurt -luuid -lbio -lcart", sep=" ") + "-labt -lgurt -luuid -lbio -lssl -lcart", sep=" ") install_go_bin(ddb_env, "ddb", ['ddb']) diff --git a/src/control/lib/spdk/ctests/SConscript b/src/control/lib/spdk/ctests/SConscript index a8bd196fed2..a19376392f4 100644 --- a/src/control/lib/spdk/ctests/SConscript +++ b/src/control/lib/spdk/ctests/SConscript @@ -23,7 +23,7 @@ def scons(): libs += ['rte_mempool_ring', 'rte_bus_pci', 'nvme_control'] # Other libs - libs += ['numa', 'dl', 'isal', 'cmocka', 'pthread'] + libs += ['numa', 'dl', 'isal', 'cmocka', 'pthread', 'ssl'] if GetOption('help'): return diff --git a/src/control/lib/spdk/ctests/nvme_control_ut.c b/src/control/lib/spdk/ctests/nvme_control_ut.c index 0863d4f4509..4bdcbe16d29 100644 --- a/src/control/lib/spdk/ctests/nvme_control_ut.c +++ b/src/control/lib/spdk/ctests/nvme_control_ut.c @@ -1,8 +1,9 @@ /** -* (C) Copyright 2019-2021 Intel Corporation. -* -* SPDX-License-Identifier: BSD-2-Clause-Patent -*/ + * (C) Copyright 2019-2021 Intel Corporation. 
+ * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ #include #include @@ -102,6 +103,13 @@ mock_spdk_pci_device_get_socket_id(struct spdk_pci_device *dev) return 1; } +static const char * +mock_spdk_pci_device_get_type(const struct spdk_pci_device *dev) +{ + (void)dev; + return "pci"; +} + /** * =================== * Test functions @@ -208,9 +216,8 @@ test_collect(void **state) test_ret = init_ret(); assert_null(test_ret->ctrlrs); - _collect(test_ret, &mock_copy_ctrlr_data, - &mock_spdk_nvme_ctrlr_get_pci_device, - &mock_spdk_pci_device_get_socket_id); + _collect(test_ret, &mock_copy_ctrlr_data, &mock_spdk_nvme_ctrlr_get_pci_device, + &mock_spdk_pci_device_get_socket_id, &mock_spdk_pci_device_get_type); if (test_ret->rc != 0) fprintf(stderr, "collect err: %s\n", test_ret->info); diff --git a/src/control/lib/spdk/include/nvme_control_common.h b/src/control/lib/spdk/include/nvme_control_common.h index ae8780ad911..5bb0fd7850f 100644 --- a/src/control/lib/spdk/include/nvme_control_common.h +++ b/src/control/lib/spdk/include/nvme_control_common.h @@ -1,5 +1,6 @@ /** * (C) Copyright 2019-2023 Intel Corporation. + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -170,8 +171,10 @@ typedef struct spdk_pci_device * typedef int (*socket_id_getter)(struct spdk_pci_device *); +typedef const char *(*pci_type_getter)(const struct spdk_pci_device *); + void -_collect(struct ret_t *, data_copier, pci_getter, socket_id_getter); +_collect(struct ret_t *, data_copier, pci_getter, socket_id_getter, pci_type_getter); /** * Collect controller and namespace information of the NVMe devices. 
diff --git a/src/control/lib/spdk/nvme_default.go b/src/control/lib/spdk/nvme_default.go index 0b0fc935738..09e50027b2d 100644 --- a/src/control/lib/spdk/nvme_default.go +++ b/src/control/lib/spdk/nvme_default.go @@ -1,6 +1,7 @@ // // (C) Copyright 2022-2023 Intel Corporation. // (C) Copyright 2025 Hewlett Packard Enterprise Development LP +// (C) Copyright 2025 Google LLC // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -14,7 +15,7 @@ package spdk /* #cgo CFLAGS: -I . -#cgo LDFLAGS: -L . -lnvme_control +#cgo LDFLAGS: -L . -lnvme_control -lssl #cgo LDFLAGS: -lspdk_env_dpdk -lspdk_nvme -lspdk_vmd -lspdk_util #cgo LDFLAGS: -lrte_mempool -lrte_mempool_ring -lrte_bus_pci diff --git a/src/control/lib/spdk/src/nvme_control.c b/src/control/lib/spdk/src/nvme_control.c index 137d3b91462..d609e485726 100644 --- a/src/control/lib/spdk/src/nvme_control.c +++ b/src/control/lib/spdk/src/nvme_control.c @@ -1,8 +1,9 @@ /** -* (C) Copyright 2018-2022 Intel Corporation. -* -* SPDX-License-Identifier: BSD-2-Clause-Patent -*/ + * (C) Copyright 2018-2022 Intel Corporation. + * (C) Copyright 2025 Google LLC + * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ #include #include @@ -488,6 +489,7 @@ daos_spdk_init(int mem_sz, char *env_ctx, size_t nr_pcil, char **pcil) struct spdk_env_opts opts = {}; int rc, i; + opts.opts_size = sizeof(opts); spdk_env_opts_init(&opts); if (mem_sz > 0) diff --git a/src/control/lib/spdk/src/nvme_control_common.c b/src/control/lib/spdk/src/nvme_control_common.c index 4d7d138fd08..41d859e1a19 100644 --- a/src/control/lib/spdk/src/nvme_control_common.c +++ b/src/control/lib/spdk/src/nvme_control_common.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2019-2023 Intel Corporation. 
+ * (C) Copyright 2025 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -445,7 +446,7 @@ populate_dev_health(struct nvme_stats *stats, void _collect(struct ret_t *ret, data_copier copy_data, pci_getter get_pci, - socket_id_getter get_socket_id) + socket_id_getter get_socket_id, pci_type_getter get_pci_type) { struct ctrlr_entry *ctrlr_entry; const struct spdk_nvme_ctrlr_data *cdata; @@ -499,8 +500,7 @@ _collect(struct ret_t *ret, data_copier copy_data, pci_getter get_pci, ctrlr_tmp->socket_id = get_socket_id(pci_dev); - pci_type = spdk_pci_device_get_type(pci_dev); - free(pci_dev); + pci_type = get_pci_type(pci_dev); ctrlr_tmp->pci_type = strndup(pci_type, NVME_DETAIL_BUFLEN); if (ctrlr_tmp->pci_type == NULL) { rc = -NVMEC_ERR_GET_PCI_TYPE; @@ -555,7 +555,7 @@ collect(void) ret = init_ret(); _collect(ret, ©_ctrlr_data, &spdk_nvme_ctrlr_get_pci_device, - &spdk_pci_device_get_socket_id); + &spdk_pci_device_get_socket_id, &spdk_pci_device_get_type); return ret; } diff --git a/src/control/server/ctl_storage_rpc.go b/src/control/server/ctl_storage_rpc.go index 21fdc770bfc..4efefd6c32f 100644 --- a/src/control/server/ctl_storage_rpc.go +++ b/src/control/server/ctl_storage_rpc.go @@ -108,14 +108,18 @@ func findBdevTier(pciAddr string, tcs storage.TierConfigs) *storage.TierConfig { } // Convert bdev scan results to protobuf response. 
-func bdevScanToProtoResp(scan scanBdevsFn, bdevCfgs storage.TierConfigs) (*ctlpb.ScanNvmeResp, error) { +func bdevScanToProtoResp(log logging.DebugLogger, scan scanBdevsFn, bdevCfgs storage.TierConfigs) (*ctlpb.ScanNvmeResp, error) { req := storage.BdevScanRequest{DeviceList: bdevCfgs.Bdevs()} + log.Debugf("bdevScanToProtoResp: bdev provider scan, req: %+v", req) + resp, err := scan(req) if err != nil { - return nil, err + return nil, errors.Wrap(err, "bdev provider scan") } + log.Debugf("bdevScanToProtoResp: bdev provider scan, resp: %+v", resp) + pbCtrlrs := make(proto.NvmeControllers, 0, len(resp.Controllers)) if err := pbCtrlrs.FromNative(resp.Controllers); err != nil { @@ -230,7 +234,13 @@ func bdevScanAssigned(ctx context.Context, cs *ControlService, req *ctlpb.ScanNv return nil, errors.New("meta smd usage info unavailable as engines stopped") } - return bdevScanToProtoResp(cs.storage.ScanBdevs, bdevCfgs) + resp, err := bdevScanToProtoResp(cs.log, cs.storage.ScanBdevs, bdevCfgs) + if err != nil { + return nil, errors.Wrap(err, "bdevScanAssigned: bdevScanToProtoResp") + } + + cs.log.Debugf("bdevScanAssigned: bdevScanToProtoResp returned: %+v", resp) + return resp, nil } // Delegate scan to engine instances as soon as one engine with assigned bdevs has started. @@ -264,11 +274,12 @@ func bdevScan(ctx context.Context, cs *ControlService, req *ctlpb.ScanNvmeReq, n cs.log.Debugf("scan bdevs from control service as no bdevs in cfg") // No bdevs configured for engines to claim so scan through control service. 
- resp, err = bdevScanToProtoResp(cs.storage.ScanBdevs, bdevCfgs) + resp, err = bdevScanToProtoResp(cs.log, cs.storage.ScanBdevs, bdevCfgs) if err != nil { - return nil, err + return nil, errors.Wrap(err, "bdevScan: bdevScanToProtoResp") } + cs.log.Debugf("bdevScan: bdevScanToProtoResp returned: %+v", resp) return bdevScanTrimResults(req, resp), nil } @@ -287,7 +298,7 @@ func bdevScan(ctx context.Context, cs *ControlService, req *ctlpb.ScanNvmeReq, n return nil, err } - cs.log.Tracef("bdevScanAssigned returned %d, want %d", nrScannedBdevs, nrCfgBdevs) + cs.log.Debugf("bdevScanAssigned returned %d, want %d", nrScannedBdevs, nrCfgBdevs) if nrScannedBdevs == nrCfgBdevs { return bdevScanTrimResults(req, resp), nil @@ -774,8 +785,9 @@ func (cs *ControlService) StorageScan(ctx context.Context, req *ctlpb.StorageSca } else { respNvme, err := scanBdevs(ctx, cs, req.Nvme, respScm.Namespaces) if err != nil { - return nil, err + return nil, errors.Wrap(err, "scan bdevs") } + cs.log.Debugf("scanBdevs returned respNvme: %+v", respNvme) resp.Nvme = respNvme } diff --git a/src/control/server/engine/exec.go b/src/control/server/engine/exec.go index c7abed7e2b4..c1efe634e3f 100644 --- a/src/control/server/engine/exec.go +++ b/src/control/server/engine/exec.go @@ -1,5 +1,6 @@ // // (C) Copyright 2019-2023 Intel Corporation. 
+// (C) Copyright 2025 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -161,21 +162,22 @@ func processLogEnvs(env []string) ([]string, error) { func (r *Runner) Start(ctx context.Context) (RunnerExitChan, error) { args, err := r.Config.CmdLineArgs() if err != nil { - return nil, err + return nil, errors.Wrap(err, "CmdLineArgs") } env, err := r.Config.CmdLineEnv() if err != nil { - return nil, err + return nil, errors.Wrap(err, "CmdLineEnv") } env = common.MergeKeyValues(cleanEnvVars(os.Environ(), r.Config.EnvPassThrough), env) env, err = processLogEnvs(env) if err != nil { + return nil, errors.Wrap(err, "processLogEnvs") return nil, err } exitCh := make(RunnerExitChan) - return exitCh, r.run(ctx, args, env, exitCh) + return exitCh, errors.Wrap(r.run(ctx, args, env, exitCh), "Runner run") } // IsRunning indicates whether the Runner process is running or not. diff --git a/src/control/server/instance_exec.go b/src/control/server/instance_exec.go index b153d1b09ac..d38ab1613fe 100644 --- a/src/control/server/instance_exec.go +++ b/src/control/server/instance_exec.go @@ -36,10 +36,10 @@ func (ei *EngineInstance) format(ctx context.Context) error { ei.log.Debugf("instance %d: checking if storage is formatted", idx) if err := ei.awaitStorageReady(ctx); err != nil { - return err + return errors.Wrap(err, "awaitStorageReady") } if err := ei.createSuperblock(); err != nil { - return err + return errors.Wrap(err, "createSuperblock") } if !ei.hasSuperblock() { @@ -50,7 +50,7 @@ func (ei *EngineInstance) format(ctx context.Context) error { // any callbacks that were waiting for this state. 
for _, readyFn := range ei.onStorageReady { if err := readyFn(ctx); err != nil { - return err + return errors.Wrap(err, "onStorageReady readyFn") } } @@ -82,10 +82,11 @@ func (ei *EngineInstance) start(ctx context.Context) (chan *engine.RunnerExitInf } if err := ei.initIncarnationFromSuperblock(); err != nil { - return nil, err + return nil, errors.Wrap(err, "initIncarnationFromSuperblock") } - return ei.runner.Start(ctx) + ch, err := ei.runner.Start(ctx) + return ch, errors.Wrap(err, "runner Start") } // waitReady awaits ready signal from I/O Engine before starting @@ -235,7 +236,7 @@ func (ei *EngineInstance) Run(ctx context.Context) { runnerExitCh, err = ei.startRunner(ctx) if err != nil { - ei.log.Errorf("runner exited without starting process: %s", err) + ei.log.Errorf("runner exited without starting process: %+v", err) ei.handleExit(ctx, 0, err) continue } diff --git a/src/control/server/instance_storage_rpc.go b/src/control/server/instance_storage_rpc.go index b5d482e31bf..c0a34a6c1f5 100644 --- a/src/control/server/instance_storage_rpc.go +++ b/src/control/server/instance_storage_rpc.go @@ -452,7 +452,13 @@ func bdevScanEngineAssigned(ctx context.Context, engine Engine, req *ctlpb.ScanN if !*isStarted { engine.Debugf("scanning engine-%d bdevs while engine is down", engine.Index()) - return bdevScanToProtoResp(engine.GetStorage().ScanBdevs, bdevCfgs) + resp, err := bdevScanToProtoResp(engine, engine.GetStorage().ScanBdevs, bdevCfgs) + if err != nil { + return nil, errors.Wrap(err, "bdevScanEngineAssigned: bdevScanToProtoResp") + } + + engine.Debugf("bdevScanEngineAssigned: bdevScanToProtoResp returned: %+v", resp) + return resp, err } engine.Debugf("scanning engine-%d bdevs while engine is up", engine.Index()) diff --git a/src/control/server/storage/bdev/provider.go b/src/control/server/storage/bdev/provider.go index d86c2611d09..d823dad185b 100644 --- a/src/control/server/storage/bdev/provider.go +++ b/src/control/server/storage/bdev/provider.go @@ -1,5 
+1,6 @@ // // (C) Copyright 2019-2023 Intel Corporation. +// (C) Copyright 2025 Hewlett Packard Enterprise Development LP // (C) Copyright 2025 Google LLC // // SPDX-License-Identifier: BSD-2-Clause-Patent @@ -52,9 +53,12 @@ func NewProvider(log logging.Logger, backend Backend) *Provider { // Scan calls into the backend to discover NVMe components in the // system. -func (p *Provider) Scan(req storage.BdevScanRequest) (resp *storage.BdevScanResponse, err error) { +func (p *Provider) Scan(req storage.BdevScanRequest) (*storage.BdevScanResponse, error) { p.log.Debugf("run bdev storage provider scan, req: %+v", req) - return p.backend.Scan(req) + resp, err := p.backend.Scan(req) + p.log.Debugf("run bdev storage provider scan, resp: %+v", resp) + + return resp, err } // Prepare attempts to perform all actions necessary to make NVMe components diff --git a/src/dtx/tests/SConscript b/src/dtx/tests/SConscript index 2ea2e93eec5..0367747d76d 100644 --- a/src/dtx/tests/SConscript +++ b/src/dtx/tests/SConscript @@ -7,7 +7,7 @@ def scons(): # build dtx_tests - libraries = ['abt', 'bio', 'dtx', 'vos', 'gurt', 'daos_common_pmem', 'cmocka', 'pthread', + libraries = ['abt', 'bio', 'dtx', 'vos', 'ssl', 'gurt', 'daos_common_pmem', 'cmocka', 'pthread', 'uuid', 'cart', 'daos_tests'] tenv = denv.Clone() @@ -36,8 +36,8 @@ def scons(): # build dtx_ut - libraries = ['abt', 'bio', 'cmocka', 'daos_common_pmem', 'gurt', 'uuid', 'vea', 'pthread', - 'pmemobj'] + libraries = ['abt', 'bio', 'ssl', 'cmocka', 'daos_common_pmem', 'gurt', 'uuid', 'vea', + 'pthread', 'pmemobj'] tenv = denv.Clone() tenv.Append(CPPPATH=[Dir('../../vos').srcnode()]) diff --git a/src/engine/SConscript b/src/engine/SConscript index 06c0e2bfef8..434ba4b1def 100644 --- a/src/engine/SConscript +++ b/src/engine/SConscript @@ -13,7 +13,7 @@ def scons(): denv.AppendUnique(CPPPATH=[Dir('..').srcnode()]) denv.Append(CPPDEFINES=['-DDAOS_PMEM_BUILD']) libraries = ['daos_common_pmem', 'gurt', 'cart', 'vos_srv'] - libraries += 
['bio', 'dl', 'uuid', 'pthread', 'abt'] + libraries += ['bio', 'ssl', 'dl', 'uuid', 'pthread', 'abt'] libraries += ['hwloc', 'pmemobj', 'protobuf-c', 'isal', 'numa'] denv.require('argobots', 'protobufc', 'pmdk', 'isal') diff --git a/src/mgmt/tests/SConscript b/src/mgmt/tests/SConscript index b3418e20b5c..bbf5638355c 100644 --- a/src/mgmt/tests/SConscript +++ b/src/mgmt/tests/SConscript @@ -12,7 +12,8 @@ def scons(): denv.AppendUnique(RPATH_FULL=['$PREFIX/lib64/daos_srv']) denv.d_test_program('srv_drpc_tests', source=[pb_objs, mocks, 'srv_drpc_tests.c', '../srv_drpc.c'], - LIBS=['cmocka', 'protobuf-c', 'daos_common_pmem', 'gurt', 'uuid', 'bio']) + LIBS=['cmocka', 'protobuf-c', 'daos_common_pmem', 'gurt', 'uuid', 'bio', + 'ssl']) if __name__ == "SCons.Script": diff --git a/src/object/tests/SConscript b/src/object/tests/SConscript index 74c0f41da23..56fc8a015d0 100644 --- a/src/object/tests/SConscript +++ b/src/object/tests/SConscript @@ -14,7 +14,7 @@ def scons(): unit_env.Append(CPPDEFINES=['-DDAOS_PMEM_BUILD']) unit_env.d_test_program(['srv_checksum_tests.c', '../srv_csum.c'], LIBS=['daos_common_pmem', 'gurt', 'cmocka', - 'vos', 'bio', 'abt']) + 'vos', 'bio', 'ssl', 'abt']) unit_env.d_test_program(['cli_checksum_tests.c', '../cli_csum.c', diff --git a/src/tests/SConscript b/src/tests/SConscript index 6467ef54b4e..be22f9a0bd2 100644 --- a/src/tests/SConscript +++ b/src/tests/SConscript @@ -38,7 +38,7 @@ def build_tests(env, prereqs): tenv = denv.Clone() tenv.require('argobots', 'pmdk') - libs_server += ['vos', 'bio', 'abt', 'numa'] + libs_server += ['vos', 'bio', 'ssl', 'abt', 'numa'] vos_engine = tenv.StaticObject(['vos_engine.c']) vos_perf = tenv.d_program('vos_perf', diff --git a/src/utils/ddb/SConscript b/src/utils/ddb/SConscript index 52861eca39c..df4ce966d14 100644 --- a/src/utils/ddb/SConscript +++ b/src/utils/ddb/SConscript @@ -8,7 +8,7 @@ def scons(): denv = env.Clone() denv.Append(OBJPREFIX="ddb_") - libs = ['vos', 'daos_common_pmem', 'abt', 'gurt', 
'uuid', 'bio', 'cart'] + libs = ['vos', 'daos_common_pmem', 'abt', 'gurt', 'uuid', 'bio', 'ssl', 'cart'] # spdk libraries libs += ['spdk_event', 'spdk_log'] libs += ['spdk_bdev', 'spdk_blob', 'spdk_blob_bdev', 'spdk_json'] diff --git a/src/vos/tests/SConscript b/src/vos/tests/SConscript index e5b6f72ecc7..0f61277fafe 100644 --- a/src/vos/tests/SConscript +++ b/src/vos/tests/SConscript @@ -14,7 +14,7 @@ def scons(): vts_objs = senv.StaticObject(vts_src) Export('vts_objs') - libraries = ['vos', 'bio', 'abt', 'pthread', 'daos_common_pmem', + libraries = ['vos', 'bio', 'ssl', 'abt', 'pthread', 'daos_common_pmem', 'daos_tests', 'gurt', 'uuid', 'pthread', 'pmemobj', 'cmocka', 'gomp'] @@ -42,7 +42,7 @@ def scons(): unit_env.AppendUnique(RPATH_FULL=['$PREFIX/lib64/daos_srv']) libraries = ['daos_common_pmem', 'daos_tests', 'gurt', 'cart', 'cmocka', - 'vos', 'uuid', 'pmem', 'pmemobj', 'bio', 'pthread', 'abt'] + 'vos', 'uuid', 'pmem', 'pmemobj', 'bio', 'ssl', 'pthread', 'abt'] unit_env.d_test_program('pool_scrubbing_tests', ['pool_scrubbing_tests.c', '../vos_pool_scrub.c'], LIBS=libraries) @@ -51,7 +51,8 @@ def scons(): tenv.AppendUnique(RPATH_FULL=['$PREFIX/lib64/daos_srv']) tenv.Append(CPPDEFINES={'VOS_STANDALONE': '1'}) - libraries = ['uuid', 'bio', 'gurt', 'cmocka', 'daos_common_pmem', 'daos_tests', 'vos', 'abt'] + libraries = ['uuid', 'bio', 'gurt', 'cmocka', 'daos_common_pmem', 'daos_tests', 'vos', 'abt', + 'ssl'] tenv.require('spdk') bio_ut_src = ['bio_ut.c', 'wal_ut.c'] diff --git a/utils/build.config b/utils/build.config index 735a160be05..c36fbd2a043 100644 --- a/utils/build.config +++ b/utils/build.config @@ -7,7 +7,7 @@ fused=v1.0.0 pmdk=2.1.2 isal=v2.31.1 isal_crypto=v2.25.0 -spdk=v22.01.2 +spdk=v24.09 ofi=v1.22.0 mercury=v2.4.0 protobufc=v1.3.3 @@ -26,6 +26,6 @@ protobufc=https://github.com/protobuf-c/protobuf-c.git ucx=https://github.com/openucx/ucx.git [patch_versions] 
-spdk=0001_b0aba3fcd5aceceea530a702922153bc75664978.diff,0002_445a4c808badbad3942696ecf16fa60e8129a747.diff +spdk=0001_3428322b812fe31cc3e1d0308a7f5bd4b06b9886.diff,0002_spdk_rwf_nowait.patch,0003_external_isal.patch mercury=0001_na_ucx.patch,0002_na_ucx_ep_flush.patch,0003_combined_plugin_path.patch argobots=0001_411e5b344642ebc82190fd8b125db512e5b449d1.diff,0002_bb0c908abfac4bfe37852eee621930634183c6aa.diff diff --git a/utils/rpms/daos.changelog b/utils/rpms/daos.changelog index 312526fc063..d255dac085d 100644 --- a/utils/rpms/daos.changelog +++ b/utils/rpms/daos.changelog @@ -1,4 +1,8 @@ %changelog +* Tue Nov 25 2025 Jeff Olivier 2.7.102-3 +- Upgrade spdk +- Add some missing dependencies numactl and pcituils + * Mon Nov 24 2025 Jeff Olivier 2.7.102-2 - Require isal_crypto 2.25.0 due to API deprecation warnings diff --git a/utils/rpms/daos.sh b/utils/rpms/daos.sh index 6a79e0ccfaa..8a28b2a187d 100755 --- a/utils/rpms/daos.sh +++ b/utils/rpms/daos.sh @@ -181,7 +181,7 @@ EOF DEPENDS=( "daos = ${VERSION}-${RELEASE}" "daos-spdk = ${daos_spdk_full}" ) DEPENDS+=( "${pmemobj_lib} >= ${pmdk_full}" "${argobots_lib} >= ${argobots_full}" ) - DEPENDS+=( "${isal_crypto_lib} >= ${isal_crypto_version}" ) + DEPENDS+=( "${isal_crypto_lib} >= ${isal_crypto_version}" "numactl" "pciutils" ) build_package "daos-server" TARGET_PATH="${bindir}" diff --git a/utils/rpms/daos.spec b/utils/rpms/daos.spec index 2f54a5d9624..3bb3d4a7a02 100644 --- a/utils/rpms/daos.spec +++ b/utils/rpms/daos.spec @@ -25,7 +25,7 @@ Name: daos Version: 2.7.102 -Release: 2%{?relval}%{?dist} +Release: 3%{?relval}%{?dist} Summary: DAOS Storage Engine License: BSD-2-Clause-Patent diff --git a/utils/rpms/package_info.sh b/utils/rpms/package_info.sh index 0d31aa6ec60..cc3be377607 100644 --- a/utils/rpms/package_info.sh +++ b/utils/rpms/package_info.sh @@ -1,4 +1,5 @@ #!/bin/bash +# (C) Copyright 2025 Google LLC root="$(realpath "$(dirname "$(dirname "$(dirname "${BASH_SOURCE[0]}")")")")" set_lib_name() { comp="$1"; 
shift @@ -52,8 +53,8 @@ export isal_full="${isal_version}-${isal_release}" export isal_crypto_version="2.25.0" export isal_crypto_release="1${distro_name}" export isal_crypto_full="${isal_crypto_version}-${isal_crypto_release}" -export daos_spdk_version="1.0.0" -export daos_spdk_release="4${distro_name}" +export daos_spdk_version="2.0.0" +export daos_spdk_release="1${distro_name}" export daos_spdk_full="${daos_spdk_version}-${daos_spdk_release}" export fused_version="1.0.0" export fused_release="3${distro_name}" diff --git a/utils/rpms/spdk.changelog b/utils/rpms/spdk.changelog new file mode 100644 index 00000000000..181aa36bc16 --- /dev/null +++ b/utils/rpms/spdk.changelog @@ -0,0 +1,6 @@ +* Tue Nov 25 2025 Jeff Olivier - 2.0.0-1 +- Upgrade to SPDK 24.09. +- Restore missing changelog + +* Wed Sep 10 2025 Jeff Olivier - 1.0.0-1 +- Switch to daos-spdk package for spdk, deprecates old spdk diff --git a/utils/rpms/spdk.sh b/utils/rpms/spdk.sh index 2cff9fd4138..d041511ea42 100755 --- a/utils/rpms/spdk.sh +++ b/utils/rpms/spdk.sh @@ -17,6 +17,7 @@ DESCRIPTION="The Storage Performance Development Kit provides a set of tools and libraries for writing high performance, scalable, user-mode storage applications." 
URL="https://spdk.io" +RPM_CHANGELOG="spdk.changelog" files=() TARGET_PATH="${bindir}" @@ -35,8 +36,8 @@ list_files files "${SL_SPDK_PREFIX}/lib64/daos_srv/libspdk*.so.*" \ clean_bin "${files[@]}" append_install_list "${files[@]}" -TARGET_PATH="${libdir}/daos_srv/dpdk/pmds-22.0" -list_files files "${SL_SPDK_PREFIX}/lib64/daos_srv/dpdk/pmds-22.0/lib*.so.*" +TARGET_PATH="${libdir}/daos_srv/dpdk/pmds-24.1" +list_files files "${SL_SPDK_PREFIX}/lib64/daos_srv/dpdk/pmds-24.1/lib*.so.*" clean_bin "${files[@]}" append_install_list "${files[@]}" @@ -65,8 +66,8 @@ list_files files "${SL_SPDK_PREFIX}/lib64/pkgconfig/daos_spdk.pc" replace_paths "${SL_SPDK_PREFIX}" "${files[@]}" append_install_list "${files[@]}" -TARGET_PATH="${libdir}/daos_srv/dpdk/pmds-22.0" -list_files files "${SL_SPDK_PREFIX}/lib64/daos_srv/dpdk/pmds-22.0/lib*.so" +TARGET_PATH="${libdir}/daos_srv/dpdk/pmds-24.1" +list_files files "${SL_SPDK_PREFIX}/lib64/daos_srv/dpdk/pmds-24.1/lib*.so" append_install_list "${files[@]}" TARGET_PATH="${includedir}/daos_srv/spdk" diff --git a/utils/run_utest.py b/utils/run_utest.py index a555e9f8203..39261be2a51 100755 --- a/utils/run_utest.py +++ b/utils/run_utest.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 """ Copyright 2023-2024 Intel Corporation. + Copyright 2025 Google LLC Copyright 2025 Hewlett Packard Enterprise Development LP All rights reserved. @@ -270,7 +271,7 @@ def create_config(self, name): }}, {{ "params": {{ - "retry_count": 4, + "bdev_retry_count": 4, "timeout_us": 0, "nvme_adminq_poll_period_us": 100000, "action_on_timeout": "none", diff --git a/utils/scripts/install-el8.sh b/utils/scripts/install-el8.sh index 5b1239dc094..670ef8f6eef 100755 --- a/utils/scripts/install-el8.sh +++ b/utils/scripts/install-el8.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# (C) Copyright 2025 Google LLC # Install OS updates and packages as required for building DAOS on EL 8 and # derivatives. Include basic tools and daos dependencies that come from the core repos. 
@@ -80,6 +81,7 @@ dnf --nodocs install ${dnf_install_args} \ systemd \ valgrind-devel \ which \ + ncurses-devel \ yasm if [[ -z "${NO_OPENMPI_DEVEL+set}" ]]; then diff --git a/utils/scripts/install-el9.sh b/utils/scripts/install-el9.sh index 355272f1c63..a9234ff7cba 100755 --- a/utils/scripts/install-el9.sh +++ b/utils/scripts/install-el9.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# (C) Copyright 2025 Google LLC # Install OS updates and packages as required for building DAOS on EL 9 and # derivatives. Include basic tools and daos dependencies that come from the core repos. @@ -80,6 +81,7 @@ dnf --nodocs install ${dnf_install_args} \ sudo \ valgrind-devel \ which \ + ncurses-devel \ yasm ruby_version=$(dnf module list ruby | grep -Eow "3\.[0-9]+" | tail -1) diff --git a/utils/scripts/install-leap15.sh b/utils/scripts/install-leap15.sh index 87447d05695..5029eb1000a 100755 --- a/utils/scripts/install-leap15.sh +++ b/utils/scripts/install-leap15.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# (C) Copyright 2025 Google LLC # Install OS updates and package. Include basic tools and daos dependencies # that come from the core repo. diff --git a/utils/scripts/install-ubuntu.sh b/utils/scripts/install-ubuntu.sh index 0c4dbae59e6..0db922daeb7 100755 --- a/utils/scripts/install-ubuntu.sh +++ b/utils/scripts/install-ubuntu.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# (C) Copyright 2025 Google LLC # Install OS updates and package. Include basic tools and daos dependencies # that come from the core repo. @@ -66,6 +67,7 @@ apt-get install ${apt_get_install_args} \ sudo \ uuid-dev \ valgrind \ + libncurses-dev \ yasm sudo gem install fpm diff --git a/utils/test_memcheck.supp b/utils/test_memcheck.supp index a4a59101891..0d41b6e9a3d 100644 --- a/utils/test_memcheck.supp +++ b/utils/test_memcheck.supp @@ -404,6 +404,14 @@ ... fun:bdev_aio_writev } +{ + bdev_aio_rw param error + Memcheck:Param + io_submit(PWRITEV(iov[i])) + fun:syscall + ... 
+ fun:bdev_aio_rw +} { Memcheck:Param From 652bbd2d9ed411fbb1a412002bdde036ba267239 Mon Sep 17 00:00:00 2001 From: sherintg Date: Mon, 1 Dec 2025 19:14:56 +0530 Subject: [PATCH 043/253] DAOS-18195 common: DAV zone chunks metadata corruption (#17168) * DAOS-18195 common: DAV zone chunks metadata corruption Fixed rare corruption of DAV VOS heap chunk metadata during engine restart. Without this fix, the corruption could cause future allocation/free operations to abort the engine with an assert indicating the chunk metadata corruption. Signed-off-by: Sherin T George --- src/common/dav/heap.c | 5 ++++- src/common/dav/memblock.c | 4 ++-- src/common/dav_v2/heap.c | 25 ++++++++++++++++++++++++- src/common/dav_v2/heap.h | 3 +++ src/common/dav_v2/memblock.c | 14 ++++++++++---- 5 files changed, 43 insertions(+), 8 deletions(-) diff --git a/src/common/dav/heap.c b/src/common/dav/heap.c index ee2feca85a1..89f0ecf15c6 100644 --- a/src/common/dav/heap.c +++ b/src/common/dav/heap.c @@ -816,7 +816,10 @@ heap_reclaim_zone_garbage(struct palloc_heap *heap, struct bucket *bucket, case CHUNK_TYPE_USED: break; default: - ASSERT(0); + D_ASSERTF(0, + "Encountered invalid chunk (%" PRIu32 ") of type %" PRIu16 + " val = 0x%" PRIx64, + i, hdr->type, *(uint64_t *)hdr); } i = m.chunk_id + m.size_idx; /* hdr might have changed */ diff --git a/src/common/dav/memblock.c b/src/common/dav/memblock.c index 9600e49c46c..4fa51f5e502 100644 --- a/src/common/dav/memblock.c +++ b/src/common/dav/memblock.c @@ -1233,8 +1233,8 @@ huge_reinit_chunk(const struct memory_block *m) { struct chunk_header *hdr = heap_get_chunk_hdr(m->heap, m); - if (hdr->type == CHUNK_TYPE_USED) - huge_write_footer(hdr, hdr->size_idx); + D_ASSERT((hdr->type == CHUNK_TYPE_USED) || (hdr->type == CHUNK_TYPE_FREE)); + huge_write_footer(hdr, hdr->size_idx); } /* diff --git a/src/common/dav_v2/heap.c b/src/common/dav_v2/heap.c index 29a7ce17c9c..c40e708c1e9 100644 --- a/src/common/dav_v2/heap.c +++ b/src/common/dav_v2/heap.c @@ -703,6 
+703,26 @@ heap_mbrt_log_alloc_failure(struct palloc_heap *heap, uint32_t zone_id) } } +/* + * heap_touch_umem_cache -- touch the cache page for a memory address + * if the memory bucket is evictable + */ +void +heap_touch_umem_cache(struct palloc_heap *heap, void *addr, size_t size) +{ + uint64_t offset = HEAP_PTR_TO_OFF(heap, addr); + uint32_t zone_id = OFFSET_TO_ZID(offset); + struct mbrt *mb = heap_mbrt_get_mb(heap, zone_id); + dav_obj_t *dav_hdl = (dav_obj_t *)heap->p_ops.base; + + D_ASSERT((mb != NULL) && (dav_hdl != NULL) && (dav_hdl->do_utx != NULL)); + + if (!mb->is_evictable) + return; + + umem_cache_touch(dav_hdl->do_store, dav_hdl->do_utx->utx_id, offset, size); +} + void heap_mbrt_setmb_usage(struct palloc_heap *heap, uint32_t zone_id, uint64_t usage) { @@ -1312,7 +1332,10 @@ heap_reclaim_zone_garbage(struct palloc_heap *heap, struct bucket *bucket, case CHUNK_TYPE_USED: break; default: - ASSERT(0); + D_ASSERTF(0, + "Encountered invalid chunk (%" PRIu32 ") of type %" PRIu16 + " val = 0x%" PRIx64, + i, hdr->type, *(uint64_t *)hdr); } i = m.chunk_id + m.size_idx; /* hdr might have changed */ diff --git a/src/common/dav_v2/heap.h b/src/common/dav_v2/heap.h index 8a49934b5c4..c01aba37dcb 100644 --- a/src/common/dav_v2/heap.h +++ b/src/common/dav_v2/heap.h @@ -139,6 +139,9 @@ heap_mbrt_get_mb(struct palloc_heap *heap, uint32_t zone_id); void heap_mbrt_log_alloc_failure(struct palloc_heap *heap, uint32_t zone_id); +void +heap_touch_umem_cache(struct palloc_heap *heap, void *addr, size_t size); + int heap_get_evictable_mb(struct palloc_heap *heap, uint32_t *zone_id); diff --git a/src/common/dav_v2/memblock.c b/src/common/dav_v2/memblock.c index 402b79a4df9..6c544100ea2 100644 --- a/src/common/dav_v2/memblock.c +++ b/src/common/dav_v2/memblock.c @@ -680,14 +680,20 @@ huge_prep_operation_hdr(const struct memory_block *m, enum memblock_state op, * The footer entry change is updated as transient because it will * be recreated at heap boot regardless - it's just 
needed for runtime * operations. + * Note: + * If a footer is added as part of a tx, creating a transient entry + * and marking the page as dirty at commit time does not justify the + * added complexity and occurs less frequently. Therefore, for now, + * we commit footer in the same way as header when called under a tx. */ + if (ctx == NULL) { util_atomic_store_explicit64((uint64_t *)footer, val, memory_order_relaxed); + heap_touch_umem_cache(m->heap, footer, sizeof(*footer)); VALGRIND_SET_CLEAN(footer, sizeof(*footer)); } else { - operation_add_typed_entry(ctx, - footer, val, ULOG_OPERATION_SET, LOG_TRANSIENT); + operation_add_entry(ctx, footer, val, ULOG_OPERATION_SET); } } @@ -1234,8 +1240,8 @@ huge_reinit_chunk(const struct memory_block *m) { struct chunk_header *hdr = heap_get_chunk_hdr(m->heap, m); - if (hdr->type == CHUNK_TYPE_USED) - huge_write_footer(hdr, hdr->size_idx); + D_ASSERT((hdr->type == CHUNK_TYPE_USED) || (hdr->type == CHUNK_TYPE_FREE)); + huge_write_footer(hdr, hdr->size_idx); } /* From dbea8adc7c6bad2a569830bf1fac4749e1fb3b3f Mon Sep 17 00:00:00 2001 From: Dalton Bohning Date: Mon, 1 Dec 2025 09:35:07 -0800 Subject: [PATCH 044/253] DAOS-623 test: add --allowerasing option to install_packages (#17162) add --allowerasing option to install_packages. Use this option in interop testing. 
Signed-off-by: Dalton Bohning --- .../ftest/interoperability/upgrade_downgrade_base.py | 6 ++++-- src/tests/ftest/util/package_utils.py | 9 +++++++-- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/src/tests/ftest/interoperability/upgrade_downgrade_base.py b/src/tests/ftest/interoperability/upgrade_downgrade_base.py index 5cfd839409a..e1d3777edf8 100644 --- a/src/tests/ftest/interoperability/upgrade_downgrade_base.py +++ b/src/tests/ftest/interoperability/upgrade_downgrade_base.py @@ -261,7 +261,8 @@ def install_daos(self, version, servers, clients): if servers: self.log.info("Installing version %s on servers, %s", version, servers) - if not install_packages(self.log, servers, server_packages, 'root').passed: + result = install_packages(self.log, servers, server_packages, 'root', allowerasing=True) + if not result.passed: self.fail(f"Failed to install version {version} on servers") self.current_server_version = Version(version) result = run_remote(self.log, NodeSet(servers[0]), 'dmg version') @@ -274,7 +275,8 @@ def install_daos(self, version, servers, clients): # Install on clients if clients: self.log.info("Installing version %s on clients, %s", version, clients) - if not install_packages(self.log, clients, client_packages, 'root').passed: + result = install_packages(self.log, clients, client_packages, 'root', allowerasing=True) + if not result.passed: self.fail(f"Failed to install version {version} on clients") self.current_client_version = Version(version) result = run_remote(self.log, clients, 'daos version') diff --git a/src/tests/ftest/util/package_utils.py b/src/tests/ftest/util/package_utils.py index af18623e332..634311855e6 100644 --- a/src/tests/ftest/util/package_utils.py +++ b/src/tests/ftest/util/package_utils.py @@ -32,7 +32,7 @@ def find_packages(log, hosts, pattern, user=None): return installed -def install_packages(log, hosts, packages, user=None, timeout=600): +def install_packages(log, hosts, packages, user=None, timeout=600, 
allowerasing=False): """Install the packages on the hosts. Args: @@ -41,12 +41,17 @@ def install_packages(log, hosts, packages, user=None, timeout=600): packages (list): a list of packages to install user (str, optional): user to use when installing the packages. Defaults to None. timeout (int, optional): timeout for the dnf install command. Defaults to 600. + allowerasing (bool, optional): whether to use dnf --allowerasing. Defaults to False. Returns: CommandResult: the 'dnf install' command results """ log.info('Installing packages on %s: %s', hosts, ', '.join(packages)) - command = command_as_user(' '.join(['dnf', 'install', '-y'] + packages), user) + command = ['dnf', 'install', '-y'] + if allowerasing: + command.append('--allowerasing') + command.extend(packages) + command = command_as_user(' '.join(command), user) return run_remote(log, hosts, command, timeout=timeout) From 585301057570e4670c2489bcc60e125fa8e29a70 Mon Sep 17 00:00:00 2001 From: Tom Nabarro Date: Mon, 1 Dec 2025 21:30:17 +0000 Subject: [PATCH 045/253] DAOS-18287 doc: ddb prov_mem command usage and examples (#17196) Signed-off-by: Tom Nabarro --- src/utils/ddb/README.md | 46 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/src/utils/ddb/README.md b/src/utils/ddb/README.md index 58ceabf9098..d0355115005 100644 --- a/src/utils/ddb/README.md +++ b/src/utils/ddb/README.md @@ -100,8 +100,54 @@ Commands: load Load a value to a vos path. ls List containers, objects, dkeys, akeys, and values open Opens the vos file at + prov_mem Prepare memory environment for md-on-ssd mode rm Remove a branch of the VOS tree. rm_ilog Remove all the ilog entries smd_sync Restore the SMD file with backup from blob update_vea Alter the VEA tree to mark a region as free. +``` + +## prov_mem Command + +The `prov_mem` command prepares the memory environment for md-on-ssd mode by setting up a tmpfs mount and recreating VOS files on it. 
+ +### Usage + +``` +prov_mem [Options] +``` + +### Arguments + +- `` - Path to the sys db +- `` - Path to the tmpfs mountpoint + +### Options + +- `-s, --tmpfs_size` - Specify tmpfs size in GiB for tmpfs_mount. By default, the total size of all VOS files will be automatically calculated and used. + +### Description + +This command is used when working with DAOS in md-on-ssd (metadata-on-SSD) mode. It: + +1. Verifies the system is running in MD-on-SSD mode +2. Creates a tmpfs mount at the specified path (if not already mounted) +3. Sets up the necessary directory structure +4. Recreates VOS pool target files on the tmpfs mount + +### Examples + +```bash +# Prepare memory environment with auto-calculated tmpfs size +ddb -R "prov_mem /path/to/sys/db /mnt/tmpfs" + +# Prepare memory environment with specific tmpfs size of 16 GiB +ddb -R "prov_mem -s 16 /path/to/sys/db /mnt/tmpfs" +``` + +### Notes + +- The tmpfs_mount path must not already be a mountpoint, otherwise the command will fail with a busy error +- If tmpfs_size is not specified, the size will be automatically calculated based on the total size of all VOS files +- This command requires the system to be configured for MD-on-SSD mode ``` \ No newline at end of file From b62718c6496f50dc0341b435350e96e5d82d63aa Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Tue, 2 Dec 2025 14:42:18 +0800 Subject: [PATCH 046/253] DAOS-18240 container: Fix infinite blocking in cont_track_eph_leader_ult (#17181) * DAOS-18240 container: Fix infinite blocking in cont_track_eph_leader_ult When a DAOS cluster experiences a network outage, one rank may be marked as dead by SWIM. Simultaneously, the pool service leader may lose its majority and leadership. During this transition, the same rank might still be selected as the new leader. When this occurs, the pool service step-down procedure must be called, which waits for the cont_track_eph_leader_ult to exit. However, this ULT could block indefinitely during exit. 
The cont_track_eph_leader_ult() ULT may block forever when attempting to exit because IV operations retry indefinitely (due to the dead rank) without checking whether the ULT should exit. This issue prevents the pool service leader from starting properly, which also causes pool exclusion and rebuild processes to fail. Consequently, pool operations may hang. The problem is fixed by moving the retry loop outside of IV and breaking the loop if the ULT needs to exit early. Signed-off-by: Wang Shilong --- src/container/container_iv.c | 56 ++++++++++++++++++++++++++++------- src/container/srv_container.c | 7 ++++- src/container/srv_internal.h | 10 ++++--- src/container/srv_target.c | 5 ++-- 4 files changed, 60 insertions(+), 18 deletions(-) diff --git a/src/container/container_iv.c b/src/container/container_iv.c index 0ac8ae9737f..2f7719747f2 100644 --- a/src/container/container_iv.c +++ b/src/container/container_iv.c @@ -1115,6 +1115,12 @@ cont_iv_hdl_fetch(uuid_t cont_hdl_uuid, uuid_t pool_uuid, return rc; } +static inline bool +cont_iv_retryable_error(int rc) +{ + return daos_rpc_retryable_rc(rc) || rc == -DER_NOTLEADER || rc == -DER_BUSY; +} + static int cont_iv_track_eph_update_internal(void *ns, uuid_t cont_uuid, daos_epoch_t ec_agg_eph, daos_epoch_t stable_eph, unsigned int shortcut, @@ -1135,30 +1141,58 @@ cont_iv_track_eph_update_internal(void *ns, uuid_t cont_uuid, daos_epoch_t ec_ag return rc; } - rc = cont_iv_update(ns, op, cont_uuid, &iv_entry, sizeof(iv_entry), - shortcut, sync_mode, true /* retry */); - if (rc) + rc = cont_iv_update(ns, op, cont_uuid, &iv_entry, sizeof(iv_entry), shortcut, sync_mode, + false); + if (rc && !cont_iv_retryable_error(rc)) D_ERROR(DF_UUID" op %d, cont_iv_update failed "DF_RC"\n", DP_UUID(cont_uuid), op, DP_RC(rc)); return rc; } +static int +cont_iv_track_eph_retry(void *ns, uuid_t cont_uuid, daos_epoch_t ec_agg_eph, + daos_epoch_t stable_eph, unsigned int shortcut, unsigned int sync_mode, + uint32_t op, struct sched_request 
*req) +{ + int sleep_ms = 1000; /* 1 second retry interval */ + int rc = 0; + + while (1) { + rc = cont_iv_track_eph_update_internal(ns, cont_uuid, ec_agg_eph, stable_eph, + shortcut, sync_mode, op); + if (rc == 0) + break; + + /* Only retry on specific errors */ + if (!cont_iv_retryable_error(rc)) + break; + + if (req && dss_ult_exiting(req)) { + rc = -DER_SHUTDOWN; + break; + } + + dss_sleep(sleep_ms); + } + + return rc; +} + int cont_iv_track_eph_update(void *ns, uuid_t cont_uuid, daos_epoch_t ec_agg_eph, - daos_epoch_t stable_eph) + daos_epoch_t stable_eph, struct sched_request *req) { - return cont_iv_track_eph_update_internal(ns, cont_uuid, ec_agg_eph, stable_eph, - CRT_IV_SHORTCUT_TO_ROOT, - CRT_IV_SYNC_NONE, - IV_CONT_TRACK_EPOCH_REPORT); + return cont_iv_track_eph_retry(ns, cont_uuid, ec_agg_eph, stable_eph, + CRT_IV_SHORTCUT_TO_ROOT, CRT_IV_SYNC_NONE, + IV_CONT_TRACK_EPOCH_REPORT, req); } int cont_iv_track_eph_refresh(void *ns, uuid_t cont_uuid, daos_epoch_t ec_agg_eph, - daos_epoch_t stable_eph) + daos_epoch_t stable_eph, struct sched_request *req) { - return cont_iv_track_eph_update_internal(ns, cont_uuid, ec_agg_eph, stable_eph, 0, - CRT_IV_SYNC_EAGER, IV_CONT_TRACK_EPOCH); + return cont_iv_track_eph_retry(ns, cont_uuid, ec_agg_eph, stable_eph, 0, CRT_IV_SYNC_EAGER, + IV_CONT_TRACK_EPOCH, req); } int diff --git a/src/container/srv_container.c b/src/container/srv_container.c index a9c5a9fe354..abd487599a8 100644 --- a/src/container/srv_container.c +++ b/src/container/srv_container.c @@ -2159,12 +2159,17 @@ cont_agg_eph_sync(struct ds_pool *pool, struct cont_svc *svc) } rc = cont_iv_track_eph_refresh(pool->sp_iv_ns, eph_ldr->cte_cont_uuid, - min_ec_agg_eph, min_stable_eph); + min_ec_agg_eph, min_stable_eph, + svc->cs_cont_ephs_leader_req); if (rc) { DL_CDEBUG(rc == -DER_NONEXIST, DLOG_INFO, DLOG_ERR, rc, DF_CONT ": refresh failed", DP_CONT(svc->cs_pool_uuid, eph_ldr->cte_cont_uuid)); + /* If ULT is exiting, break out */ + if (rc == -DER_SHUTDOWN) + 
break; + /* If there are network error or pool map inconsistency, * let's skip the following eph sync, which will fail * anyway. diff --git a/src/container/srv_internal.h b/src/container/srv_internal.h index c8a676c5b80..469a671ffb0 100644 --- a/src/container/srv_internal.h +++ b/src/container/srv_internal.h @@ -301,10 +301,12 @@ int cont_iv_prop_update(void *ns, uuid_t cont_uuid, daos_prop_t *prop, bool sync int cont_iv_snapshots_refresh(void *ns, uuid_t cont_uuid); int cont_iv_snapshots_update(void *ns, uuid_t cont_uuid, uint64_t *snapshots, int snap_count); -int cont_iv_track_eph_update(void *ns, uuid_t cont_uuid, daos_epoch_t ec_agg_eph, - daos_epoch_t stable_eph); -int cont_iv_track_eph_refresh(void *ns, uuid_t cont_uuid, daos_epoch_t ec_agg_eph, - daos_epoch_t stable_eph); +int +cont_iv_track_eph_update(void *ns, uuid_t cont_uuid, daos_epoch_t ec_agg_eph, + daos_epoch_t stable_eph, struct sched_request *req); +int + cont_iv_track_eph_refresh(void *ns, uuid_t cont_uuid, daos_epoch_t ec_agg_eph, + daos_epoch_t stable_eph, struct sched_request *req); int cont_iv_entry_delete(void *ns, uuid_t pool_uuid, uuid_t cont_uuid); /* srv_metrics.c*/ diff --git a/src/container/srv_target.c b/src/container/srv_target.c index fdf3a915f04..a382f46e7b4 100644 --- a/src/container/srv_target.c +++ b/src/container/srv_target.c @@ -2829,8 +2829,9 @@ ds_cont_eph_report(struct ds_pool *pool) D_DEBUG(DB_MD, "Update ec_agg_eph " DF_X64 ", stable_eph " DF_X64 ", " DF_UUID "\n", min_ec_agg_eph, min_stable_eph, DP_UUID(ec_eph->cte_cont_uuid)); - ret = cont_iv_track_eph_update(pool->sp_iv_ns, ec_eph->cte_cont_uuid, - min_ec_agg_eph, min_stable_eph); + ret = + cont_iv_track_eph_update(pool->sp_iv_ns, ec_eph->cte_cont_uuid, min_ec_agg_eph, + min_stable_eph, pool->sp_ec_ephs_req); if (ret == 0) { ec_eph->cte_last_ec_agg_epoch = min_ec_agg_eph; ec_eph->cte_last_stable_epoch = min_stable_eph; From 54073be99c1f3b944705f4097281bf8d492a7e01 Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Tue, 2 
Dec 2025 21:39:08 +0800 Subject: [PATCH 047/253] DAOS-17928 rebuild: refine migration fixes (#17120) * DAOS-17928 rebuild: refine migration fixes - Rename ds_pool_migrate_arg fields with pma_ prefix for better grep - Remove duplicate pool check in ds_migrate_end_ult - Make sp_rebuilding atomic using ATOMIC - Simplify sp_rebuilding increment logic after lookup. - Use deep stack size for iv ult. - Fix to return real error if migration failed. Signed-off-by: Wang Shilong --- src/container/srv_container.c | 2 +- src/container/srv_target.c | 4 +- src/include/daos_srv/pool.h | 5 +- src/object/srv_ec_aggregate.c | 16 ++--- src/object/srv_obj.c | 6 +- src/object/srv_obj_migrate.c | 118 ++++++++++++++++++---------------- src/rebuild/scan.c | 9 +-- src/rebuild/srv.c | 4 +- 8 files changed, 86 insertions(+), 78 deletions(-) diff --git a/src/container/srv_container.c b/src/container/srv_container.c index abd487599a8..582c5f97352 100644 --- a/src/container/srv_container.c +++ b/src/container/srv_container.c @@ -2184,7 +2184,7 @@ cont_agg_eph_sync(struct ds_pool *pool, struct cont_svc *svc) } eph_ldr->cte_current_ec_agg_eph = min_ec_agg_eph; eph_ldr->cte_current_stable_eph = min_stable_eph; - if (pool->sp_rebuilding) + if (atomic_load(&pool->sp_rebuilding)) break; } ABT_mutex_unlock(svc->cs_cont_ephs_mutex); diff --git a/src/container/srv_target.c b/src/container/srv_target.c index a382f46e7b4..d3bc9710632 100644 --- a/src/container/srv_target.c +++ b/src/container/srv_target.c @@ -188,8 +188,8 @@ cont_aggregate_runnable(struct ds_cont_child *cont, struct sched_request *req, if (ds_pool_is_rebuilding(pool) && !vos_agg) { D_DEBUG(DB_EPC, DF_CONT ": skip EC aggregation during rebuild %d, %d.\n", - DP_CONT(cont->sc_pool->spc_uuid, cont->sc_uuid), pool->sp_rebuilding, - pool->sp_rebuild_scan); + DP_CONT(cont->sc_pool->spc_uuid, cont->sc_uuid), + atomic_load(&pool->sp_rebuilding), pool->sp_rebuild_scan); return false; } diff --git a/src/include/daos_srv/pool.h 
b/src/include/daos_srv/pool.h index a08c3e702c6..e27f0bd89a4 100644 --- a/src/include/daos_srv/pool.h +++ b/src/include/daos_srv/pool.h @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -92,7 +93,7 @@ struct ds_pool { * rebuild job. */ uint32_t sp_rebuild_gen; - int sp_rebuilding; + ATOMIC int sp_rebuilding; /** * someone has already messaged this pool to for rebuild scan, * NB: all xstreams can do lockless-write on it but it's OK @@ -217,7 +218,7 @@ struct ds_pool_svc_op_val { static inline bool ds_pool_is_rebuilding(struct ds_pool *pool) { - return (pool->sp_rebuilding > 0 || pool->sp_rebuild_scan > 0); + return (atomic_load(&pool->sp_rebuilding) > 0 || pool->sp_rebuild_scan > 0); } /* encode metadata RPC operation key: HLC time first, in network order, for keys sorted by time. diff --git a/src/object/srv_ec_aggregate.c b/src/object/srv_ec_aggregate.c index 982969e17c3..96abd078284 100644 --- a/src/object/srv_ec_aggregate.c +++ b/src/object/srv_ec_aggregate.c @@ -2288,9 +2288,9 @@ ec_aggregate_yield(struct ec_agg_param *agg_param) int rc; if (ds_pool_is_rebuilding(agg_param->ap_pool_info.api_pool)) { - D_INFO(DF_UUID": abort ec aggregation, sp_rebuilding %d\n", + D_INFO(DF_UUID ": abort ec aggregation, sp_rebuilding %d\n", DP_UUID(agg_param->ap_pool_info.api_pool->sp_uuid), - agg_param->ap_pool_info.api_pool->sp_rebuilding); + atomic_load(&agg_param->ap_pool_info.api_pool->sp_rebuilding)); return true; } @@ -2501,10 +2501,10 @@ agg_iterate_pre_cb(daos_handle_t ih, vos_iter_entry_t *entry, * (see obj_inflight_io_check()). 
*/ if (ds_pool_is_rebuilding(agg_param->ap_pool_info.api_pool)) { - D_INFO(DF_CONT" abort as rebuild started, sp_rebuilding %d\n", - DP_CONT(agg_param->ap_pool_info.api_pool_uuid, - agg_param->ap_pool_info.api_cont_uuid), - agg_param->ap_pool_info.api_pool->sp_rebuilding); + D_INFO(DF_CONT " abort as rebuild started, sp_rebuilding %d\n", + DP_CONT(agg_param->ap_pool_info.api_pool_uuid, + agg_param->ap_pool_info.api_cont_uuid), + atomic_load(&agg_param->ap_pool_info.api_pool->sp_rebuilding)); return -1; } @@ -2529,9 +2529,9 @@ agg_iterate_pre_cb(daos_handle_t ih, vos_iter_entry_t *entry, } if (rc < 0) { - D_ERROR(DF_UUID" EC aggregation (rebuilding %d) failed: "DF_RC"\n", + D_ERROR(DF_UUID " EC aggregation (rebuilding %d) failed: " DF_RC "\n", DP_UUID(agg_param->ap_pool_info.api_pool->sp_uuid), - agg_param->ap_pool_info.api_pool->sp_rebuilding, DP_RC(rc)); + atomic_load(&agg_param->ap_pool_info.api_pool->sp_rebuilding), DP_RC(rc)); return rc; } diff --git a/src/object/srv_obj.c b/src/object/srv_obj.c index 258cc1f3fe8..ef6c5a26830 100644 --- a/src/object/srv_obj.c +++ b/src/object/srv_obj.c @@ -2424,9 +2424,9 @@ obj_inflight_io_check(struct ds_cont_child *child, uint32_t opc, if (opc == DAOS_OBJ_RPC_ENUMERATE && flags & ORF_FOR_MIGRATION) { /* EC aggregation is still inflight, rebuild should wait until it's paused */ if (ds_cont_child_ec_aggregating(child)) { - D_ERROR(DF_CONT" ec aggregate still active, rebuilding %d\n", + D_ERROR(DF_CONT " ec aggregate still active, rebuilding %d\n", DP_CONT(child->sc_pool->spc_uuid, child->sc_uuid), - child->sc_pool->spc_pool->sp_rebuilding); + atomic_load(&child->sc_pool->spc_pool->sp_rebuilding)); return -DER_UPDATE_AGAIN; } } @@ -2434,7 +2434,7 @@ obj_inflight_io_check(struct ds_cont_child *child, uint32_t opc, if (!obj_is_modification_opc(opc) && (opc != DAOS_OBJ_RPC_CPD || flags & ORF_CPD_RDONLY)) return 0; - if (child->sc_pool->spc_pool->sp_rebuilding) { + if (atomic_load(&child->sc_pool->spc_pool->sp_rebuilding)) { 
uint32_t version; ds_rebuild_running_query(child->sc_pool_uuid, RB_OP_REBUILD, diff --git a/src/object/srv_obj_migrate.c b/src/object/srv_obj_migrate.c index a5200b9d9b2..4c6a801962b 100644 --- a/src/object/srv_obj_migrate.c +++ b/src/object/srv_obj_migrate.c @@ -640,6 +640,17 @@ mrone_obj_fetch_internal(struct migrate_one *mrone, daos_handle_t oh, d_sg_list_ return rc; } +static inline int +migrate_pool_tls_get_status(struct migrate_pool_tls *tls) +{ + if (tls && tls->mpt_status) + return tls->mpt_status; + if (tls == NULL || tls->mpt_fini) + return -DER_SHUTDOWN; + + return 0; +} + static int mrone_obj_fetch(struct migrate_one *mrone, daos_handle_t oh, d_sg_list_t *sgls, daos_iod_t *iods, int iod_num, daos_epoch_t eph, uint32_t flags, @@ -652,7 +663,7 @@ mrone_obj_fetch(struct migrate_one *mrone, daos_handle_t oh, d_sg_list_t *sgls, mrone->mo_pool_tls_version, mrone->mo_generation); if (tls == NULL || tls->mpt_fini) { D_WARN("someone aborted the rebuild " DF_UUID "\n", DP_UUID(mrone->mo_pool_uuid)); - D_GOTO(out, rc = -DER_SHUTDOWN); + D_GOTO(out, rc = migrate_pool_tls_get_status(tls)); } if (daos_oclass_grp_size(&mrone->mo_oca) > 1) @@ -1796,7 +1807,7 @@ migrate_tgt_enter(struct migrate_pool_tls *tls, int ult_type, bool *yielded) ABT_cond_wait(tls->mpt_inflight_cond, tls->mpt_inflight_mutex); ABT_mutex_unlock(tls->mpt_inflight_mutex); if (tls->mpt_fini) - D_GOTO(out, rc = -DER_SHUTDOWN); + D_GOTO(out, rc = migrate_pool_tls_get_status(tls)); ult_cnt = migrate_tgt_ult_cnt(tls, ult_type); } @@ -2447,8 +2458,7 @@ migrate_one_create(struct enum_unpack_arg *arg, struct dc_obj_enum_unpack_io *io migrate_one_destroy(mrone); } put: - if (tls) - migrate_pool_tls_put(tls); + migrate_pool_tls_put(tls); return rc; } @@ -2604,8 +2614,7 @@ migrate_enum_unpack_cb(struct dc_obj_enum_unpack_io *io, void *data) put: if (obj) obj_decref(obj); - if (tls != NULL) - migrate_pool_tls_put(tls); + migrate_pool_tls_put(tls); return rc; } @@ -2692,8 +2701,7 @@ migrate_start_ult(struct 
enum_unpack_arg *unpack_arg) } put: - if (tls) - migrate_pool_tls_put(tls); + migrate_pool_tls_put(tls); return rc; } @@ -3044,8 +3052,8 @@ ds_migrate_stop(struct ds_pool *pool, unsigned int version, unsigned int generat if (rc) D_ERROR(DF_UUID" migrate stop: %d\n", DP_UUID(pool->sp_uuid), rc); - D_ASSERT(pool->sp_rebuilding >= arg.stop_count); - pool->sp_rebuilding -= arg.stop_count; + D_ASSERT(atomic_load(&pool->sp_rebuilding) >= arg.stop_count); + atomic_fetch_sub(&pool->sp_rebuilding, arg.stop_count); ABT_mutex_free(&arg.stop_lock); D_INFO(DF_UUID" migrate stopped\n", DP_UUID(pool->sp_uuid)); @@ -3430,7 +3438,8 @@ migrate_cont_iter_cb(daos_handle_t ih, d_iov_t *key_iov, uuid_copy(fetch_arg.cont_uuid, cont_uuid); uuid_copy(fetch_arg.pool_uuid, tls->mpt_pool_uuid); - rc = dss_ult_execute(cont_fetch_start_ult, &fetch_arg, NULL, NULL, DSS_XS_SYS, 0, 0); + rc = dss_ult_execute(cont_fetch_start_ult, &fetch_arg, NULL, NULL, DSS_XS_SYS, 0, + MIGRATE_STACK_SIZE); if (rc) { DL_ERROR(rc, DF_RB ": ds_pool_lookup failed", DP_RB_MPT(tls)); if (rc == -DER_SHUTDOWN) @@ -3615,16 +3624,15 @@ migrate_try_obj_insert(struct migrate_pool_tls *tls, uuid_t co_uuid, } struct ds_pool_migrate_arg { - uuid_t pool_uuid; - struct ds_pool *pool; - uint32_t rebuild_ver; - uint32_t generation; - daos_prop_t *prop; - int tgt_id; - uint8_t tgt_status; - uint32_t tgt_in_ver; - int rebuilding_count; - bool no_iv; + uuid_t pma_pool_uuid; + struct ds_pool *pma_pool; + uint32_t pma_rebuild_ver; + uint32_t pma_generation; + daos_prop_t *pma_prop; + int pma_tgt_id; + uint8_t pma_tgt_status; + uint32_t pma_tgt_in_ver; + bool pma_no_iv; }; static int @@ -3632,12 +3640,9 @@ ds_migrate_end_ult(void *arg) { struct ds_pool_migrate_arg *pool_arg = (struct ds_pool_migrate_arg *)arg; - if (pool_arg->pool) { - pool_arg->pool->sp_rebuilding += pool_arg->rebuilding_count; - ds_pool_put(pool_arg->pool); - } - if (pool_arg->prop) - daos_prop_free(pool_arg->prop); + ds_pool_put(pool_arg->pma_pool); + if 
(pool_arg->pma_prop) + daos_prop_free(pool_arg->pma_prop); return 0; } @@ -3649,42 +3654,42 @@ ds_migrate_prepare_ult(void *arg) struct ds_pool_migrate_arg *pool_arg = (struct ds_pool_migrate_arg *)arg; struct pool_target *tgts; - rc = ds_pool_lookup(pool_arg->pool_uuid, &pool_arg->pool); + rc = ds_pool_lookup(pool_arg->pma_pool_uuid, &pool_arg->pma_pool); if (rc != 0) { if (rc == -DER_SHUTDOWN) { D_DEBUG(DB_REBUILD, DF_UUID " pool service is stopping.\n", - DP_UUID(pool_arg->pool_uuid)); + DP_UUID(pool_arg->pma_pool_uuid)); rc = 0; } else { D_DEBUG(DB_REBUILD, DF_UUID " pool service is not started yet. " DF_RC "\n", - DP_UUID(pool_arg->pool_uuid), DP_RC(rc)); + DP_UUID(pool_arg->pma_pool_uuid), DP_RC(rc)); rc = -DER_AGAIN; } return rc; } - ds_rebuild_running_query(pool_arg->pool_uuid, -1, &rebuild_ver, NULL, NULL); - if (rebuild_ver == 0 || rebuild_ver != pool_arg->rebuild_ver) { + ds_rebuild_running_query(pool_arg->pma_pool_uuid, -1, &rebuild_ver, NULL, NULL); + if (rebuild_ver == 0 || rebuild_ver != pool_arg->pma_rebuild_ver) { rc = -DER_SHUTDOWN; D_GOTO(out, rc); } - if (pool_arg->no_iv) + if (pool_arg->pma_no_iv) D_GOTO(out, rc = 0); - D_ALLOC_PTR(pool_arg->prop); - if (pool_arg->prop == NULL) + D_ALLOC_PTR(pool_arg->pma_prop); + if (pool_arg->pma_prop == NULL) D_GOTO(out, rc = -DER_NOMEM); - rc = ds_pool_iv_prop_fetch(pool_arg->pool, pool_arg->prop); + rc = ds_pool_iv_prop_fetch(pool_arg->pma_pool, pool_arg->pma_prop); if (rc) D_GOTO(out, rc); - pool_arg->pool->sp_rebuilding++; - rc = pool_map_find_target_by_rank_idx(pool_arg->pool->sp_map, dss_self_rank(), -1, &tgts); + rc = pool_map_find_target_by_rank_idx(pool_arg->pma_pool->sp_map, dss_self_rank(), -1, + &tgts); D_ASSERT(rc == dss_tgt_nr); - pool_arg->tgt_status = tgts[pool_arg->tgt_id].ta_comp.co_status; - pool_arg->tgt_in_ver = tgts[pool_arg->tgt_id].ta_comp.co_in_ver; + pool_arg->pma_tgt_status = tgts[pool_arg->pma_tgt_id].ta_comp.co_status; + pool_arg->pma_tgt_in_ver = 
tgts[pool_arg->pma_tgt_id].ta_comp.co_in_ver; rc = 0; out: return rc; @@ -3706,38 +3711,40 @@ ds_migrate_object(uuid_t pool_uuid, uuid_t po_hdl, uuid_t co_hdl, uuid_t co_uuid tls = migrate_pool_tls_lookup(pool_uuid, version, generation); if (tls) - arg.no_iv = true; - - uuid_copy(arg.pool_uuid, pool_uuid); - arg.rebuild_ver = version; - arg.tgt_id = tgt_id; - arg.generation = generation; - rc = dss_ult_execute(ds_migrate_prepare_ult, &arg, NULL, NULL, DSS_XS_SYS, 0, 0); - if (rc || arg.pool == NULL) + arg.pma_no_iv = true; + + uuid_copy(arg.pma_pool_uuid, pool_uuid); + arg.pma_rebuild_ver = version; + arg.pma_tgt_id = tgt_id; + arg.pma_generation = generation; + rc = dss_ult_execute(ds_migrate_prepare_ult, &arg, NULL, NULL, DSS_XS_SYS, 0, + MIGRATE_STACK_SIZE); + if (rc || arg.pma_pool == NULL) D_GOTO(out, rc); if (tls) goto skip_create; - entry = daos_prop_entry_get(arg.prop, DAOS_PROP_PO_SVC_LIST); + entry = daos_prop_entry_get(arg.pma_prop, DAOS_PROP_PO_SVC_LIST); D_ASSERT(entry != NULL); svc_list = (d_rank_list_t *)entry->dpe_val_ptr; /* prepare might yield */ tls = migrate_pool_tls_lookup(pool_uuid, version, generation); if (tls) { - arg.rebuilding_count = -1; goto skip_create; } + atomic_fetch_add(&arg.pma_pool->sp_rebuilding, 1); + rc = migrate_pool_tls_create(pool_uuid, version, generation, po_hdl, co_hdl, max_eph, - new_layout_ver, opc, &tls, svc_list, arg.tgt_status, - arg.tgt_in_ver); + new_layout_ver, opc, &tls, svc_list, arg.pma_tgt_status, + arg.pma_tgt_in_ver); if (rc != 0) D_GOTO(out, rc); skip_create: if (tls->mpt_fini) - D_GOTO(out, rc = -DER_SHUTDOWN); + D_GOTO(out, rc = migrate_pool_tls_get_status(tls)); rc = migrate_try_create_object_tree(tls); if (rc) @@ -3777,9 +3784,8 @@ ds_migrate_object(uuid_t pool_uuid, uuid_t po_hdl, uuid_t co_hdl, uuid_t co_uuid } out: - if (tls) - migrate_pool_tls_put(tls); - if (arg.pool) + migrate_pool_tls_put(tls); + if (arg.pma_pool) D_ASSERT(dss_ult_execute(ds_migrate_end_ult, &arg, NULL, NULL, DSS_XS_SYS, 0, 0) 
== 0); return rc; diff --git a/src/rebuild/scan.c b/src/rebuild/scan.c index a13826fd91e..e38e9d73e00 100644 --- a/src/rebuild/scan.c +++ b/src/rebuild/scan.c @@ -896,11 +896,12 @@ rebuild_container_scan_cb(daos_handle_t ih, vos_iter_entry_t *entry, while (cont_child->sc_ec_agg_active && rpt->rt_rebuild_op != RB_OP_RECLAIM && rpt->rt_rebuild_op != RB_OP_FAIL_RECLAIM) { - D_ASSERTF(rpt->rt_pool->sp_rebuilding >= 0, DF_UUID" rebuilding %d\n", - DP_UUID(rpt->rt_pool_uuid), rpt->rt_pool->sp_rebuilding); + D_ASSERTF(atomic_load(&rpt->rt_pool->sp_rebuilding) >= 0, + DF_UUID " rebuilding %d\n", DP_UUID(rpt->rt_pool_uuid), + atomic_load(&rpt->rt_pool->sp_rebuilding)); /* Wait for EC aggregation to abort before discard the object */ D_INFO(DF_RB " " DF_UUID " wait for ec agg abort, rebuilding %d.\n", DP_RB_RPT(rpt), - DP_UUID(entry->ie_couuid), rpt->rt_pool->sp_rebuilding); + DP_UUID(entry->ie_couuid), atomic_load(&rpt->rt_pool->sp_rebuilding)); dss_sleep(1000); if (rpt->rt_abort || rpt->rt_finishing) { D_DEBUG(DB_REBUILD, DF_RB " " DF_UUID " rebuild abort %u/%u.\n", @@ -1263,7 +1264,7 @@ rebuild_tgt_scan_handler(crt_rpc_t *rpc) D_GOTO(out, rc); } - rpt->rt_pool->sp_rebuilding++; /* reset in rebuild_tgt_fini */ + atomic_fetch_add(&rpt->rt_pool->sp_rebuilding, 1); /* reset in rebuild_tgt_fini */ rpt_get(rpt); /* step-3: start scan leader */ diff --git a/src/rebuild/srv.c b/src/rebuild/srv.c index 373db1dbb4d..3bde950d5bf 100644 --- a/src/rebuild/srv.c +++ b/src/rebuild/srv.c @@ -2732,8 +2732,8 @@ rebuild_tgt_fini(struct rebuild_tgt_pool_tracker *rpt) D_INFO(DF_RB " finishing rebuild rpt refcount %u, pool refcount %u\n", DP_RB_RPT(rpt), rpt->rt_refcount, daos_lru_ref_count(&rpt->rt_pool->sp_entry)); - D_ASSERT(rpt->rt_pool->sp_rebuilding > 0); - rpt->rt_pool->sp_rebuilding--; + D_ASSERT(atomic_load(&rpt->rt_pool->sp_rebuilding) > 0); + atomic_fetch_sub(&rpt->rt_pool->sp_rebuilding, 1); rpt->rt_pool->sp_rebuild_scan = 0; ABT_mutex_lock(rpt->rt_lock); From 
0c7e3d8a692a68102f5ae5d18a79a4f4b54198d4 Mon Sep 17 00:00:00 2001 From: Cedric Koch-Hofer <94527853+knard38@users.noreply.github.com> Date: Tue, 2 Dec 2025 17:03:55 +0100 Subject: [PATCH 048/253] DAOS-18282 build: Allow cmocka test filtering (#17182) Add new CMOCKA_FILTER_SUPPORTED SCons variable to enable/disable cmocka filtering. Signed-off-by: Cedric Koch-Hofer --- site_scons/prereq_tools/base.py | 1 + site_scons/site_tools/compiler_setup.py | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/site_scons/prereq_tools/base.py b/site_scons/prereq_tools/base.py index 00d00d2b1fb..a1cd84fab2a 100644 --- a/site_scons/prereq_tools/base.py +++ b/site_scons/prereq_tools/base.py @@ -518,6 +518,7 @@ def __init__(self, env, opts): opts.Add(EnumVariable('WARNING_LEVEL', "Set default warning level", 'error', ['warning', 'warn', 'error'], ignorecase=2)) opts.Add(('SANITIZERS', 'Instrument C code with google sanitizers', None)) + opts.Add(BoolVariable('CMOCKA_FILTER_SUPPORTED', 'Allows to filter cmocka tests', False)) opts.Update(self.__env) diff --git a/site_scons/site_tools/compiler_setup.py b/site_scons/site_tools/compiler_setup.py index 51909b8cfe6..08e97f62d1f 100644 --- a/site_scons/site_tools/compiler_setup.py +++ b/site_scons/site_tools/compiler_setup.py @@ -106,7 +106,10 @@ def _base_setup(env): env.AppendUnique(CPPDEFINES={'FAULT_INJECTION': '1'}) env.AppendUnique(CPPDEFINES={'BUILD_PIPELINE': '1'}) - env.AppendUnique(CPPDEFINES={'CMOCKA_FILTER_SUPPORTED': '0'}) + if env['CMOCKA_FILTER_SUPPORTED']: + env.AppendUnique(CPPDEFINES={'CMOCKA_FILTER_SUPPORTED': '1'}) + else: + env.AppendUnique(CPPDEFINES={'CMOCKA_FILTER_SUPPORTED': '0'}) env.AppendUnique(CPPDEFINES='_GNU_SOURCE') From 17017ae201f48914a3b807cacab6241e35acd35f Mon Sep 17 00:00:00 2001 From: Ken Cain Date: Tue, 2 Dec 2025 11:08:18 -0500 Subject: [PATCH 049/253] DAOS-17358 test: more daos_test interactive rebuild cases (#17025) common: - add interactive rebuild stop|start into 
reintegrate_inflight_io() for use by existind, and new interactive rebuild stop|start tests. - change semantics of rebuild_pool_erroring() function, immediately return upon seeing nonzero rs_errno in rebuild state. - new functions rebuild_resume_wait_to_start() and test_rebuild_wait_to_start_after_ver() - warn not assert on first rebuild state check in rebuild_resume_wait() - T_BEGIN() and T_END() macros to add easy __FUNCTION__ identifying printing into all test cases, for faster diagnosis of future failing tests. new daos_rebuild_interactive test. - selected cases from rebuild_simple, drain_simple, extend_simple, inserting rebuild stop|start explicit/interactive control of running rebuilds. - added to suite.py and suite.yaml rebuild_simple - remove --rebuild_interactive from all cases, factor out selected cases into the new daos_rebuild_interactive test. Remove corresponding information from suite.py and suite.yaml. drain_simple - remove --rebuild_interactive from all cases, factor out selected cases into common code and the new daos_rebuild_interactive test. Common code in daos_drain_common.c Remove corresponding information from suite.py and suite.yaml. extend_simple - remove --rebuild_interactive from all cases, factor out selected cases into common code and a new daos_rebuild_interactive test. Common code in daos_extend_common.c Remove corresponding information from suite.py and suite.yaml. 
Signed-off-by: Kenneth Cain --- src/rebuild/srv.c | 7 +- src/tests/ftest/daos_test/suite.py | 28 +- src/tests/ftest/daos_test/suite.yaml | 12 +- src/tests/suite/SConscript | 19 +- src/tests/suite/daos_drain_common.c | 227 +++++++ src/tests/suite/daos_drain_simple.c | 312 +++------- src/tests/suite/daos_extend_common.c | 223 +++++++ src/tests/suite/daos_extend_simple.c | 539 ++++++---------- src/tests/suite/daos_rebuild_common.c | 91 ++- src/tests/suite/daos_rebuild_interactive.c | 682 +++++++++++++++++++++ src/tests/suite/daos_rebuild_simple.c | 333 +++------- src/tests/suite/daos_test.c | 17 +- src/tests/suite/daos_test.h | 91 ++- src/tests/suite/daos_test_common.c | 42 +- 14 files changed, 1683 insertions(+), 940 deletions(-) create mode 100644 src/tests/suite/daos_drain_common.c create mode 100644 src/tests/suite/daos_extend_common.c create mode 100644 src/tests/suite/daos_rebuild_interactive.c diff --git a/src/rebuild/srv.c b/src/rebuild/srv.c index 3bde950d5bf..88e6b53c851 100644 --- a/src/rebuild/srv.c +++ b/src/rebuild/srv.c @@ -565,6 +565,10 @@ rebuild_status_completed_update_partial(const uuid_t pool_uuid, int32_t rs_state rs_inlist = rebuild_status_completed_lookup(pool_uuid); if (rs_inlist != NULL) { + /* possible enhancement: only overwrite rs_inlist->rs_errno if rs_errno != 0 + * e.g., if marking a failed rebuild as done after Fail_reclaim, keep original + * rs_errno. + */ rs_inlist->rs_errno = rs_errno; rs_inlist->rs_state = rs_state; return 0; @@ -1901,8 +1905,7 @@ rebuild_task_complete_schedule(struct rebuild_task *task, struct ds_pool *pool, DP_RC(rgt->rgt_status.rs_errno)); } else if ((task->dst_rebuild_op == RB_OP_FAIL_RECLAIM) && (task->dst_retry_rebuild_op != RB_OP_NONE)) { - /* Fail_reclaim done (and a stop command wasn't received during) - retry original - * rebuild */ + /* Fail_reclaim done (and a stop command wasn't received during) - retry rebuild. 
*/ rc1 = ds_rebuild_schedule(pool, task->dst_retry_map_ver, rgt->rgt_reclaim_epoch, task->dst_new_layout_version, &task->dst_tgts, task->dst_retry_rebuild_op, diff --git a/src/tests/ftest/daos_test/suite.py b/src/tests/ftest/daos_test/suite.py index 3c31ef9ec7e..3be9b5afe7b 100644 --- a/src/tests/ftest/daos_test/suite.py +++ b/src/tests/ftest/daos_test/suite.py @@ -302,51 +302,51 @@ def test_daos_rebuild_simple(self): """ self.run_subtest() - def test_daos_rebuild_simple_interactive(self): - """Jira ID: DAOS-17354 + def test_daos_drain_simple(self): + """Jira ID: DAOS-1568 Test Description: - Run daos_test -v --rebuild_interactive + Run daos_test -b Use cases: Core tests for daos_test - :avocado: tags=all,full_regression + :avocado: tags=all,pr,daily_regression :avocado: tags=hw,medium,provider :avocado: tags=daos_test,daos_core_test,rebuild - :avocado: tags=DaosCoreTest,test_daos_rebuild_simple_interactive + :avocado: tags=DaosCoreTest,test_daos_drain_simple """ self.run_subtest() - def test_daos_drain_simple(self): + def test_daos_extend_simple(self): """Jira ID: DAOS-1568 Test Description: - Run daos_test -b + Run daos_test -B Use cases: Core tests for daos_test :avocado: tags=all,pr,daily_regression :avocado: tags=hw,medium,provider - :avocado: tags=daos_test,daos_core_test - :avocado: tags=DaosCoreTest,test_daos_drain_simple + :avocado: tags=daos_test,daos_core_test,rebuild + :avocado: tags=DaosCoreTest,test_daos_extend_simple """ self.run_subtest() - def test_daos_extend_simple(self): - """Jira ID: DAOS-1568 + def test_daos_rebuild_interactive(self): + """Jira ID: DAOS-17358 Test Description: - Run daos_test -B + Run daos_test -j Use cases: Core tests for daos_test :avocado: tags=all,pr,daily_regression :avocado: tags=hw,medium,provider - :avocado: tags=daos_test,daos_core_test - :avocado: tags=DaosCoreTest,test_daos_extend_simple + :avocado: tags=daos_test,daos_core_test,rebuild + :avocado: tags=DaosCoreTest,test_daos_rebuild_interactive """ 
self.run_subtest() diff --git a/src/tests/ftest/daos_test/suite.yaml b/src/tests/ftest/daos_test/suite.yaml index 5761ef1de1e..23e8bf7297b 100644 --- a/src/tests/ftest/daos_test/suite.yaml +++ b/src/tests/ftest/daos_test/suite.yaml @@ -23,9 +23,9 @@ timeouts: test_daos_epoch_recovery: 104 test_daos_md_replication: 104 test_daos_rebuild_simple: 1800 - test_daos_rebuild_simple_interactive: 2100 - test_daos_drain_simple: 3600 + test_daos_drain_simple: 3720 test_daos_extend_simple: 3600 + test_daos_rebuild_interactive: 1020 test_daos_oid_allocator: 640 test_daos_checksum: 500 test_daos_rebuild_ec: 9000 @@ -101,9 +101,9 @@ daos_tests: test_daos_epoch_recovery: 1 test_daos_md_replication: 2 test_daos_rebuild_simple: 1 - test_daos_rebuild_simple_interactive: 1 test_daos_drain_simple: 1 test_daos_extend_simple: 1 + test_daos_rebuild_interactive: 1 test_daos_oid_allocator: 1 test_daos_checksum: 1 test_daos_rebuild_ec: 1 @@ -131,7 +131,6 @@ daos_tests: test_daos_epoch_recovery: DAOS_Epoch_Recovery test_daos_md_replication: DAOS_MD_Replication test_daos_rebuild_simple: DAOS_Rebuild_Simple - test_daos_rebuild_simple_interactive: DAOS_Rebuild_Simple_Interactive test_daos_drain_simple: DAOS_Drain_Simple test_daos_oid_allocator: DAOS_OID_Allocator test_daos_checksum: DAOS_Checksum @@ -140,6 +139,7 @@ daos_tests: test_daos_degraded_ec: DAOS_Degraded_EC test_daos_dedup: DAOS_Dedup test_daos_extend_simple: DAOS_Extend_Simple + test_daos_rebuild_interactive: DAOS_Rebuild_Interactive test_daos_upgrade: DAOS_Upgrade test_daos_pipeline: DAOS_Pipeline daos_test: @@ -161,9 +161,9 @@ daos_tests: test_daos_epoch_recovery: o test_daos_md_replication: R test_daos_rebuild_simple: v - test_daos_rebuild_simple_interactive: v test_daos_drain_simple: b test_daos_extend_simple: B + test_daos_rebuild_interactive: j test_daos_oid_allocator: O test_daos_checksum: z test_daos_rebuild_ec: S @@ -178,9 +178,9 @@ daos_tests: test_daos_md_replication: -s5 test_daos_degraded_mode: -s7 
test_daos_rebuild_simple: -s3 - test_daos_rebuild_simple_interactive: -s3 --rebuild_interactive test_daos_drain_simple: -s3 test_daos_extend_simple: -s3 + test_daos_rebuild_interactive: -s3 test_daos_oid_allocator: -s5 stopped_ranks: test_daos_degraded_mode: [5, 6, 7] diff --git a/src/tests/suite/SConscript b/src/tests/suite/SConscript index ab3a3fee0e6..e780a0ae7cd 100644 --- a/src/tests/suite/SConscript +++ b/src/tests/suite/SConscript @@ -33,16 +33,15 @@ def scons(): newenv = denv.Clone() - c_files = Split("""daos_array.c daos_base_tx.c daos_capa.c daos_checksum.c - daos_container.c daos_dedup.c daos_degraded.c - daos_dist_tx.c daos_drain_simple.c daos_epoch.c - daos_epoch_io.c daos_epoch_recovery.c daos_kv.c - daos_md_replication.c daos_mgmt.c daos_nvme_recovery.c - daos_obj_array.c daos_obj.c daos_oid_alloc.c daos_pool.c - daos_rebuild.c daos_rebuild_common.c daos_rebuild_ec.c - daos_rebuild_simple.c daos_test.c daos_verify_consistency.c - daos_aggregate_ec.c daos_degrade_ec.c daos_cr.c daos_inc_reint.c - daos_extend_simple.c daos_obj_ec.c daos_upgrade.c daos_pipeline.c""") + c_files = Split("""daos_aggregate_ec.c daos_array.c daos_base_tx.c daos_capa.c daos_checksum.c + daos_container.c daos_cr.c daos_dedup.c daos_degraded.c daos_degrade_ec.c + daos_dist_tx.c daos_drain_common.c daos_drain_simple.c daos_epoch.c + daos_epoch_io.c daos_epoch_recovery.c daos_extend_common.c + daos_extend_simple.c daos_inc_reint.c daos_kv.c daos_md_replication.c + daos_mgmt.c daos_nvme_recovery.c daos_obj.c daos_obj_array.c daos_obj_ec.c + daos_oid_alloc.c daos_pipeline.c daos_pool.c daos_rebuild.c + daos_rebuild_common.c daos_rebuild_ec.c daos_rebuild_interactive.c + daos_rebuild_simple.c daos_test.c daos_upgrade.c daos_verify_consistency.c""") daostest = newenv.d_program('daos_test', c_files + daos_test_tgt, LIBS=['daos_common'] + libraries) diff --git a/src/tests/suite/daos_drain_common.c b/src/tests/suite/daos_drain_common.c new file mode 100644 index 00000000000..b1d317a1a0e 
--- /dev/null +++ b/src/tests/suite/daos_drain_common.c @@ -0,0 +1,227 @@ +/** + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ +/** + * This file is for common functions used between daos_drain_simple.c and + * daos_rebuild_interactive.c tests. + * + * tests/suite/daos_drain_common.c + * + */ +#define D_LOGFAC DD_FAC(tests) + +#include "daos_test.h" +#include "daos_iotest.h" +#include "dfs_test.h" +#include +#include +#include + +/* clang-format off */ +const char *extend_drain_opstrs[] = { + "EXTEND_DRAIN_PUNCH", + "EXTEND_DRAIN_STAT", + "EXTEND_DRAIN_ENUMERATE", + "EXTEND_DRAIN_FETCH", + "EXTEND_DRAIN_UPDATE", + "EXTEND_DRAIN_OVERWRITE", + "EXTEND_DRAIN_WRITELOOP" +}; +/* clang-format on */ + +void +extend_drain_read_check(dfs_t *dfs_mt, dfs_obj_t *dir, uint32_t objclass, uint32_t objcnt, + daos_size_t total_size, char start_char) +{ + char *buf = NULL; + char *verify_buf = NULL; + daos_size_t buf_size = 512 * 1024; + d_sg_list_t sgl; + d_iov_t iov; + d_iov_t verify_iov; + int i; + + buf = malloc(buf_size); + verify_buf = malloc(buf_size); + assert_non_null(buf); + assert_non_null(verify_buf); + d_iov_set(&iov, buf, buf_size); + d_iov_set(&verify_iov, buf, buf_size); + sgl.sg_nr = 1; + sgl.sg_iovs = &iov; + + for (i = 0; i < objcnt; i++) { + char filename[32]; + daos_size_t read_size = buf_size; + dfs_obj_t *obj; + daos_off_t offset = 0; + daos_size_t total = total_size; + int rc; + + sprintf(filename, "file%d", i); + rc = dfs_open(dfs_mt, dir, filename, S_IFREG | S_IWUSR | S_IRUSR, O_RDWR, objclass, + 1048576, NULL, &obj); + assert_int_equal(rc, 0); + + memset(verify_buf, start_char + i, buf_size); + + while (total > 0) { + memset(buf, 0, buf_size); + rc = dfs_read(dfs_mt, obj, &sgl, offset, &read_size, NULL); + assert_int_equal(rc, 0); + assert_memory_equal(buf, verify_buf, read_size); + offset += read_size; + total -= read_size; + } + + rc = dfs_release(obj); + assert_int_equal(rc, 0); 
+ } + free(buf); + free(verify_buf); +} + +void +extend_drain_write(dfs_t *dfs_mt, dfs_obj_t *dir, uint32_t objclass, uint32_t objcnt, + daos_size_t total_size, char write_char, daos_obj_id_t *oids) +{ + char *buf = NULL; + daos_size_t buf_size = 512 * 1024; + d_sg_list_t sgl; + d_iov_t iov; + int i; + + buf = malloc(buf_size); + assert_non_null(buf); + d_iov_set(&iov, buf, buf_size); + sgl.sg_nr = 1; + sgl.sg_iovs = &iov; + + for (i = 0; i < objcnt; i++) { + char filename[32]; + dfs_obj_t *obj; + daos_size_t total = total_size; + daos_off_t offset = 0; + int rc; + + sprintf(filename, "file%d", i); + rc = dfs_open(dfs_mt, dir, filename, S_IFREG | S_IWUSR | S_IRUSR, O_RDWR | O_CREAT, + OC_EC_2P1GX, 1048576, NULL, &obj); + assert_int_equal(rc, 0); + if (oids != NULL) + dfs_obj2id(obj, &oids[i]); + + memset(buf, write_char + i, buf_size); + while (total > 0) { + rc = dfs_write(dfs_mt, obj, &sgl, offset, NULL); + assert_int_equal(rc, 0); + offset += buf_size; + total -= buf_size; + } + rc = dfs_release(obj); + assert_int_equal(rc, 0); + } + free(buf); +} + +void +extend_drain_check(dfs_t *dfs_mt, dfs_obj_t *dir, int objclass, int opc) +{ + switch (opc) { + case EXTEND_DRAIN_PUNCH: + break; + case EXTEND_DRAIN_OVERWRITE: + extend_drain_read_check(dfs_mt, dir, objclass, EXTEND_DRAIN_OBJ_NR, WRITE_SIZE, + 'b'); + break; + case EXTEND_DRAIN_WRITELOOP: + extend_drain_read_check(dfs_mt, dir, objclass, 1, 512 * 1048576, 'a'); + break; + default: + extend_drain_read_check(dfs_mt, dir, objclass, EXTEND_DRAIN_OBJ_NR, WRITE_SIZE, + 'a'); + break; + } +} + +void +dfs_extend_drain_common(void **state, int opc, uint32_t objclass, + test_rebuild_cb_t extend_drain_cb_fn) +{ + test_arg_t *arg = *state; + dfs_t *dfs_mt; + daos_handle_t co_hdl; + dfs_obj_t *dir; + uuid_t co_uuid; + char str[37]; + daos_obj_id_t oids[EXTEND_DRAIN_OBJ_NR]; + struct extend_drain_cb_arg cb_arg; + dfs_attr_t attr = {}; + int rc; + + FAULT_INJECTION_REQUIRED(); + + if (!test_runable(arg, 4)) + return; + + 
attr.da_props = daos_prop_alloc(2); + assert_non_null(attr.da_props); + attr.da_props->dpp_entries[0].dpe_type = DAOS_PROP_CO_REDUN_LVL; + attr.da_props->dpp_entries[0].dpe_val = DAOS_PROP_CO_REDUN_RANK; + attr.da_props->dpp_entries[1].dpe_type = DAOS_PROP_CO_REDUN_FAC; + attr.da_props->dpp_entries[1].dpe_val = DAOS_PROP_CO_REDUN_RF1; + rc = dfs_cont_create(arg->pool.poh, &co_uuid, &attr, &co_hdl, &dfs_mt); + daos_prop_free(attr.da_props); + assert_int_equal(rc, 0); + print_message("Created DFS Container " DF_UUIDF "\n", DP_UUID(co_uuid)); + + rc = dfs_open(dfs_mt, NULL, "dir", S_IFDIR | S_IWUSR | S_IRUSR, O_RDWR | O_CREAT, objclass, + 0, NULL, &dir); + assert_int_equal(rc, 0); + + /* Create 10 files */ + if (opc != EXTEND_DRAIN_UPDATE) + extend_drain_write(dfs_mt, dir, objclass, EXTEND_DRAIN_OBJ_NR, WRITE_SIZE, 'a', + oids); + + cb_arg.oids = oids; + cb_arg.dfs_mt = dfs_mt; + cb_arg.dir = dir; + cb_arg.opc = opc; + cb_arg.objclass = objclass; + arg->rebuild_cb = extend_drain_cb_fn; + arg->rebuild_cb_arg = &cb_arg; + + /* HOLD rebuild ULT */ + daos_debug_set_params(arg->group, -1, DMG_KEY_FAIL_LOC, + DAOS_REBUILD_TGT_SCAN_HANG | DAOS_FAIL_ALWAYS, 0, NULL); + drain_single_pool_rank(arg, ranks_to_kill[0], false); + + extend_drain_check(dfs_mt, dir, objclass, opc); + + /* Unclear if kill engine is necessary for a drain / reintegrate test. + * Consider instead test_rebuild_wait() and reintegrate_single_pool_rank(restart=false). 
+ */ + daos_kill_server(arg, arg->pool.pool_uuid, arg->group, arg->pool.alive_svc, + ranks_to_kill[0]); + + arg->rebuild_cb = NULL; + arg->rebuild_cb_arg = NULL; + reintegrate_single_pool_rank(arg, ranks_to_kill[0], true); + + extend_drain_check(dfs_mt, dir, objclass, opc); + + rc = dfs_release(dir); + assert_int_equal(rc, 0); + rc = dfs_umount(dfs_mt); + assert_int_equal(rc, 0); + + rc = daos_cont_close(co_hdl, NULL); + assert_rc_equal(rc, 0); + + uuid_unparse(co_uuid, str); + rc = daos_cont_destroy(arg->pool.poh, str, 1, NULL); + assert_rc_equal(rc, 0); +} diff --git a/src/tests/suite/daos_drain_simple.c b/src/tests/suite/daos_drain_simple.c index d5ce764789c..2227a3bcbc4 100644 --- a/src/tests/suite/daos_drain_simple.c +++ b/src/tests/suite/daos_drain_simple.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2016-2023 Intel Corporation. + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -43,6 +44,7 @@ drain_dkeys(void **state) if (!test_runable(arg, 4)) return; + T_BEGIN(); oid = daos_test_oid_gen(arg->coh, DAOS_OC_R1S_SPEC_RANK, 0, 0, arg->myrank); oid = dts_oid_set_rank(oid, ranks_to_kill[0]); @@ -80,6 +82,7 @@ drain_dkeys(void **state) reintegrate_inflight_io_verify(arg); ioreq_fini(&req); + T_END(); } static int @@ -110,6 +113,7 @@ cont_open_in_drain(void **state) if (!test_runable(arg, 4)) return; + T_BEGIN(); oid = daos_test_oid_gen(arg->coh, DAOS_OC_R1S_SPEC_RANK, 0, 0, arg->myrank); oid = dts_oid_set_rank(oid, ranks_to_kill[0]); @@ -129,7 +133,7 @@ cont_open_in_drain(void **state) ioreq_fini(&req); test_teardown_cont_hdl(arg); - arg->rebuild_cb = cont_open_and_inflight_io; + arg->rebuild_cb = cont_open_and_inflight_io; arg->rebuild_cb_arg = &oid; drain_single_pool_target(arg, ranks_to_kill[0], tgt, false); @@ -150,6 +154,7 @@ cont_open_in_drain(void **state) reintegrate_inflight_io_verify(arg); ioreq_fini(&req); + T_END(); } static void @@ -166,6 +171,7 @@ drain_akeys(void **state) if 
(!test_runable(arg, 4)) return; + T_BEGIN(); oid = daos_test_oid_gen(arg->coh, DAOS_OC_R1S_SPEC_RANK, 0, 0, arg->myrank); oid = dts_oid_set_rank(oid, ranks_to_kill[0]); @@ -202,6 +208,7 @@ drain_akeys(void **state) reintegrate_inflight_io_verify(arg); ioreq_fini(&req); + T_END(); } static void @@ -219,6 +226,7 @@ drain_indexes(void **state) if (!test_runable(arg, 4)) return; + T_BEGIN(); oid = daos_test_oid_gen(arg->coh, DAOS_OC_R1S_SPEC_RANK, 0, 0, arg->myrank); oid = dts_oid_set_rank(oid, ranks_to_kill[0]); @@ -258,6 +266,7 @@ drain_indexes(void **state) reintegrate_inflight_io_verify(arg); ioreq_fini(&req); + T_END(); } static void @@ -280,6 +289,7 @@ drain_snap_update_keys(void **state) if (!test_runable(arg, 4)) return; + T_BEGIN(); oid = daos_test_oid_gen(arg->coh, DAOS_OC_R1S_SPEC_RANK, 0, 0, arg->myrank); oid = dts_oid_set_rank(oid, ranks_to_kill[0]); @@ -298,7 +308,7 @@ drain_snap_update_keys(void **state) insert_single("dkey", akey, 0, "data", 1, DAOS_TX_NONE, &req); } - arg->rebuild_cb = reintegrate_inflight_io; + arg->rebuild_cb = reintegrate_inflight_io; arg->rebuild_cb_arg = &oid; drain_single_pool_target(arg, ranks_to_kill[0], tgt, false); @@ -335,6 +345,7 @@ drain_snap_update_keys(void **state) reintegrate_inflight_io_verify(arg); ioreq_fini(&req); + T_END(); } static void @@ -357,6 +368,7 @@ drain_snap_punch_keys(void **state) if (!test_runable(arg, 4)) return; + T_BEGIN(); oid = daos_test_oid_gen(arg->coh, DAOS_OC_R3S_SPEC_RANK, 0, 0, arg->myrank); oid = dts_oid_set_rank(oid, ranks_to_kill[0]); @@ -390,7 +402,7 @@ drain_snap_punch_keys(void **state) punch_akey("dkey", akey, DAOS_TX_NONE, &req); } - arg->rebuild_cb = reintegrate_inflight_io; + arg->rebuild_cb = reintegrate_inflight_io; arg->rebuild_cb_arg = &oid; drain_single_pool_target(arg, ranks_to_kill[0], tgt, false); @@ -427,6 +439,7 @@ drain_snap_punch_keys(void **state) reintegrate_inflight_io_verify(arg); ioreq_fini(&req); + T_END(); } static void @@ -445,6 +458,7 @@ drain_multiple(void 
**state) if (!test_runable(arg, 4)) return; + T_BEGIN(); oid = daos_test_oid_gen(arg->coh, DAOS_OC_R1S_SPEC_RANK, 0, 0, arg->myrank); oid = dts_oid_set_rank(oid, ranks_to_kill[0]); @@ -495,6 +509,7 @@ drain_multiple(void **state) reintegrate_inflight_io_verify(arg); ioreq_fini(&req); + T_END(); } static void @@ -513,6 +528,7 @@ drain_large_rec(void **state) if (!test_runable(arg, 4)) return; + T_BEGIN(); oid = daos_test_oid_gen(arg->coh, DAOS_OC_R1S_SPEC_RANK, 0, 0, arg->myrank); oid = dts_oid_set_rank(oid, ranks_to_kill[0]); @@ -548,6 +564,7 @@ drain_large_rec(void **state) reintegrate_inflight_io_verify(arg); ioreq_fini(&req); + T_END(); } static void @@ -563,6 +580,7 @@ drain_objects(void **state) if (!test_runable(arg, 4)) return; + T_BEGIN(); for (i = 0; i < OBJ_NR; i++) { oids[i] = daos_test_oid_gen(arg->coh, DAOS_OC_R1S_SPEC_RANK, 0, 0, arg->myrank); @@ -577,20 +595,22 @@ drain_objects(void **state) rebuild_io_validate(arg, oids, OBJ_NR); reintegrate_inflight_io_verify(arg); + T_END(); } static void drain_fail_and_retry_objects(void **state) { - test_arg_t *arg = *state; - daos_obj_id_t oids[OBJ_NR]; - int i; + test_arg_t *arg = *state; + daos_obj_id_t oids[OBJ_NR]; + int i; FAULT_INJECTION_REQUIRED(); if (!test_runable(arg, 4)) return; + T_BEGIN(); for (i = 0; i < OBJ_NR; i++) { oids[i] = daos_test_oid_gen(arg->coh, DAOS_OC_R1S_SPEC_RANK, 0, 0, arg->myrank); @@ -611,8 +631,12 @@ drain_fail_and_retry_objects(void **state) daos_debug_set_params(arg->group, -1, DMG_KEY_FAIL_LOC, 0, 0, NULL); rebuild_io_validate(arg, oids, OBJ_NR); + arg->rebuild_cb = reintegrate_inflight_io; + arg->rebuild_cb_arg = &oids[OBJ_NR - 1]; drain_single_pool_rank(arg, ranks_to_kill[0], false); rebuild_io_validate(arg, oids, OBJ_NR); + reintegrate_inflight_io_verify(arg); + T_END(); } static void @@ -626,6 +650,7 @@ drain_then_exclude(void **state) if (!test_runable(arg, 4)) return; + T_BEGIN(); oid = daos_test_oid_gen(arg->coh, OC_EC_2P1GX, 0, 0, arg->myrank); rebuild_io(arg, &oid, 
1); @@ -639,123 +664,10 @@ drain_then_exclude(void **state) reintegrate_single_pool_rank(arg, ranks_to_kill[0], true); rebuild_io_validate(arg, &oid, 1); + T_END(); } -#define EXTEND_DRAIN_OBJ_NR 5 -#define WRITE_SIZE (1048576 * 5) -struct extend_drain_cb_arg{ - daos_obj_id_t *oids; - dfs_t *dfs_mt; - dfs_obj_t *dir; - d_rank_t rank; - uint32_t objclass; - int opc; -}; - -enum extend_drain_opc { - EXTEND_DRAIN_PUNCH, - EXTEND_DRAIN_STAT, - EXTEND_DRAIN_ENUMERATE, - EXTEND_DRAIN_FETCH, - EXTEND_DRAIN_UPDATE, - EXTEND_DRAIN_OVERWRITE, - EXTEND_DRAIN_WRITELOOP, -}; - -static void -extend_drain_read_check(dfs_t *dfs_mt, dfs_obj_t *dir, uint32_t objclass, uint32_t objcnt, - daos_size_t total_size, char start_char) -{ - char *buf = NULL; - char *verify_buf = NULL; - daos_size_t buf_size = 512 * 1024; - d_sg_list_t sgl; - d_iov_t iov; - d_iov_t verify_iov; - int i; - - buf = malloc(buf_size); - verify_buf = malloc(buf_size); - assert_non_null(buf); - assert_non_null(verify_buf); - d_iov_set(&iov, buf, buf_size); - d_iov_set(&verify_iov, buf, buf_size); - sgl.sg_nr = 1; - sgl.sg_iovs = &iov; - - for (i = 0; i < objcnt; i++) { - char filename[32]; - daos_size_t read_size = buf_size; - dfs_obj_t *obj; - daos_off_t offset = 0; - daos_size_t total = total_size; - int rc; - - sprintf(filename, "file%d", i); - rc = dfs_open(dfs_mt, dir, filename, S_IFREG | S_IWUSR | S_IRUSR, - O_RDWR, objclass, 1048576, NULL, &obj); - assert_int_equal(rc, 0); - - memset(verify_buf, start_char + i, buf_size); - - while (total > 0) { - memset(buf, 0, buf_size); - rc = dfs_read(dfs_mt, obj, &sgl, offset, &read_size, NULL); - assert_int_equal(rc, 0); - assert_memory_equal(buf, verify_buf, read_size); - offset += read_size; - total -= read_size; - } - - rc = dfs_release(obj); - assert_int_equal(rc, 0); - } - free(buf); - free(verify_buf); -} - -static void -extend_drain_write(dfs_t *dfs_mt, dfs_obj_t *dir, uint32_t objclass, uint32_t objcnt, - daos_size_t total_size, char write_char, daos_obj_id_t 
*oids) -{ - char *buf = NULL; - daos_size_t buf_size = 512 * 1024; - d_sg_list_t sgl; - d_iov_t iov; - int i; - - buf = malloc(buf_size); - assert_non_null(buf); - d_iov_set(&iov, buf, buf_size); - sgl.sg_nr = 1; - sgl.sg_iovs = &iov; - - for (i = 0; i < objcnt; i++) { - char filename[32]; - dfs_obj_t *obj; - daos_size_t total = total_size; - daos_off_t offset = 0; - int rc; - - sprintf(filename, "file%d", i); - rc = dfs_open(dfs_mt, dir, filename, S_IFREG | S_IWUSR | S_IRUSR, - O_RDWR | O_CREAT, OC_EC_2P1GX, 1048576, NULL, &obj); - assert_int_equal(rc, 0); - if (oids != NULL) - dfs_obj2id(obj, &oids[i]); - - memset(buf, write_char + i, buf_size); - while (total > 0) { - rc = dfs_write(dfs_mt, obj, &sgl, offset, NULL); - assert_int_equal(rc, 0); - offset += buf_size; - total -= buf_size; - } - rc = dfs_release(obj); - assert_int_equal(rc, 0); - } - free(buf); -} +/* FIXME: rename a few things - most of this code is performing drain + kill/exclude, NOT extend */ static int extend_drain_cb_internal(void *arg) @@ -775,10 +687,12 @@ extend_drain_cb_internal(void *arg) int i; if (opc != EXTEND_DRAIN_WRITELOOP) { - print_message("sleep 5 seconds then start op %d\n", opc); + print_message("sleep 5 seconds first\n"); sleep(5); } + print_message("start op %d (%s)\n", opc, extend_drain_opstrs[opc]); + /* Kill another rank during extend */ switch(opc) { case EXTEND_DRAIN_PUNCH: @@ -837,151 +751,85 @@ extend_drain_cb_internal(void *arg) daos_debug_set_params(test_arg->group, -1, DMG_KEY_FAIL_LOC, 0, 0, NULL); - return 0; -} - -static void -extend_drain_check(dfs_t *dfs_mt, dfs_obj_t *dir, int objclass, int opc) -{ - switch (opc) { - case EXTEND_DRAIN_PUNCH: - break; - case EXTEND_DRAIN_OVERWRITE: - extend_drain_read_check(dfs_mt, dir, objclass, EXTEND_DRAIN_OBJ_NR, - WRITE_SIZE, 'b'); - break; - case EXTEND_DRAIN_WRITELOOP: - extend_drain_read_check(dfs_mt, dir, objclass, 1, 512 * 1048576, 'a'); - break; - default: - extend_drain_read_check(dfs_mt, dir, objclass, 
EXTEND_DRAIN_OBJ_NR, - WRITE_SIZE, 'a'); - break; - } -} - -void -dfs_extend_drain_common(void **state, int opc, uint32_t objclass) -{ - test_arg_t *arg = *state; - dfs_t *dfs_mt; - daos_handle_t co_hdl; - dfs_obj_t *dir; - uuid_t co_uuid; - char str[37]; - daos_obj_id_t oids[EXTEND_DRAIN_OBJ_NR]; - struct extend_drain_cb_arg cb_arg; - dfs_attr_t attr = {}; - int rc; - - FAULT_INJECTION_REQUIRED(); + print_message("done op %d (%s)\n", opc, extend_drain_opstrs[opc]); - if (!test_runable(arg, 4)) - return; - - attr.da_props = daos_prop_alloc(2); - assert_non_null(attr.da_props); - attr.da_props->dpp_entries[0].dpe_type = DAOS_PROP_CO_REDUN_LVL; - attr.da_props->dpp_entries[0].dpe_val = DAOS_PROP_CO_REDUN_RANK; - attr.da_props->dpp_entries[1].dpe_type = DAOS_PROP_CO_REDUN_FAC; - attr.da_props->dpp_entries[1].dpe_val = DAOS_PROP_CO_REDUN_RF1; - rc = dfs_cont_create(arg->pool.poh, &co_uuid, &attr, &co_hdl, &dfs_mt); - daos_prop_free(attr.da_props); - assert_int_equal(rc, 0); - print_message("Created DFS Container "DF_UUIDF"\n", DP_UUID(co_uuid)); - - rc = dfs_open(dfs_mt, NULL, "dir", S_IFDIR | S_IWUSR | S_IRUSR, - O_RDWR | O_CREAT, objclass, 0, NULL, &dir); - assert_int_equal(rc, 0); - - /* Create 10 files */ - if (opc != EXTEND_DRAIN_UPDATE) - extend_drain_write(dfs_mt, dir, objclass, EXTEND_DRAIN_OBJ_NR, WRITE_SIZE, - 'a', oids); - - cb_arg.oids = oids; - cb_arg.dfs_mt = dfs_mt; - cb_arg.dir = dir; - cb_arg.opc = opc; - cb_arg.objclass = objclass; - arg->rebuild_cb = extend_drain_cb_internal; - arg->rebuild_cb_arg = &cb_arg; - - /* HOLD rebuild ULT */ - daos_debug_set_params(arg->group, -1, DMG_KEY_FAIL_LOC, - DAOS_REBUILD_TGT_SCAN_HANG | DAOS_FAIL_ALWAYS, 0, NULL); - drain_single_pool_rank(arg, ranks_to_kill[0], false); - - extend_drain_check(dfs_mt, dir, objclass, opc); - - daos_kill_server(arg, arg->pool.pool_uuid, arg->group, arg->pool.alive_svc, - ranks_to_kill[0]); - arg->rebuild_cb = NULL; - arg->rebuild_cb_arg = NULL; - reintegrate_single_pool_rank(arg, 
ranks_to_kill[0], true); - - extend_drain_check(dfs_mt, dir, objclass, opc); - - rc = dfs_release(dir); - assert_int_equal(rc, 0); - rc = dfs_umount(dfs_mt); - assert_int_equal(rc, 0); - - rc = daos_cont_close(co_hdl, NULL); - assert_rc_equal(rc, 0); - - uuid_unparse(co_uuid, str); - rc = daos_cont_destroy(arg->pool.poh, str, 1, NULL); - assert_rc_equal(rc, 0); + return 0; } void dfs_drain_punch(void **state) { - dfs_extend_drain_common(state, EXTEND_DRAIN_PUNCH, OC_EC_2P1GX); - dfs_extend_drain_common(state, EXTEND_DRAIN_PUNCH, OC_EC_4P2GX); + print_message("=== Begin EXTEND_DRAIN_PUNCH, oclass OC_EC_2P1GX\n"); + dfs_extend_drain_common(state, EXTEND_DRAIN_PUNCH, OC_EC_2P1GX, extend_drain_cb_internal); + print_message("=== Begin EXTEND_DRAIN_PUNCH, oclass OC_EC_4P2GX, rebuild stop|start\n"); + dfs_extend_drain_common(state, EXTEND_DRAIN_PUNCH, OC_EC_4P2GX, extend_drain_cb_internal); + T_END(); } void dfs_drain_stat(void **state) { - dfs_extend_drain_common(state, EXTEND_DRAIN_STAT, OC_EC_2P1GX); - dfs_extend_drain_common(state, EXTEND_DRAIN_STAT, OC_EC_4P2GX); + print_message("=== Begin EXTEND_DRAIN_STAT, oclass OC_EC_2P1GX\n"); + dfs_extend_drain_common(state, EXTEND_DRAIN_STAT, OC_EC_2P1GX, extend_drain_cb_internal); + print_message("=== Begin EXTEND_DRAIN_STAT, oclass OC_EC_4P2GX\n"); + dfs_extend_drain_common(state, EXTEND_DRAIN_STAT, OC_EC_4P2GX, extend_drain_cb_internal); + T_END(); } void dfs_drain_enumerate(void **state) { - dfs_extend_drain_common(state, EXTEND_DRAIN_ENUMERATE, OC_EC_2P1GX); - dfs_extend_drain_common(state, EXTEND_DRAIN_ENUMERATE, OC_EC_4P2GX); + print_message("=== Begin EXTEND_DRAIN_ENUMERATE, oclass OC_EC_2P1GX\n"); + dfs_extend_drain_common(state, EXTEND_DRAIN_ENUMERATE, OC_EC_2P1GX, + extend_drain_cb_internal); + print_message("=== Begin EXTEND_DRAIN_ENUMERATE, oclass OC_EC_4P2GX\n"); + dfs_extend_drain_common(state, EXTEND_DRAIN_ENUMERATE, OC_EC_4P2GX, + extend_drain_cb_internal); + T_END(); } void dfs_drain_fetch(void **state) { - 
dfs_extend_drain_common(state, EXTEND_DRAIN_FETCH, OC_EC_2P1GX); - dfs_extend_drain_common(state, EXTEND_DRAIN_FETCH, OC_EC_4P2GX); + print_message("=== Begin EXTEND_DRAIN_FETCH, oclass OC_EC_2P1GX\n"); + dfs_extend_drain_common(state, EXTEND_DRAIN_FETCH, OC_EC_2P1GX, extend_drain_cb_internal); + print_message("=== Begin EXTEND_DRAIN_FETCH, oclass OC_EC_4P2GX, rebuild stop|start\n"); + dfs_extend_drain_common(state, EXTEND_DRAIN_FETCH, OC_EC_4P2GX, extend_drain_cb_internal); + T_END(); } void dfs_drain_update(void **state) { - dfs_extend_drain_common(state, EXTEND_DRAIN_UPDATE, OC_EC_2P1GX); - dfs_extend_drain_common(state, EXTEND_DRAIN_UPDATE, OC_EC_4P2GX); + print_message("=== Begin EXTEND_DRAIN_UPDATE, oclass OC_EC_2P1GX\n"); + dfs_extend_drain_common(state, EXTEND_DRAIN_UPDATE, OC_EC_2P1GX, extend_drain_cb_internal); + print_message("=== Begin EXTEND_DRAIN_UPDATE, oclass OC_EC_4P2GX, rebuild stop|start\n"); + dfs_extend_drain_common(state, EXTEND_DRAIN_UPDATE, OC_EC_4P2GX, extend_drain_cb_internal); + T_END(); } void dfs_drain_overwrite(void **state) { - dfs_extend_drain_common(state, EXTEND_DRAIN_OVERWRITE, OC_EC_2P1GX); - dfs_extend_drain_common(state, EXTEND_DRAIN_OVERWRITE, OC_EC_4P2GX); + print_message("=== Begin EXTEND_DRAIN_OVERWRITE, oclass OC_EC_2P1GX, rebuild stop|start\n"); + dfs_extend_drain_common(state, EXTEND_DRAIN_OVERWRITE, OC_EC_2P1GX, + extend_drain_cb_internal); + print_message("=== Begin EXTEND_DRAIN_OVERWRITE, oclass OC_EC_4P2GX\n"); + dfs_extend_drain_common(state, EXTEND_DRAIN_OVERWRITE, OC_EC_4P2GX, + extend_drain_cb_internal); + T_END(); } void dfs_drain_writeloop(void **state) { - dfs_extend_drain_common(state, EXTEND_DRAIN_WRITELOOP, OC_EC_2P1GX); - dfs_extend_drain_common(state, EXTEND_DRAIN_WRITELOOP, OC_EC_4P2GX); + print_message("=== Begin EXTEND_DRAIN_WRITELOOP, oclass OC_EC_2P1GX\n"); + dfs_extend_drain_common(state, EXTEND_DRAIN_WRITELOOP, OC_EC_2P1GX, + extend_drain_cb_internal); + print_message("=== Begin 
EXTEND_DRAIN_WRITELOOP, oclass OC_EC_4P2GX\n"); + dfs_extend_drain_common(state, EXTEND_DRAIN_WRITELOOP, OC_EC_4P2GX, + extend_drain_cb_internal); + T_END(); } void @@ -1002,6 +850,7 @@ dfs_drain_extend(void **state) if (!test_runable(arg, 3)) return; + T_BEGIN(); attr.da_props = daos_prop_alloc(2); assert_non_null(attr.da_props); attr.da_props->dpp_entries[0].dpe_type = DAOS_PROP_CO_REDUN_LVL; @@ -1049,6 +898,7 @@ dfs_drain_extend(void **state) uuid_unparse(co_uuid, str); rc = daos_cont_destroy(arg->pool.poh, str, 1, NULL); assert_rc_equal(rc, 0); + T_END(); } /** create a new pool/container for each test */ diff --git a/src/tests/suite/daos_extend_common.c b/src/tests/suite/daos_extend_common.c new file mode 100644 index 00000000000..86a8a2e2009 --- /dev/null +++ b/src/tests/suite/daos_extend_common.c @@ -0,0 +1,223 @@ +/** + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ +/** + * This file is for common functions used between daos_extend_simple.c and + * daos_rebuild_interactive.c tests. 
+ * + * tests/suite/daos_extend_common.c + * + */ +#define D_LOGFAC DD_FAC(tests) + +#include "daos_test.h" +#include "daos_iotest.h" +#include "dfs_test.h" +#include +#include +#include + +/* clang-format off */ +const char *extend_opstrs[] = { + "EXTEND_PUNCH", + "EXTEND_STAT", + "EXTEND_ENUMERATE", + "EXTEND_FETCH", + "EXTEND_UPDATE" +}; +/* clang-format on */ + +void +extend_read_check(dfs_t *dfs_mt, dfs_obj_t *dir) +{ + char *buf = NULL; + char *verify_buf = NULL; + daos_size_t buf_size = 512 * 1024; + d_sg_list_t sgl; + d_iov_t iov; + d_iov_t verify_iov; + int i; + + buf = malloc(buf_size); + verify_buf = malloc(buf_size); + print_message("%s(): allocations buf_size=" DF_U64 ", buf=%p, verify_buf=%p\n", + __FUNCTION__, buf_size, buf, verify_buf); + assert_non_null(buf); + assert_non_null(verify_buf); + d_iov_set(&iov, buf, buf_size); + d_iov_set(&verify_iov, buf, buf_size); + sgl.sg_nr = 1; + sgl.sg_iovs = &iov; + + for (i = 0; i < 20; i++) { + char filename[32]; + daos_size_t read_size = buf_size; + dfs_obj_t *obj; + int rc; + + sprintf(filename, "file%d", i); + rc = dfs_open(dfs_mt, dir, filename, S_IFREG | S_IWUSR | S_IRUSR, O_RDWR, + OC_EC_2P1GX, 1048576, NULL, &obj); + print_message("%s(): dfs_open(filename=%s) rc=%d\n", __FUNCTION__, filename, rc); + assert_int_equal(rc, 0); + + memset(verify_buf, 'a' + i, buf_size); + rc = dfs_read(dfs_mt, obj, &sgl, 0, &read_size, NULL); + print_message("%s(): dfs_read() read_size=" DF_U64 ", rc=%d\n", __FUNCTION__, + read_size, rc); + assert_int_equal(rc, 0); + assert_int_equal((int)read_size, buf_size); + assert_memory_equal(buf, verify_buf, read_size); + rc = dfs_release(obj); + print_message("%s(): dfs_release() rc=%d\n", __FUNCTION__, rc); + assert_int_equal(rc, 0); + } + free(buf); + free(verify_buf); + print_message("%s(): done, freed buf and verify_buf\n", __FUNCTION__); +} + +void +extend_write(dfs_t *dfs_mt, dfs_obj_t *dir) +{ + char *buf = NULL; + daos_size_t buf_size = 512 * 1024; + d_sg_list_t sgl; + 
d_iov_t iov; + int i; + + buf = malloc(buf_size); + assert_non_null(buf); + d_iov_set(&iov, buf, buf_size); + sgl.sg_nr = 1; + sgl.sg_iovs = &iov; + + for (i = 0; i < 20; i++) { + char filename[32]; + dfs_obj_t *obj; + int rc; + + sprintf(filename, "file%d", i); + rc = dfs_open(dfs_mt, dir, filename, S_IFREG | S_IWUSR | S_IRUSR, O_RDWR | O_CREAT, + OC_EC_2P1GX, 1048576, NULL, &obj); + assert_int_equal(rc, 0); + + memset(buf, 'a' + i, buf_size); + rc = dfs_write(dfs_mt, obj, &sgl, 0, NULL); + assert_int_equal(rc, 0); + rc = dfs_release(obj); + assert_int_equal(rc, 0); + } + free(buf); +} + +void +dfs_extend_internal(void **state, int opc, test_rebuild_cb_t extend_cb, bool kill) +{ + test_arg_t *arg = *state; + dfs_t *dfs_mt; + daos_handle_t co_hdl; + dfs_obj_t *obj; + dfs_obj_t *dir; + uuid_t co_uuid; + int i; + d_rank_t extend_rank = 3; + char str[37]; + daos_obj_id_t oids[EXTEND_OBJ_NR]; + struct extend_cb_arg cb_arg; + dfs_attr_t attr = {}; + int rc; + + attr.da_props = daos_prop_alloc(2); + assert_non_null(attr.da_props); + attr.da_props->dpp_entries[0].dpe_type = DAOS_PROP_CO_REDUN_LVL; + attr.da_props->dpp_entries[0].dpe_val = DAOS_PROP_CO_REDUN_RANK; + attr.da_props->dpp_entries[1].dpe_type = DAOS_PROP_CO_REDUN_FAC; + attr.da_props->dpp_entries[1].dpe_val = DAOS_PROP_CO_REDUN_RF1; + rc = dfs_cont_create(arg->pool.poh, &co_uuid, &attr, &co_hdl, &dfs_mt); + daos_prop_free(attr.da_props); + assert_int_equal(rc, 0); + print_message("Created DFS Container " DF_UUIDF "\n", DP_UUID(co_uuid)); + + rc = dfs_open(dfs_mt, NULL, "dir", S_IFDIR | S_IWUSR | S_IRUSR, O_RDWR | O_CREAT, + OC_EC_2P1GX, 0, NULL, &dir); + assert_int_equal(rc, 0); + + /* Create 1000 files */ + if (opc == EXTEND_FETCH) { + extend_write(dfs_mt, dir); + } else { + for (i = 0; i < EXTEND_OBJ_NR; i++) { + char filename[32]; + + sprintf(filename, "file%d", i); + rc = dfs_open(dfs_mt, dir, filename, S_IFREG | S_IWUSR | S_IRUSR, + O_RDWR | O_CREAT, OC_EC_2P1GX, 1048576, NULL, &obj); + 
assert_int_equal(rc, 0); + dfs_obj2id(obj, &oids[i]); + rc = dfs_release(obj); + assert_int_equal(rc, 0); + } + } + + cb_arg.oids = oids; + cb_arg.dfs_mt = dfs_mt; + cb_arg.dir = dir; + cb_arg.opc = opc; + cb_arg.kill = kill; + if (kill) + cb_arg.rank = 2; + else + cb_arg.rank = 4; + + arg->rebuild_cb = extend_cb; + arg->rebuild_cb_arg = &cb_arg; + + /* HOLD rebuild ULT. FIXME: maybe change to use test_set_engine_fail_loc()? */ + print_message("inject DAOS_REBUILD_TGT_SCAN_HANG fault on engines\n"); + daos_debug_set_params(arg->group, -1, DMG_KEY_FAIL_LOC, + DAOS_REBUILD_TGT_SCAN_HANG | DAOS_FAIL_ALWAYS, 0, NULL); + + arg->no_rebuild = + 1; /* This has no effect for RB_OP_TYPE_ADD - so can this be removed here? */ + extend_single_pool_rank(arg, extend_rank); + arg->no_rebuild = 0; + + print_message("sleep 30 secs for rank %u %s\n", cb_arg.rank, + cb_arg.kill ? "kill/exclude" : "extend"); + sleep(30); + print_message("wait for rebuild due to rank %u extend and rank %u %s\n", extend_rank, + cb_arg.rank, cb_arg.kill ? 
"kill/exclude" : "extend"); + test_rebuild_wait(&arg, 1); + + if (opc == EXTEND_UPDATE) { + print_message("First extend update read check\n"); + extend_read_check(dfs_mt, dir); + } + + arg->rebuild_cb = NULL; + arg->rebuild_cb_arg = NULL; + if (kill) { + print_message("reintegrate rank %u\n", cb_arg.rank); + reintegrate_single_pool_rank(arg, cb_arg.rank, true); + } + + if (opc == EXTEND_UPDATE) { + print_message("Second extend update read check\n"); + extend_read_check(dfs_mt, dir); + } + + rc = dfs_release(dir); + assert_int_equal(rc, 0); + rc = dfs_umount(dfs_mt); + assert_int_equal(rc, 0); + + rc = daos_cont_close(co_hdl, NULL); + assert_rc_equal(rc, 0); + + uuid_unparse(co_uuid, str); + rc = daos_cont_destroy(arg->pool.poh, str, 1, NULL); + assert_rc_equal(rc, 0); +} diff --git a/src/tests/suite/daos_extend_simple.c b/src/tests/suite/daos_extend_simple.c index 98be6290844..f2f5af8bbca 100644 --- a/src/tests/suite/daos_extend_simple.c +++ b/src/tests/suite/daos_extend_simple.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2016-2023 Intel Corporation. 
+ * (C) Copyright 2025 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -12,47 +13,46 @@ * * */ -#define D_LOGFAC DD_FAC(tests) +#define D_LOGFAC DD_FAC(tests) #include "daos_iotest.h" +#include "daos_test.h" #include "dfs_test.h" #include #include #include -#define KEY_NR 10 -#define OBJ_NR 10 +#define KEY_NR 10 +#define OBJ_NR 10 static void extend_dkeys(void **state) { - test_arg_t *arg = *state; - daos_obj_id_t oids[OBJ_NR]; - struct ioreq req; - int i; - int j; - int rc; + test_arg_t *arg = *state; + daos_obj_id_t oids[OBJ_NR]; + struct ioreq req; + int i; + int j; + int rc; - print_message("BEGIN %s\n", __FUNCTION__); + T_BEGIN(); if (!test_runable(arg, 3)) return; for (i = 0; i < OBJ_NR; i++) { - oids[i] = daos_test_oid_gen(arg->coh, OC_RP_3G1, 0, 0, - arg->myrank); + oids[i] = daos_test_oid_gen(arg->coh, OC_RP_3G1, 0, 0, arg->myrank); ioreq_init(&req, arg->coh, oids[i], DAOS_IOD_ARRAY, arg); /** Insert 10 records */ - print_message("Insert %d kv record in object "DF_OID"\n", - KEY_NR, DP_OID(oids[i])); + print_message("Insert %d kv record in object " DF_OID "\n", KEY_NR, + DP_OID(oids[i])); for (j = 0; j < KEY_NR; j++) { - char key[32] = {0}; + char key[32] = {0}; sprintf(key, "dkey_0_%d", j); - insert_single(key, "a_key", 0, "data", - strlen("data") + 1, - DAOS_TX_NONE, &req); + insert_single(key, "a_key", 0, "data", strlen("data") + 1, DAOS_TX_NONE, + &req); } ioreq_fini(&req); } @@ -63,38 +63,38 @@ extend_dkeys(void **state) rc = daos_obj_verify(arg->coh, oids[i], DAOS_EPOCH_MAX); assert_rc_equal(rc, 0); } + + T_END(); } static void extend_akeys(void **state) { - test_arg_t *arg = *state; - daos_obj_id_t oids[OBJ_NR]; - struct ioreq req; - int i; - int j; - int rc; + test_arg_t *arg = *state; + daos_obj_id_t oids[OBJ_NR]; + struct ioreq req; + int i; + int j; + int rc; - print_message("BEGIN %s\n", __FUNCTION__); + T_BEGIN(); if (!test_runable(arg, 3)) return; for (i = 0; i < OBJ_NR; i++) { - oids[i] = 
daos_test_oid_gen(arg->coh, OC_RP_3G1, 0, 0, - arg->myrank); + oids[i] = daos_test_oid_gen(arg->coh, OC_RP_3G1, 0, 0, arg->myrank); ioreq_init(&req, arg->coh, oids[i], DAOS_IOD_ARRAY, arg); /** Insert 10 records */ - print_message("Insert %d kv record in object "DF_OID"\n", - KEY_NR, DP_OID(oids[i])); + print_message("Insert %d kv record in object " DF_OID "\n", KEY_NR, + DP_OID(oids[i])); for (j = 0; j < KEY_NR; j++) { - char akey[16]; + char akey[16]; sprintf(akey, "%d", j); - insert_single("dkey_1_0", akey, 0, "data", - strlen("data") + 1, - DAOS_TX_NONE, &req); + insert_single("dkey_1_0", akey, 0, "data", strlen("data") + 1, DAOS_TX_NONE, + &req); } ioreq_fini(&req); } @@ -104,41 +104,41 @@ extend_akeys(void **state) rc = daos_obj_verify(arg->coh, oids[i], DAOS_EPOCH_MAX); assert_rc_equal(rc, 0); } + + T_END(); } static void extend_indexes(void **state) { - test_arg_t *arg = *state; - daos_obj_id_t oids[OBJ_NR]; - struct ioreq req; - int i; - int j; - int k; - int rc; + test_arg_t *arg = *state; + daos_obj_id_t oids[OBJ_NR]; + struct ioreq req; + int i; + int j; + int k; + int rc; - print_message("BEGIN %s\n", __FUNCTION__); + T_BEGIN(); if (!test_runable(arg, 3)) return; for (i = 0; i < OBJ_NR; i++) { - oids[i] = daos_test_oid_gen(arg->coh, OC_RP_3G1, 0, 0, - arg->myrank); + oids[i] = daos_test_oid_gen(arg->coh, OC_RP_3G1, 0, 0, arg->myrank); ioreq_init(&req, arg->coh, oids[i], DAOS_IOD_ARRAY, arg); /** Insert 10 records */ - print_message("Insert %d kv record in object "DF_OID"\n", - KEY_NR, DP_OID(oids[i])); + print_message("Insert %d kv record in object " DF_OID "\n", KEY_NR, + DP_OID(oids[i])); for (j = 0; j < KEY_NR; j++) { - char key[32] = {0}; + char key[32] = {0}; sprintf(key, "dkey_2_%d", j); for (k = 0; k < 20; k++) - insert_single(key, "a_key", k, "data", - strlen("data") + 1, DAOS_TX_NONE, - &req); + insert_single(key, "a_key", k, "data", strlen("data") + 1, + DAOS_TX_NONE, &req); } ioreq_fini(&req); } @@ -148,39 +148,39 @@ extend_indexes(void 
**state) rc = daos_obj_verify(arg->coh, oids[i], DAOS_EPOCH_MAX); assert_rc_equal(rc, 0); } + + T_END(); } static void extend_large_rec(void **state) { - test_arg_t *arg = *state; - daos_obj_id_t oids[OBJ_NR]; - struct ioreq req; - char buffer[5000]; - int i; - int j; - int rc; + test_arg_t *arg = *state; + daos_obj_id_t oids[OBJ_NR]; + struct ioreq req; + char buffer[5000]; + int i; + int j; + int rc; - print_message("BEGIN %s\n", __FUNCTION__); + T_BEGIN(); if (!test_runable(arg, 3)) return; memset(buffer, 'a', 5000); for (i = 0; i < OBJ_NR; i++) { - oids[i] = daos_test_oid_gen(arg->coh, OC_RP_3G1, 0, 0, - arg->myrank); + oids[i] = daos_test_oid_gen(arg->coh, OC_RP_3G1, 0, 0, arg->myrank); ioreq_init(&req, arg->coh, oids[i], DAOS_IOD_ARRAY, arg); /** Insert 10 records */ - print_message("Insert %d kv record in object "DF_OID"\n", - KEY_NR, DP_OID(oids[i])); + print_message("Insert %d kv record in object " DF_OID "\n", KEY_NR, + DP_OID(oids[i])); for (j = 0; j < KEY_NR; j++) { - char key[32] = {0}; + char key[32] = {0}; sprintf(key, "dkey_3_%d", j); - insert_single(key, "a_key", 0, buffer, 5000, - DAOS_TX_NONE, &req); + insert_single(key, "a_key", 0, buffer, 5000, DAOS_TX_NONE, &req); } ioreq_fini(&req); } @@ -190,28 +190,28 @@ extend_large_rec(void **state) rc = daos_obj_verify(arg->coh, oids[i], DAOS_EPOCH_MAX); assert_rc_equal(rc, 0); } + + T_END(); } static void extend_objects(void **state) { - test_arg_t *arg = *state; - struct ioreq req; - daos_obj_id_t oids[OBJ_NR]; - int i; + test_arg_t *arg = *state; + struct ioreq req; + daos_obj_id_t oids[OBJ_NR]; + int i; - print_message("BEGIN %s\n", __FUNCTION__); + T_BEGIN(); if (!test_runable(arg, 3)) return; for (i = 0; i < OBJ_NR; i++) { - oids[i] = daos_test_oid_gen(arg->coh, OC_S1, 0, - 0, arg->myrank); + oids[i] = daos_test_oid_gen(arg->coh, OC_S1, 0, 0, arg->myrank); ioreq_init(&req, arg->coh, oids[i], DAOS_IOD_ARRAY, arg); - insert_single("dkey", "akey", 0, "data", strlen("data") + 1, - DAOS_TX_NONE, &req); 
+ insert_single("dkey", "akey", 0, "data", strlen("data") + 1, DAOS_TX_NONE, &req); ioreq_fini(&req); } @@ -222,151 +222,53 @@ extend_objects(void **state) ioreq_init(&req, arg->coh, oids[i], DAOS_IOD_ARRAY, arg); memset(buffer, 0, 16); - lookup_single("dkey", "akey", 0, buffer, 16, - DAOS_TX_NONE, &req); + lookup_single("dkey", "akey", 0, buffer, 16, DAOS_TX_NONE, &req); assert_string_equal(buffer, "data"); ioreq_fini(&req); } -} - -#define EXTEND_OBJ_NR 1000 -struct extend_cb_arg{ - daos_obj_id_t *oids; - dfs_t *dfs_mt; - dfs_obj_t *dir; - d_rank_t rank; - int opc; - bool kill; -}; - -enum extend_opc { - EXTEND_PUNCH, - EXTEND_STAT, - EXTEND_ENUMERATE, - EXTEND_FETCH, - EXTEND_UPDATE, -}; -static void -extend_read_check(dfs_t *dfs_mt, dfs_obj_t *dir) -{ - char *buf = NULL; - char *verify_buf = NULL; - daos_size_t buf_size = 512 * 1024; - d_sg_list_t sgl; - d_iov_t iov; - d_iov_t verify_iov; - int i; - - buf = malloc(buf_size); - verify_buf = malloc(buf_size); - print_message("%s(): allocations buf_size=" DF_U64 ", buf=%p, verify_buf=%p\n", - __FUNCTION__, buf_size, buf, verify_buf); - assert_non_null(buf); - assert_non_null(verify_buf); - d_iov_set(&iov, buf, buf_size); - d_iov_set(&verify_iov, buf, buf_size); - sgl.sg_nr = 1; - sgl.sg_iovs = &iov; - - for (i = 0; i < 20; i++) { - char filename[32]; - daos_size_t read_size = buf_size; - dfs_obj_t *obj; - int rc; - - sprintf(filename, "file%d", i); - rc = dfs_open(dfs_mt, dir, filename, S_IFREG | S_IWUSR | S_IRUSR, - O_RDWR, OC_EC_2P1GX, 1048576, NULL, &obj); - print_message("%s(): dfs_open(filename=%s) rc=%d\n", __FUNCTION__, filename, rc); - assert_int_equal(rc, 0); - - memset(verify_buf, 'a' + i, buf_size); - rc = dfs_read(dfs_mt, obj, &sgl, 0, &read_size, NULL); - print_message("%s(): dfs_read() read_size=" DF_U64 ", rc=%d\n", __FUNCTION__, - read_size, rc); - assert_int_equal(rc, 0); - assert_int_equal((int)read_size, buf_size); - assert_memory_equal(buf, verify_buf, read_size); - rc = dfs_release(obj); - 
print_message("%s(): dfs_release() rc=%d\n", __FUNCTION__, rc); - assert_int_equal(rc, 0); - } - free(buf); - free(verify_buf); - print_message("%s(): done, freed buf and verify_buf\n", __FUNCTION__); -} - -static void -extend_write(dfs_t *dfs_mt, dfs_obj_t *dir) -{ - char *buf = NULL; - daos_size_t buf_size = 512 * 1024; - d_sg_list_t sgl; - d_iov_t iov; - int i; - - buf = malloc(buf_size); - assert_non_null(buf); - d_iov_set(&iov, buf, buf_size); - sgl.sg_nr = 1; - sgl.sg_iovs = &iov; - - for (i = 0; i < 20; i++) { - char filename[32]; - dfs_obj_t *obj; - int rc; - - sprintf(filename, "file%d", i); - rc = dfs_open(dfs_mt, dir, filename, S_IFREG | S_IWUSR | S_IRUSR, - O_RDWR | O_CREAT, OC_EC_2P1GX, 1048576, NULL, &obj); - assert_int_equal(rc, 0); - - memset(buf, 'a' + i, buf_size); - rc = dfs_write(dfs_mt, obj, &sgl, 0, NULL); - assert_int_equal(rc, 0); - rc = dfs_release(obj); - assert_int_equal(rc, 0); - } - free(buf); + T_END(); } static int extend_cb_internal(void *arg) { - test_arg_t *test_arg = arg; - struct extend_cb_arg *cb_arg = test_arg->rebuild_cb_arg; - dfs_t *dfs_mt = cb_arg->dfs_mt; - daos_obj_id_t *oids = cb_arg->oids; - dfs_obj_t *dir = cb_arg->dir; - struct dirent ents[10]; - int opc = cb_arg->opc; - int total_entries = 0; - uint32_t num_ents = 10; - daos_anchor_t anchor = { 0 }; - const char *pre_op = (cb_arg->kill ? "kill" : "extend"); - int rc; - int i; - - print_message("sleep 10 seconds then %s %u and start op %d\n", pre_op, - cb_arg->rank, opc); + test_arg_t *test_arg = arg; + struct extend_cb_arg *cb_arg = test_arg->rebuild_cb_arg; + dfs_t *dfs_mt = cb_arg->dfs_mt; + daos_obj_id_t *oids = cb_arg->oids; + dfs_obj_t *dir = cb_arg->dir; + struct dirent ents[10]; + int opc = cb_arg->opc; + int total_entries = 0; + uint32_t num_ents = 10; + daos_anchor_t anchor = {0}; + const char *pre_op = (cb_arg->kill ? 
"kill" : "extend"); + int rc; + int i; + + print_message("Extending, sleep 10, %s another rank %u, and start op %d (%s)\n", pre_op, + cb_arg->rank, opc, extend_opstrs[opc]); + sleep(10); if (cb_arg->kill) { + /* Kill another rank during extend */ daos_kill_server(test_arg, test_arg->pool.pool_uuid, test_arg->group, test_arg->pool.alive_svc, cb_arg->rank); } else { - /* it should fail with -DER_BUSY */ + /* Extend another rank during extend */ print_message("extend pool " DF_UUID " rank %u\n", DP_UUID(test_arg->pool.pool_uuid), cb_arg->rank); rc = dmg_pool_extend(test_arg->dmg_config, test_arg->pool.pool_uuid, test_arg->group, &cb_arg->rank, 1); assert_int_equal(rc, 0); } - /* Kill another rank during extend */ - switch(opc) { + + switch (opc) { case EXTEND_PUNCH: - print_message("punch objects during %s\n", pre_op); + print_message("punch objects during extend one rank, %s rank %u\n", pre_op, + cb_arg->rank); for (i = 0; i < EXTEND_OBJ_NR; i++) { char filename[32]; @@ -376,10 +278,11 @@ extend_cb_internal(void *arg) } break; case EXTEND_STAT: - print_message("stat objects during %s\n", pre_op); + print_message("stat objects during extend one rank, %s rank %u\n", pre_op, + cb_arg->rank); for (i = 0; i < EXTEND_OBJ_NR; i++) { - char filename[32]; - struct stat stbuf; + char filename[32]; + struct stat stbuf; sprintf(filename, "file%d", i); rc = dfs_stat(dfs_mt, dir, filename, &stbuf); @@ -387,21 +290,24 @@ extend_cb_internal(void *arg) } break; case EXTEND_ENUMERATE: - print_message("enumerate objects during %s\n", pre_op); + print_message("enumerate objects during extend one rank, %s rank %u\n", pre_op, + cb_arg->rank); while (!daos_anchor_is_eof(&anchor)) { num_ents = 10; - rc = dfs_readdir(dfs_mt, dir, &anchor, &num_ents, ents); + rc = dfs_readdir(dfs_mt, dir, &anchor, &num_ents, ents); assert_int_equal(rc, 0); total_entries += num_ents; } assert_int_equal(total_entries, 1000); break; case EXTEND_FETCH: - print_message("fetch objects during %s\n", pre_op); + 
print_message("fetch objects during extend one rank, %s rank %u\n", pre_op, + cb_arg->rank); extend_read_check(dfs_mt, dir); break; case EXTEND_UPDATE: - print_message("update objects during %s\n", pre_op); + print_message("update objects during extend one rank, %s rank %u\n", pre_op, + cb_arg->rank); extend_write(dfs_mt, dir); break; default: @@ -413,121 +319,14 @@ extend_cb_internal(void *arg) return 0; } -void -dfs_extend_internal(void **state, int opc, test_rebuild_cb_t extend_cb, bool kill) -{ - test_arg_t *arg = *state; - dfs_t *dfs_mt; - daos_handle_t co_hdl; - dfs_obj_t *obj; - dfs_obj_t *dir; - uuid_t co_uuid; - int i; - d_rank_t extend_rank = 3; - char str[37]; - daos_obj_id_t oids[EXTEND_OBJ_NR]; - struct extend_cb_arg cb_arg; - dfs_attr_t attr = {}; - int rc; - - attr.da_props = daos_prop_alloc(2); - assert_non_null(attr.da_props); - attr.da_props->dpp_entries[0].dpe_type = DAOS_PROP_CO_REDUN_LVL; - attr.da_props->dpp_entries[0].dpe_val = DAOS_PROP_CO_REDUN_RANK; - attr.da_props->dpp_entries[1].dpe_type = DAOS_PROP_CO_REDUN_FAC; - attr.da_props->dpp_entries[1].dpe_val = DAOS_PROP_CO_REDUN_RF1; - rc = dfs_cont_create(arg->pool.poh, &co_uuid, &attr, &co_hdl, &dfs_mt); - daos_prop_free(attr.da_props); - assert_int_equal(rc, 0); - print_message("Created DFS Container "DF_UUIDF"\n", DP_UUID(co_uuid)); - - rc = dfs_open(dfs_mt, NULL, "dir", S_IFDIR | S_IWUSR | S_IRUSR, - O_RDWR | O_CREAT, OC_EC_2P1GX, 0, NULL, &dir); - assert_int_equal(rc, 0); - - /* Create 1000 files */ - if (opc == EXTEND_FETCH) { - extend_write(dfs_mt, dir); - } else { - for (i = 0; i < EXTEND_OBJ_NR; i++) { - char filename[32]; - - sprintf(filename, "file%d", i); - rc = dfs_open(dfs_mt, dir, filename, S_IFREG | S_IWUSR | S_IRUSR, - O_RDWR | O_CREAT, OC_EC_2P1GX, 1048576, NULL, &obj); - assert_int_equal(rc, 0); - dfs_obj2id(obj, &oids[i]); - rc = dfs_release(obj); - assert_int_equal(rc, 0); - } - } - - cb_arg.oids = oids; - cb_arg.dfs_mt = dfs_mt; - cb_arg.dir = dir; - cb_arg.opc = opc; - 
cb_arg.kill = kill; - if (kill) - cb_arg.rank = 2; - else - cb_arg.rank = 4; - - arg->rebuild_cb = extend_cb; - arg->rebuild_cb_arg = &cb_arg; - - /* HOLD rebuild ULT. FIXME: maybe change to use test_set_engine_fail_loc()? */ - print_message("inject DAOS_REBUILD_TGT_SCAN_HANG fault on engines\n"); - daos_debug_set_params(arg->group, -1, DMG_KEY_FAIL_LOC, - DAOS_REBUILD_TGT_SCAN_HANG | DAOS_FAIL_ALWAYS, 0, NULL); - - arg->no_rebuild=1; - extend_single_pool_rank(arg, extend_rank); - arg->no_rebuild=0; - - print_message("sleep 30 secs for rank %u %s\n", cb_arg.rank, - cb_arg.kill ? "exclude" : "extend"); - sleep(30); - print_message("wait for rebuild due to rank %u extend and rank %u %s\n", extend_rank, - cb_arg.rank, cb_arg.kill ? "exclude" : "extend"); - test_rebuild_wait(&arg, 1); - - if (opc == EXTEND_UPDATE) { - print_message("First extend update read check\n"); - extend_read_check(dfs_mt, dir); - } - - arg->rebuild_cb = NULL; - arg->rebuild_cb_arg = NULL; - if (kill) { - print_message("reintegrate rank %u\n", cb_arg.rank); - reintegrate_single_pool_rank(arg, cb_arg.rank, true); - } - - if (opc == EXTEND_UPDATE) { - print_message("Second extend update read check\n"); - extend_read_check(dfs_mt, dir); - } - - rc = dfs_release(dir); - assert_int_equal(rc, 0); - rc = dfs_umount(dfs_mt); - assert_int_equal(rc, 0); - - rc = daos_cont_close(co_hdl, NULL); - assert_rc_equal(rc, 0); - - uuid_unparse(co_uuid, str); - rc = daos_cont_destroy(arg->pool.poh, str, 1, NULL); - assert_rc_equal(rc, 0); -} - void dfs_extend_punch_kill(void **state) { FAULT_INJECTION_REQUIRED(); - print_message("BEGIN %s\n", __FUNCTION__); + T_BEGIN(); dfs_extend_internal(state, EXTEND_PUNCH, extend_cb_internal, true); + T_END(); } void @@ -535,8 +334,9 @@ dfs_extend_punch_extend(void **state) { FAULT_INJECTION_REQUIRED(); - print_message("BEGIN %s\n", __FUNCTION__); + T_BEGIN(); dfs_extend_internal(state, EXTEND_PUNCH, extend_cb_internal, false); + T_END(); } void @@ -544,8 +344,9 @@ 
dfs_extend_stat_kill(void **state) { FAULT_INJECTION_REQUIRED(); - print_message("BEGIN %s\n", __FUNCTION__); + T_BEGIN(); dfs_extend_internal(state, EXTEND_STAT, extend_cb_internal, true); + T_END(); } void @@ -553,8 +354,9 @@ dfs_extend_stat_extend(void **state) { FAULT_INJECTION_REQUIRED(); - print_message("BEGIN %s\n", __FUNCTION__); + T_BEGIN(); dfs_extend_internal(state, EXTEND_STAT, extend_cb_internal, false); + T_END(); } void @@ -562,8 +364,9 @@ dfs_extend_enumerate_kill(void **state) { FAULT_INJECTION_REQUIRED(); - print_message("BEGIN %s\n", __FUNCTION__); + T_BEGIN(); dfs_extend_internal(state, EXTEND_ENUMERATE, extend_cb_internal, true); + T_END(); } void @@ -571,8 +374,9 @@ dfs_extend_enumerate_extend(void **state) { FAULT_INJECTION_REQUIRED(); - print_message("BEGIN %s\n", __FUNCTION__); + T_BEGIN(); dfs_extend_internal(state, EXTEND_ENUMERATE, extend_cb_internal, false); + T_END(); } void @@ -580,8 +384,9 @@ dfs_extend_fetch_kill(void **state) { FAULT_INJECTION_REQUIRED(); - print_message("BEGIN %s\n", __FUNCTION__); + T_BEGIN(); dfs_extend_internal(state, EXTEND_FETCH, extend_cb_internal, true); + T_END(); } void @@ -614,14 +419,14 @@ dfs_extend_write_extend(void **state) void dfs_extend_fail_retry(void **state) { - test_arg_t *arg = *state; - dfs_t *dfs_mt; - daos_handle_t co_hdl; - dfs_obj_t *dir; - uuid_t co_uuid; - char str[37]; - dfs_attr_t attr = {}; - int rc; + test_arg_t *arg = *state; + dfs_t *dfs_mt; + daos_handle_t co_hdl; + dfs_obj_t *dir; + uuid_t co_uuid; + char str[37]; + dfs_attr_t attr = {}; + int rc; FAULT_INJECTION_REQUIRED(); @@ -630,14 +435,14 @@ dfs_extend_fail_retry(void **state) attr.da_props = daos_prop_alloc(1); assert_non_null(attr.da_props); attr.da_props->dpp_entries[0].dpe_type = DAOS_PROP_CO_REDUN_LVL; - attr.da_props->dpp_entries[0].dpe_val = DAOS_PROP_CO_REDUN_RANK; + attr.da_props->dpp_entries[0].dpe_val = DAOS_PROP_CO_REDUN_RANK; rc = dfs_cont_create(arg->pool.poh, &co_uuid, &attr, &co_hdl, &dfs_mt); 
daos_prop_free(attr.da_props); assert_int_equal(rc, 0); - print_message("Created DFS Container "DF_UUIDF"\n", DP_UUID(co_uuid)); + print_message("Created DFS Container " DF_UUIDF "\n", DP_UUID(co_uuid)); - rc = dfs_open(dfs_mt, NULL, "dir", S_IFDIR | S_IWUSR | S_IRUSR, - O_RDWR | O_CREAT, OC_EC_2P1GX, 0, NULL, &dir); + rc = dfs_open(dfs_mt, NULL, "dir", S_IFDIR | S_IWUSR | S_IRUSR, O_RDWR | O_CREAT, + OC_EC_2P1GX, 0, NULL, &dir); assert_int_equal(rc, 0); extend_write(dfs_mt, dir); @@ -673,55 +478,53 @@ dfs_extend_fail_retry(void **state) /** create a new pool/container for each test */ static const struct CMUnitTest extend_tests[] = { - {"EXTEND1: extend small rec multiple dkeys", - extend_dkeys, rebuild_sub_3nodes_rf0_setup, test_teardown}, - {"EXTEND2: extend small rec multiple akeys", - extend_akeys, rebuild_sub_3nodes_rf0_setup, test_teardown}, - {"EXTEND3: extend small rec multiple indexes", - extend_indexes, rebuild_sub_3nodes_rf0_setup, test_teardown}, - {"EXTEND4: extend large rec single index", - extend_large_rec, rebuild_sub_3nodes_rf0_setup, test_teardown}, - {"EXTEND5: extend multiple objects", - extend_objects, rebuild_sub_3nodes_rf0_setup, test_teardown}, - {"EXTEND6: punch object during extend and kill", - dfs_extend_punch_kill, rebuild_sub_3nodes_rf0_setup, test_teardown}, - {"EXTEND7: punch object during extend and extend", - dfs_extend_punch_extend, rebuild_sub_3nodes_rf0_setup, test_teardown}, - {"EXTEND8: stat object during extend and kill", - dfs_extend_stat_kill, rebuild_sub_3nodes_rf0_setup, test_teardown}, - {"EXTEND9: stat object during extend and extend", - dfs_extend_stat_extend, rebuild_sub_3nodes_rf0_setup, test_teardown}, - {"EXTEND10: enumerate object during extend and kill", - dfs_extend_enumerate_kill, rebuild_sub_3nodes_rf0_setup, test_teardown}, - {"EXTEND11: enumerate object during extend and extend", - dfs_extend_enumerate_extend, rebuild_sub_3nodes_rf0_setup, test_teardown}, - {"EXTEND12: read object during extend and kill", - 
dfs_extend_fetch_kill, rebuild_sub_3nodes_rf0_setup, test_teardown}, - {"EXTEND13: read object during extend and extend", - dfs_extend_fetch_extend, rebuild_sub_3nodes_rf0_setup, test_teardown}, - {"EXTEND14: write object during extend and kill", - dfs_extend_write_kill, rebuild_sub_3nodes_rf0_setup, test_teardown}, - {"EXTEND15: write object during extend and extend", - dfs_extend_write_extend, rebuild_sub_3nodes_rf0_setup, test_teardown}, - {"EXTEND16: extend fail then retry", - dfs_extend_fail_retry, rebuild_sub_3nodes_rf0_setup, test_teardown}, + {"EXTEND1: extend small rec multiple dkeys", extend_dkeys, rebuild_sub_3nodes_rf0_setup, + test_teardown}, + {"EXTEND2: extend small rec multiple akeys", extend_akeys, rebuild_sub_3nodes_rf0_setup, + test_teardown}, + {"EXTEND3: extend small rec multiple indexes", extend_indexes, rebuild_sub_3nodes_rf0_setup, + test_teardown}, + {"EXTEND4: extend large rec single index", extend_large_rec, rebuild_sub_3nodes_rf0_setup, + test_teardown}, + {"EXTEND5: extend multiple objects", extend_objects, rebuild_sub_3nodes_rf0_setup, + test_teardown}, + {"EXTEND6: punch object during extend and kill", dfs_extend_punch_kill, + rebuild_sub_3nodes_rf0_setup, test_teardown}, + {"EXTEND7: punch object during extend and extend", dfs_extend_punch_extend, + rebuild_sub_3nodes_rf0_setup, test_teardown}, + {"EXTEND8: stat object during extend and kill", dfs_extend_stat_kill, + rebuild_sub_3nodes_rf0_setup, test_teardown}, + {"EXTEND9: stat object during extend and extend", dfs_extend_stat_extend, + rebuild_sub_3nodes_rf0_setup, test_teardown}, + {"EXTEND10: enumerate object during extend and kill", dfs_extend_enumerate_kill, + rebuild_sub_3nodes_rf0_setup, test_teardown}, + {"EXTEND11: enumerate object during extend and extend", dfs_extend_enumerate_extend, + rebuild_sub_3nodes_rf0_setup, test_teardown}, + {"EXTEND12: read object during extend and kill", dfs_extend_fetch_kill, + rebuild_sub_3nodes_rf0_setup, test_teardown}, + {"EXTEND13: read 
object during extend and extend", dfs_extend_fetch_extend, + rebuild_sub_3nodes_rf0_setup, test_teardown}, + {"EXTEND14: write object during extend and kill", dfs_extend_write_kill, + rebuild_sub_3nodes_rf0_setup, test_teardown}, + {"EXTEND15: write object during extend and extend", dfs_extend_write_extend, + rebuild_sub_3nodes_rf0_setup, test_teardown}, + {"EXTEND16: extend fail then retry", dfs_extend_fail_retry, rebuild_sub_3nodes_rf0_setup, + test_teardown}, }; int -run_daos_extend_simple_test(int rank, int size, int *sub_tests, - int sub_tests_size) +run_daos_extend_simple_test(int rank, int size, int *sub_tests, int sub_tests_size) { int rc = 0; par_barrier(PAR_COMM_WORLD); if (sub_tests_size == 0) { sub_tests_size = ARRAY_SIZE(extend_tests); - sub_tests = NULL; + sub_tests = NULL; } - run_daos_sub_tests_only("DAOS_Extend_Simple", extend_tests, - ARRAY_SIZE(extend_tests), sub_tests, - sub_tests_size); + run_daos_sub_tests_only("DAOS_Extend_Simple", extend_tests, ARRAY_SIZE(extend_tests), + sub_tests, sub_tests_size); par_barrier(PAR_COMM_WORLD); diff --git a/src/tests/suite/daos_rebuild_common.c b/src/tests/suite/daos_rebuild_common.c index 529894af7a7..8407d4af5b0 100644 --- a/src/tests/suite/daos_rebuild_common.c +++ b/src/tests/suite/daos_rebuild_common.c @@ -975,8 +975,10 @@ reintegrate_inflight_io(void *data) daos_obj_id_t oid = *(daos_obj_id_t *)arg->rebuild_cb_arg; char single_data[LARGE_SINGLE_VALUE_SIZE]; struct ioreq req; + bool interactive_rebuild = arg->interactive_rebuild && !arg->no_rebuild; int i; + print_message("%s(): begin\n", __FUNCTION__); ioreq_init(&req, arg->coh, oid, DAOS_IOD_ARRAY, arg); for (i = 0; i < 5; i++) { char key[64]; @@ -996,6 +998,14 @@ reintegrate_inflight_io(void *data) insert_recxs(key, "a_key_1M", 1, DAOS_TX_NONE, &recx, 1, buf, DATA_SIZE, &req); + /* Stop the rebuild */ + if (i == 3 && interactive_rebuild) { + print_message("%s(): stop rebuild in middle of inflight IO\n", + __FUNCTION__); + rebuild_stop_with_dmg(arg); 
+ test_rebuild_wait(&arg, 1); /* rebuild is stopped here */ + } + req.iod_type = DAOS_IOD_SINGLE; memset(single_data, 'a' + i, LARGE_SINGLE_VALUE_SIZE); sprintf(key, "d_inflight_single_small_%d", i); @@ -1007,7 +1017,16 @@ reintegrate_inflight_io(void *data) &req); } ioreq_fini(&req); - print_message("sleep 12 seconds to wait for the stable epoch update.\n"); + + /* Resume the rebuild */ + if (interactive_rebuild) { + print_message("%s(): restart rebuild after remaining inflight IO done\n", + __FUNCTION__); + rebuild_resume_wait_to_start(arg); + } + + print_message("%s() sleep 12 seconds to wait for the stable epoch update and return.\n", + __FUNCTION__); sleep(12); if (arg->myrank == 0) daos_debug_set_params(arg->group, -1, DMG_KEY_FAIL_LOC, 0, 0, @@ -1237,10 +1256,10 @@ rebuild_stop_with_dmg(void *data) { test_arg_t *arg = data; - print_message("wait for rebuild to start for pool " DF_UUID "\n", + print_message("(before stopping) wait for rebuild to start for pool " DF_UUID "\n", DP_UUID(arg->pool.pool_uuid)); test_rebuild_wait_to_start(&arg, 1); - sleep(5); + sleep(4); return rebuild_stop_with_dmg_internal(arg->dmg_config, arg->pool.pool_uuid, arg->group, false); @@ -1252,7 +1271,7 @@ rebuild_force_stop_with_dmg(void *data) { test_arg_t *arg = data; - print_message("wait for rebuild to start for pool " DF_UUID "\n", + print_message("(before stopping) wait for rebuild to start for pool " DF_UUID "\n", DP_UUID(arg->pool.pool_uuid)); test_rebuild_wait_to_start(&arg, 1); sleep(5); @@ -1275,12 +1294,51 @@ rebuild_start_with_dmg(void *data) return 0; } +/* wait for previously-issued dmg pool rebuild stop to finish; + * invoke rebuild start, and make sure it got started before returning. + */ +int +rebuild_resume_wait_to_start(void *data) +{ + test_arg_t *arg = data; + struct daos_rebuild_status *rst = &arg->pool.pool_info.pi_rebuild_st; + bool state_match; + int rc; + + /* Verify that the stop resulted in the correct rebuild status. 
+ * NB: you have to be sure the rebuild stop was issued while rebuild was running + * (e.g., when a fault was injected to hang the rebuild, or with carefully-timed sleeps). + */ + print_message( + "(before starting) wait for stopped rebuild and check: rs_errno=%d (expect %d), " + "rs_state=%d (expect %d)\n", + rst->rs_errno, -DER_OP_CANCELED, rst->rs_state, DRS_NOT_STARTED); + test_rebuild_wait(&arg, 1); + state_match = (rst->rs_errno == -DER_OP_CANCELED && rst->rs_state == DRS_NOT_STARTED); + print_message("%sMATCHED check: rs_errno=%d, rs_state=%d\n", state_match ? "" : "NOT-", + rst->rs_errno, rst->rs_state); + assert_int_equal(rst->rs_errno, -DER_OP_CANCELED); + assert_int_equal(rst->rs_state, DRS_NOT_STARTED); + + rc = rebuild_start_with_dmg(data); + assert_rc_equal(rc, 0); + + /* Verify that the rebuild is no longer stopped (has been restarted). */ + test_rebuild_wait_to_start(&arg, 1); + + return 0; +} + +/* Check rebuild state from previously-stopped rebuild; + * invoke rebuild start and wait for it to completely finish before returning. + */ int rebuild_resume_wait(void *data) { test_arg_t *arg = data; struct daos_rebuild_status *rst = &arg->pool.pool_info.pi_rebuild_st; bool skip_restart = false; + bool state_match; int rc; if (arg->rebuild_cb == rebuild_resume_wait && arg->rebuild_cb_arg) @@ -1288,12 +1346,19 @@ rebuild_resume_wait(void *data) if (arg->rebuild_post_cb == rebuild_resume_wait && arg->rebuild_post_cb_arg) skip_restart = *((bool *)arg->rebuild_post_cb_arg); - /* Verify that the stop resulted in the correct rebuild status */ - print_message("check: stopped rebuild rs_errno=%d (expect %d), rs_state=%d (expect %d)\n", + /* Check whether the stop resulted in the expected rebuild status. + * NB: the stop is already done; the "wait" is just for the pool query rebuild state. + * NB: if the rebuild stop occurred after rebuild completed, we will not see the + * -DER_OP_CANCELED rebuild state. 
Warn in these instances, since it's all up + * to some variable test timing conditions. + */ + print_message("(before starting) check: stopped rebuild rs_errno=%d (want %d), rs_state=%d " + "(want %d)\n", rst->rs_errno, -DER_OP_CANCELED, rst->rs_state, DRS_NOT_STARTED); - assert_int_equal(rst->rs_errno, -DER_OP_CANCELED); - assert_int_equal(rst->rs_state, DRS_NOT_STARTED); - print_message("check passed\n"); + test_rebuild_wait(&arg, 1); + state_match = (rst->rs_errno == -DER_OP_CANCELED && rst->rs_state == DRS_NOT_STARTED); + print_message("%sMATCHED check: rs_errno=%d, rs_state=%d\n", + state_match ? "" : "WARN: NOT-", rst->rs_errno, rst->rs_state); if (skip_restart) return 0; @@ -1308,15 +1373,15 @@ rebuild_resume_wait(void *data) sleep(2); test_rebuild_wait(&arg, 1); print_message( - "current rebuild state: rs_errno=%d (expect %d), rs_state=%d (expect %d)\n", + "waiting rebuild state: rs_errno=%d (wait for %d), rs_state=%d (wait for %d)\n", rst->rs_errno, 0, rst->rs_state, DRS_COMPLETED); } while (rst->rs_errno == -DER_OP_CANCELED); + state_match = (rst->rs_errno == 0 && rst->rs_state == DRS_COMPLETED); print_message( - "check: resumed rebuild done: rs_errno=%d (expect %d), rs_state=%d (expect %d)\n", - rst->rs_errno, 0, rst->rs_state, DRS_COMPLETED); + "check %s: resumed rebuild rs_errno=%d (expect %d), rs_state=%d (expect %d)\n", + state_match ? 
"passed" : "FAILED", rst->rs_errno, 0, rst->rs_state, DRS_COMPLETED); assert_int_equal(rst->rs_errno, 0); assert_int_equal(rst->rs_state, DRS_COMPLETED); - print_message("check passed\n"); return 0; } diff --git a/src/tests/suite/daos_rebuild_interactive.c b/src/tests/suite/daos_rebuild_interactive.c new file mode 100644 index 00000000000..b74a875549a --- /dev/null +++ b/src/tests/suite/daos_rebuild_interactive.c @@ -0,0 +1,682 @@ +/** + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ +/** + * This file is for interactive rebuild stop|start testing based on pool exclude, drain, extend, + * and reintegrate. + * + * tests/suite/daos_rebuild_interactive.c + * + */ +#define D_LOGFAC DD_FAC(tests) + +#include "daos_test.h" +#include "daos_iotest.h" +#include "dfs_test.h" +#include +#include +#include +#include + +#define DEFAULT_FAIL_TGT 0 +#define DRAIN_KEY_NR 50 +#define OBJ_NR 10 + +static void +reintegrate_with_inflight_io(test_arg_t *arg, daos_obj_id_t *oid, d_rank_t rank, int tgt) +{ + daos_obj_id_t inflight_oid; + + if (oid != NULL) { + inflight_oid = *oid; + } else { + inflight_oid = + daos_test_oid_gen(arg->coh, DAOS_OC_R3S_SPEC_RANK, 0, 0, arg->myrank); + inflight_oid = dts_oid_set_rank(inflight_oid, rank); + } + + arg->rebuild_cb = reintegrate_inflight_io; + arg->rebuild_cb_arg = &inflight_oid; + + /* To make sure the IO will be done before reintegration is done */ + if (arg->myrank == 0) + daos_debug_set_params(arg->group, -1, DMG_KEY_FAIL_LOC, + DAOS_REBUILD_TGT_REBUILD_HANG, 0, NULL); + reintegrate_single_pool_target(arg, rank, tgt); + arg->rebuild_cb = NULL; + arg->rebuild_cb_arg = NULL; + + if (oid == NULL) { + int rc; + + rc = daos_obj_verify(arg->coh, inflight_oid, DAOS_EPOCH_MAX); + if (rc != 0) + assert_rc_equal(rc, -DER_NOSYS); + } +} + +#define SNAP_CNT 5 +static void +int_rebuild_snap_update_recs(void **state) +{ + test_arg_t *arg = *state; + daos_obj_id_t oid; + struct ioreq 
req; + daos_recx_t recx; + int tgt = DEFAULT_FAIL_TGT; + char string[100 * SNAP_CNT] = {0}; + daos_epoch_t snap_epoch[SNAP_CNT]; + int i; + int rc; + + if (!test_runable(arg, 4)) + return; + + T_BEGIN(); + oid = daos_test_oid_gen(arg->coh, arg->obj_class, 0, 0, arg->myrank); + oid = dts_oid_set_rank(oid, ranks_to_kill[0]); + oid = dts_oid_set_tgt(oid, tgt); + ioreq_init(&req, arg->coh, oid, DAOS_IOD_ARRAY, arg); + for (i = 0; i < SNAP_CNT; i++) + sprintf(string + strlen(string), "old-snap%d", i); + + recx.rx_idx = 0; + recx.rx_nr = strlen(string); + insert_recxs("d_key", "a_key", 1, DAOS_TX_NONE, &recx, 1, string, strlen(string) + 1, &req); + + for (i = 0; i < SNAP_CNT; i++) { + char data[100] = {0}; + + /* Update string for each snapshot */ + daos_cont_create_snap(arg->coh, &snap_epoch[i], NULL, NULL); + sprintf(data, "new-snap%d", i); + recx.rx_idx = i * strlen(data); + recx.rx_nr = strlen(data); + insert_recxs("d_key", "a_key", 1, DAOS_TX_NONE, &recx, 1, data, strlen(data) + 1, + &req); + } + ioreq_fini(&req); + + /* insert rebuild stop|start into the exclude rebuild execution */ + arg->interactive_rebuild = 1; + arg->rebuild_cb = rebuild_stop_with_dmg; + arg->rebuild_post_cb = rebuild_resume_wait; + rebuild_single_pool_target(arg, ranks_to_kill[0], tgt, false); + arg->rebuild_cb = NULL; + arg->rebuild_post_cb = NULL; + + for (i = 0; i < SNAP_CNT; i++) { + rc = daos_obj_verify(arg->coh, oid, snap_epoch[i]); + if (rc != 0) + assert_rc_equal(rc, -DER_NOSYS); + } + rc = daos_obj_verify(arg->coh, oid, DAOS_EPOCH_MAX); + if (rc != 0) + assert_rc_equal(rc, -DER_NOSYS); + + arg->interactive_rebuild = 0; + reintegrate_with_inflight_io(arg, &oid, ranks_to_kill[0], tgt); + for (i = 0; i < SNAP_CNT; i++) { + rc = daos_obj_verify(arg->coh, oid, snap_epoch[i]); + if (rc != 0) + assert_rc_equal(rc, -DER_NOSYS); + } + rc = daos_obj_verify(arg->coh, oid, DAOS_EPOCH_MAX); + if (rc != 0) + assert_rc_equal(rc, -DER_NOSYS); + T_END(); +} + +static void 
+int_rebuild_snap_punch_recs(void **state) +{ + test_arg_t *arg = *state; + daos_obj_id_t oid; + struct ioreq req; + daos_recx_t recx; + int tgt = DEFAULT_FAIL_TGT; + char string[200]; + daos_epoch_t snap_epoch[SNAP_CNT]; + int i; + int rc; + + if (!test_runable(arg, 4)) + return; + + T_BEGIN(); + oid = daos_test_oid_gen(arg->coh, arg->obj_class, 0, 0, arg->myrank); + oid = dts_oid_set_rank(oid, ranks_to_kill[0]); + oid = dts_oid_set_tgt(oid, tgt); + ioreq_init(&req, arg->coh, oid, DAOS_IOD_ARRAY, arg); + for (i = 0; i < SNAP_CNT; i++) + sprintf(string + strlen(string), "old-snap%d", i); + + recx.rx_idx = 0; + recx.rx_nr = strlen(string); + insert_recxs("d_key", "a_key", 1, DAOS_TX_NONE, &recx, 1, string, strlen(string) + 1, &req); + + for (i = 0; i < SNAP_CNT; i++) { + /* punch string */ + daos_cont_create_snap(arg->coh, &snap_epoch[i], NULL, NULL); + recx.rx_idx = i * 9; /* strlen("old-snap%d") */ + recx.rx_nr = 9; + punch_recxs("d_key", "a_key", &recx, 1, DAOS_TX_NONE, &req); + } + ioreq_fini(&req); + + rebuild_single_pool_target(arg, ranks_to_kill[0], tgt, false); + + for (i = 0; i < SNAP_CNT; i++) { + rc = daos_obj_verify(arg->coh, oid, snap_epoch[i]); + if (rc != 0) + assert_rc_equal(rc, -DER_NOSYS); + } + rc = daos_obj_verify(arg->coh, oid, DAOS_EPOCH_MAX); + if (rc != 0) + assert_rc_equal(rc, -DER_NOSYS); + + /* insert rebuild stop|start into the reintegrate rebuild execution */ + arg->interactive_rebuild = 1; + reintegrate_with_inflight_io(arg, &oid, ranks_to_kill[0], tgt); + for (i = 0; i < SNAP_CNT; i++) { + rc = daos_obj_verify(arg->coh, oid, snap_epoch[i]); + if (rc != 0) + assert_rc_equal(rc, -DER_NOSYS); + } + rc = daos_obj_verify(arg->coh, oid, DAOS_EPOCH_MAX); + if (rc != 0) + assert_rc_equal(rc, -DER_NOSYS); + T_END(); +} + +static int +rebuild_wait_error_reset_fail_cb(void *data) +{ + test_arg_t *arg = data; + int rc; + + print_message("wait until rebuild errors (and starts Fail_reclaim)\n"); + test_rebuild_wait_to_error(&arg, 1); + 
print_message("check rebuild errored, rs_errno=%d (expecting -DER_IO=%d)\n", + arg->pool.pool_info.pi_rebuild_st.rs_errno, -DER_IO); + assert_int_equal(arg->pool.pool_info.pi_rebuild_st.rs_errno, -DER_IO); + print_message("rebuild error code check passed\n"); + + print_message("clearing fault injection on all engines\n"); + daos_debug_set_params(arg->group, -1, DMG_KEY_FAIL_LOC, 0, 0, NULL); + daos_debug_set_params(arg->group, -1, DMG_KEY_FAIL_VALUE, 0, 0, NULL); + daos_debug_set_params(arg->group, -1, DMG_KEY_FAIL_NUM, 0, 0, NULL); + + /* Give time for transition from op:Rebuild into op:Fail_reclaim */ + sleep(2); + + print_message( + "send rebuild stop --force request during first/only Fail_reclaim operation\n"); + rc = rebuild_force_stop_with_dmg(data); + if (rc != 0) + print_message("rebuild_force_stop_with_dmg failed, rc=%d\n", rc); + + print_message("wait for rebuild to be stopped\n"); + test_rebuild_wait(&arg, 1); + /* Verifying rs_state/rs_errno will happen in post_cb rebuild_resume_wait() */ + + return rc; +} + +static void +int_rebuild_many_objects_with_failure(void **state) +{ + test_arg_t *arg = *state; + daos_obj_id_t *oids; + const int NUM_OBJS = 500; + int rc; + int i; + + if (!test_runable(arg, 6)) + return; + + T_BEGIN(); + arg->interactive_rebuild = 1; + D_ALLOC_ARRAY(oids, NUM_OBJS); + for (i = 0; i < NUM_OBJS; i++) { + char buffer[256]; + daos_recx_t recx; + struct ioreq req; + + oids[i] = daos_test_oid_gen(arg->coh, OC_RP_3G1, 0, 0, arg->myrank); + ioreq_init(&req, arg->coh, oids[i], DAOS_IOD_ARRAY, arg); + memset(buffer, 'a', 256); + recx.rx_idx = 0; + recx.rx_nr = 256; + insert_recxs("d_key", "a_key", 1, DAOS_TX_NONE, &recx, 1, buffer, 256, &req); + + ioreq_fini(&req); + } + + /* Inject faults on engines. 
Special handling for interactive_rebuild case */ + if (arg->myrank == 0) { + print_message("inject fault DAOS_REBUILD_OBJ_FAIL on all engines\n"); + daos_debug_set_params(arg->group, -1, DMG_KEY_FAIL_LOC, + DAOS_REBUILD_OBJ_FAIL | DAOS_FAIL_ALWAYS, 0, NULL); + daos_debug_set_params(arg->group, -1, DMG_KEY_FAIL_VALUE, 3, 0, NULL); + } + + /* For interactive rebuild, we need: + * 1. trigger rebuild (which will fail), query pool reubild state until op:Rebuild fails + * and op:Fail_reclaim begins. See test_rebuild_wait_to_error(). + * 2. Then, while rebuild is in op:Fail_reclaim, issue dmg system stop to test that you + * can't stop during Fail_reclaim (though the command will take effect by not retrying + * rebuild). + */ + arg->rebuild_cb = rebuild_wait_error_reset_fail_cb; + arg->rebuild_post_cb = rebuild_resume_wait; + rebuild_single_pool_target(arg, 3, -1, false); + + for (i = 0; i < NUM_OBJS; i++) { + rc = daos_obj_verify(arg->coh, oids[i], DAOS_EPOCH_MAX); + if (rc != 0) + assert_rc_equal(rc, -DER_NOSYS); + } + D_FREE(oids); + T_END(); +} + +static int +cont_open_and_inflight_io(void *data) +{ + test_arg_t *arg = data; + int rc; + + assert_int_equal(arg->setup_state, SETUP_CONT_CREATE); + rc = test_setup_next_step((void **)&arg, NULL, NULL, NULL); + assert_success(rc); + assert_int_equal(arg->setup_state, SETUP_CONT_CONNECT); + + return reintegrate_inflight_io(data); +} + +static void +int_cont_open_in_drain(void **state) +{ + test_arg_t *arg = *state; + daos_obj_id_t oid; + struct ioreq req; + int tgt = DEFAULT_FAIL_TGT; + int i; + + FAULT_INJECTION_REQUIRED(); + + if (!test_runable(arg, 4)) + return; + + T_BEGIN(); + oid = daos_test_oid_gen(arg->coh, DAOS_OC_R1S_SPEC_RANK, 0, 0, arg->myrank); + oid = dts_oid_set_rank(oid, ranks_to_kill[0]); + oid = dts_oid_set_tgt(oid, tgt); + ioreq_init(&req, arg->coh, oid, DAOS_IOD_ARRAY, arg); + + /** Insert records */ + print_message("Insert %d kv record in object " DF_OID "\n", DRAIN_KEY_NR, DP_OID(oid)); + for (i = 0; i 
< DRAIN_KEY_NR; i++) { + char key[32] = {0}; + + sprintf(key, "dkey_0_%d", i); + insert_single(key, "a_key", 0, "data", strlen("data") + 1, DAOS_TX_NONE, &req); + } + ioreq_fini(&req); + + test_teardown_cont_hdl(arg); + arg->interactive_rebuild = 1; + arg->rebuild_cb = cont_open_and_inflight_io; + arg->rebuild_cb_arg = &oid; + drain_single_pool_target(arg, ranks_to_kill[0], tgt, false); + + ioreq_init(&req, arg->coh, oid, DAOS_IOD_ARRAY, arg); + for (i = 0; i < DRAIN_KEY_NR; i++) { + char key[32] = {0}; + char buf[16] = {0}; + + sprintf(key, "dkey_0_%d", i); + /** Lookup */ + memset(buf, 0, 10); + lookup_single(key, "a_key", 0, buf, 10, DAOS_TX_NONE, &req); + assert_int_equal(req.iod[0].iod_size, strlen("data") + 1); + + /** Verify data consistency */ + assert_string_equal(buf, "data"); + } + + reintegrate_inflight_io_verify(arg); + ioreq_fini(&req); + T_END(); +} + +static void +int_drain_fail_and_retry_objects(void **state) +{ + test_arg_t *arg = *state; + daos_obj_id_t oids[OBJ_NR]; + int i; + + FAULT_INJECTION_REQUIRED(); + + if (!test_runable(arg, 4)) + return; + + T_BEGIN(); + for (i = 0; i < OBJ_NR; i++) { + oids[i] = daos_test_oid_gen(arg->coh, DAOS_OC_R1S_SPEC_RANK, 0, 0, arg->myrank); + oids[i] = dts_oid_set_rank(oids[i], ranks_to_kill[0]); + oids[i] = dts_oid_set_tgt(oids[i], DEFAULT_FAIL_TGT); + } + + rebuild_io(arg, oids, OBJ_NR); + daos_debug_set_params(arg->group, -1, DMG_KEY_FAIL_LOC, + DAOS_REBUILD_OBJ_FAIL | DAOS_FAIL_ALWAYS, 0, NULL); + + arg->no_rebuild = 1; + drain_single_pool_rank(arg, ranks_to_kill[0], false); + print_message("wait drain to fail and exit\n"); + /* NB: could be better to wait (in drain_single_pool_rank or test_rebuild_wait), but that + * requires new logic in rebuild_task_complete_schedule() to update state after + * Fail_reclaim + */ + print_message("wait for op:Reclaim to get -DER_IO\n"); + test_rebuild_wait_to_error(&arg, 1); + print_message("sleep for op:Fail_reclaim to run\n"); + sleep(30); + arg->no_rebuild = 0; + + 
daos_debug_set_params(arg->group, -1, DMG_KEY_FAIL_LOC, 0, 0, NULL); + rebuild_io_validate(arg, oids, OBJ_NR); + + arg->interactive_rebuild = 1; + arg->rebuild_cb = reintegrate_inflight_io; + arg->rebuild_cb_arg = &oids[OBJ_NR - 1]; + drain_single_pool_rank(arg, ranks_to_kill[0], false); + rebuild_io_validate(arg, oids, OBJ_NR); + reintegrate_inflight_io_verify(arg); + T_END(); +} + +/* FIXME: rename a few things - most of this code is performing drain + kill/exclude, NOT extend */ + +static int +int_extend_drain_cb_internal(void *arg) +{ + test_arg_t *test_arg = arg; + struct extend_drain_cb_arg *cb_arg = test_arg->rebuild_cb_arg; + dfs_t *dfs_mt = cb_arg->dfs_mt; + daos_obj_id_t *oids = cb_arg->oids; + dfs_obj_t *dir = cb_arg->dir; + uint32_t objclass = cb_arg->objclass; + struct dirent ents[10]; + int opc = cb_arg->opc; + int total_entries = 0; + uint32_t num_ents = 10; + daos_anchor_t anchor = {0}; + int rc; + int i; + + if (opc != EXTEND_DRAIN_WRITELOOP) { + print_message("sleep 5 seconds first\n"); + sleep(5); + } + + print_message("%sstart op %d (%s)\n", + test_arg->interactive_rebuild ? "stop rebuild before " : "", opc, + extend_drain_opstrs[opc]); + + if (test_arg->interactive_rebuild) + rebuild_stop_with_dmg(arg); + + /* Kill another rank during extend */ + switch (opc) { + case EXTEND_DRAIN_PUNCH: + print_message("punch objects during extend & drain%s\n", + test_arg->interactive_rebuild ? " during stopped rebuild" : ""); + for (i = 0; i < EXTEND_DRAIN_OBJ_NR; i++) { + char filename[32]; + + sprintf(filename, "file%d", i); + rc = dfs_remove(dfs_mt, dir, filename, true, &oids[i]); + assert_int_equal(rc, 0); + } + break; + case EXTEND_DRAIN_STAT: + print_message("stat objects during extend & drain%s\n", + test_arg->interactive_rebuild ? 
" during stopped rebuild" : ""); + for (i = 0; i < EXTEND_DRAIN_OBJ_NR; i++) { + char filename[32]; + struct stat stbuf; + + sprintf(filename, "file%d", i); + rc = dfs_stat(dfs_mt, dir, filename, &stbuf); + assert_int_equal(rc, 0); + } + break; + case EXTEND_DRAIN_ENUMERATE: + print_message("enumerate objects during extend & drain%s]n", + test_arg->interactive_rebuild ? " during stopped rebuild" : ""); + while (!daos_anchor_is_eof(&anchor)) { + num_ents = 10; + rc = dfs_readdir(dfs_mt, dir, &anchor, &num_ents, ents); + assert_int_equal(rc, 0); + total_entries += num_ents; + } + assert_int_equal(total_entries, EXTEND_DRAIN_OBJ_NR); + break; + case EXTEND_DRAIN_FETCH: + print_message("fetch objects during extend & drain%s\n", + test_arg->interactive_rebuild ? " during stopped rebuild" : ""); + extend_drain_read_check(dfs_mt, dir, objclass, EXTEND_DRAIN_OBJ_NR, WRITE_SIZE, + 'a'); + break; + case EXTEND_DRAIN_UPDATE: + print_message("update objects during extend & drain%s\n", + test_arg->interactive_rebuild ? " during stopped rebuild" : ""); + extend_drain_write(dfs_mt, dir, objclass, EXTEND_DRAIN_OBJ_NR, WRITE_SIZE, 'a', + NULL); + break; + case EXTEND_DRAIN_OVERWRITE: + print_message("overwrite objects during extend & drain%s\n", + test_arg->interactive_rebuild ? " during stopped rebuild" : ""); + extend_drain_write(dfs_mt, dir, objclass, EXTEND_DRAIN_OBJ_NR, WRITE_SIZE, 'b', + NULL); + break; + case EXTEND_DRAIN_WRITELOOP: + print_message("keepwrite objects during extend & drain%s\n", + test_arg->interactive_rebuild ? " during stopped rebuild" : ""); + extend_drain_write(dfs_mt, dir, objclass, 1, 512 * 1048576, 'a', NULL); + break; + default: + break; + } + + daos_debug_set_params(test_arg->group, -1, DMG_KEY_FAIL_LOC, 0, 0, NULL); + + print_message("%sdone op %d (%s)\n", + test_arg->interactive_rebuild ? 
"resume rebuild after " : "", opc, + extend_drain_opstrs[opc]); + + if (test_arg->interactive_rebuild) + rebuild_resume_wait_to_start(arg); + + return 0; +} + +static void +int_dfs_drain_overwrite(void **state) +{ + test_arg_t *arg = *state; + + arg->interactive_rebuild = 1; + print_message("=== Begin EXTEND_DRAIN_OVERWRITE, oclass OC_EC_4P2GX\n"); + dfs_extend_drain_common(state, EXTEND_DRAIN_OVERWRITE, OC_EC_4P2GX, + int_extend_drain_cb_internal); + T_END(); +} + +static int +int_extend_cb_internal(void *arg) +{ + test_arg_t *test_arg = arg; + struct extend_cb_arg *cb_arg = test_arg->rebuild_cb_arg; + dfs_t *dfs_mt = cb_arg->dfs_mt; + daos_obj_id_t *oids = cb_arg->oids; + dfs_obj_t *dir = cb_arg->dir; + struct dirent ents[10]; + int opc = cb_arg->opc; + int total_entries = 0; + uint32_t num_ents = 10; + daos_anchor_t anchor = {0}; + bool do_stop = (!cb_arg->kill && test_arg->interactive_rebuild); + const char *pre_op = (cb_arg->kill ? "kill" : "extend"); + daos_pool_info_t pinfo = {0}; + int rc; + int i; + + /* get rebuild version for first extend, so we can wait for second rebuild to start + * (by waiting for an in-progress rebuild with version > pinfo.pi_rebuild_st.rs_version) + */ + pinfo.pi_bits = DPI_REBUILD_STATUS; + rc = test_pool_get_info(test_arg, &pinfo, NULL /* engine_ranks */); + assert_rc_equal(rc, 0); + + print_message("Extending (rs_version=%u), sleep 10, %s rank %u, %sand start op %d (%s)\n", + pinfo.pi_rebuild_st.rs_version, pre_op, cb_arg->rank, + do_stop ? 
"stop rebuild, " : "", opc, extend_opstrs[opc]); + + sleep(10); + + if (cb_arg->kill) { + /* Kill another rank during extend */ + daos_kill_server(test_arg, test_arg->pool.pool_uuid, test_arg->group, + test_arg->pool.alive_svc, cb_arg->rank); + } else { + /* Extend another rank during extend */ + print_message("extend pool " DF_UUID " rank %u\n", + DP_UUID(test_arg->pool.pool_uuid), cb_arg->rank); + rc = dmg_pool_extend(test_arg->dmg_config, test_arg->pool.pool_uuid, + test_arg->group, &cb_arg->rank, 1); + assert_int_equal(rc, 0); + } + + if (do_stop) { + daos_debug_set_params(test_arg->group, -1, DMG_KEY_FAIL_LOC, 0, 0, NULL); + test_rebuild_wait_to_start_after_ver( + &test_arg, 1, + pinfo.pi_rebuild_st.rs_version /* original extend rebuild version */); + rebuild_stop_with_dmg(arg); /* then stop the new rebuild */ + test_rebuild_wait_to_error(&test_arg, 1); + } + + switch (opc) { + case EXTEND_PUNCH: + print_message("punch objects during extend one rank%s, %s rank %u\n", + do_stop ? ", stop rebuild" : "", pre_op, cb_arg->rank); + for (i = 0; i < EXTEND_OBJ_NR; i++) { + char filename[32]; + + sprintf(filename, "file%d", i); + rc = dfs_remove(dfs_mt, dir, filename, true, &oids[i]); + assert_int_equal(rc, 0); + } + break; + case EXTEND_STAT: + print_message("stat objects during extend one rank%s, %s rank %u\n", + do_stop ? ", stop rebuild" : "", pre_op, cb_arg->rank); + for (i = 0; i < EXTEND_OBJ_NR; i++) { + char filename[32]; + struct stat stbuf; + + sprintf(filename, "file%d", i); + rc = dfs_stat(dfs_mt, dir, filename, &stbuf); + assert_int_equal(rc, 0); + } + break; + case EXTEND_ENUMERATE: + print_message("enumerate objects during extend one rank%s, %s rank %u\n", + do_stop ? 
", stop rebuild" : "", pre_op, cb_arg->rank); + while (!daos_anchor_is_eof(&anchor)) { + num_ents = 10; + rc = dfs_readdir(dfs_mt, dir, &anchor, &num_ents, ents); + assert_int_equal(rc, 0); + total_entries += num_ents; + } + assert_int_equal(total_entries, 1000); + break; + case EXTEND_FETCH: + print_message("fetch objects during extend one rank%s, %s rank %u\n", + do_stop ? ", stop rebuild" : "", pre_op, cb_arg->rank); + extend_read_check(dfs_mt, dir); + break; + case EXTEND_UPDATE: + print_message("update objects during extend one rank%s, %s rank %u\n", + do_stop ? ", stop rebuild" : "", pre_op, cb_arg->rank); + extend_write(dfs_mt, dir); + break; + default: + break; + } + + daos_debug_set_params(test_arg->group, -1, DMG_KEY_FAIL_LOC, 0, 0, NULL); + + if (do_stop) + rebuild_resume_wait_to_start(arg); + + return 0; +} + +static void +int_dfs_extend_enumerate_extend(void **state) +{ + test_arg_t *arg = *state; + + FAULT_INJECTION_REQUIRED(); + + T_BEGIN(); + arg->interactive_rebuild = 1; + dfs_extend_internal(state, EXTEND_ENUMERATE, int_extend_cb_internal, false); + T_END(); +} + +/** create a new pool/container for each test */ +static const struct CMUnitTest rebuild_interactive_tests[] = { + {"IREBUILD1: interactive exclude: records with multiple snapshots", + int_rebuild_snap_update_recs, rebuild_small_sub_setup, test_teardown}, + {"IREBUILD2: interactive exclude: punch/records with multiple snapshots", + int_rebuild_snap_punch_recs, rebuild_small_sub_setup, test_teardown}, + {"IREBUILD3: interactive exclude: lot of objects with failure", + int_rebuild_many_objects_with_failure, rebuild_sub_setup, test_teardown}, + {"IREBUILD4: interactive drain: cont open and update during rebuild", int_cont_open_in_drain, + rebuild_small_sub_rf0_setup, test_teardown}, + {"IREBUILD5: drain fail and retry", int_drain_fail_and_retry_objects, rebuild_sub_rf0_setup, + test_teardown}, + {"IREBUILD6: interactive drain: overwrite during rebuild", int_dfs_drain_overwrite, + 
rebuild_sub_rf0_setup, test_teardown}, + {"IREBUILD7: interactive extend: enumerate object during two rebuilds", + int_dfs_extend_enumerate_extend, rebuild_sub_3nodes_rf0_setup, test_teardown}, +}; + +int +run_daos_int_rebuild_test(int rank, int size, int *sub_tests, int sub_tests_size) +{ + int rc = 0; + + par_barrier(PAR_COMM_WORLD); + if (sub_tests_size == 0) { + sub_tests_size = ARRAY_SIZE(rebuild_interactive_tests); + sub_tests = NULL; + } + + rc = run_daos_sub_tests_only("DAOS_Rebuild_Interactive", rebuild_interactive_tests, + ARRAY_SIZE(rebuild_interactive_tests), sub_tests, + sub_tests_size); + + par_barrier(PAR_COMM_WORLD); + + return rc; +} diff --git a/src/tests/suite/daos_rebuild_simple.c b/src/tests/suite/daos_rebuild_simple.c index 185deda338a..adc1677a5ce 100644 --- a/src/tests/suite/daos_rebuild_simple.c +++ b/src/tests/suite/daos_rebuild_simple.c @@ -80,6 +80,7 @@ rebuild_dkeys(void **state) if (!test_runable(arg, 4)) return; + T_BEGIN(); oid = daos_test_oid_gen(arg->coh, arg->obj_class, 0, 0, arg->myrank); ioreq_init(&req, arg->coh, oid, DAOS_IOD_ARRAY, arg); @@ -107,13 +108,7 @@ rebuild_dkeys(void **state) get_killing_rank_by_oid(arg, oid, 1, 0, &kill_rank, &kill_rank_nr); ioreq_fini(&req); - if (arg->interactive_rebuild) { - arg->rebuild_cb = rebuild_stop_with_dmg; - arg->rebuild_post_cb = rebuild_resume_wait; - } rebuild_single_pool_target(arg, kill_rank, -1, false); - arg->rebuild_cb = NULL; - arg->rebuild_post_cb = NULL; rc = daos_obj_verify(arg->coh, oid, DAOS_EPOCH_MAX); if (rc != 0) @@ -123,7 +118,7 @@ rebuild_dkeys(void **state) rc = daos_obj_verify(arg->coh, oid, DAOS_EPOCH_MAX); if (rc != 0) assert_rc_equal(rc, -DER_NOSYS); - print_message("success\n"); + T_END(); } static void @@ -141,6 +136,7 @@ rebuild_akeys(void **state) if (!test_runable(arg, 4)) return; + T_BEGIN(); oid = daos_test_oid_gen(arg->coh, arg->obj_class, 0, 0, arg->myrank); ioreq_init(&req, arg->coh, oid, DAOS_IOD_ARRAY, arg); @@ -169,13 +165,7 @@ rebuild_akeys(void 
**state) get_killing_rank_by_oid(arg, oid, 1, 0, &kill_rank, &kill_rank_nr); ioreq_fini(&req); - if (arg->interactive_rebuild) { - arg->rebuild_cb = rebuild_stop_with_dmg; - arg->rebuild_post_cb = rebuild_resume_wait; - } rebuild_single_pool_target(arg, kill_rank, tgt, false); - arg->rebuild_cb = NULL; - arg->rebuild_post_cb = NULL; rc = daos_obj_verify(arg->coh, oid, DAOS_EPOCH_MAX); if (rc != 0) @@ -185,7 +175,7 @@ rebuild_akeys(void **state) rc = daos_obj_verify(arg->coh, oid, DAOS_EPOCH_MAX); if (rc != 0) assert_rc_equal(rc, -DER_NOSYS); - print_message("success\n"); + T_END(); } static void @@ -202,6 +192,7 @@ rebuild_indexes(void **state) if (!test_runable(arg, 4)) return; + T_BEGIN(); oid = daos_test_oid_gen(arg->coh, arg->obj_class, 0, 0, arg->myrank); oid = dts_oid_set_rank(oid, ranks_to_kill[0]); oid = dts_oid_set_tgt(oid, tgt); @@ -221,13 +212,7 @@ rebuild_indexes(void **state) ioreq_fini(&req); /* Rebuild rank 1 */ - if (arg->interactive_rebuild) { - arg->rebuild_cb = rebuild_stop_with_dmg; - arg->rebuild_post_cb = rebuild_resume_wait; - } rebuild_single_pool_target(arg, ranks_to_kill[0], tgt, false); - arg->rebuild_cb = NULL; - arg->rebuild_post_cb = NULL; rc = daos_obj_verify(arg->coh, oid, DAOS_EPOCH_MAX); if (rc != 0) @@ -237,7 +222,7 @@ rebuild_indexes(void **state) rc = daos_obj_verify(arg->coh, oid, DAOS_EPOCH_MAX); if (rc != 0) assert_rc_equal(rc, -DER_NOSYS); - print_message("success\n"); + T_END(); } #define SNAP_CNT 20 @@ -257,6 +242,7 @@ rebuild_snap_update_recs(void **state) if (!test_runable(arg, 4)) return; + T_BEGIN(); oid = daos_test_oid_gen(arg->coh, arg->obj_class, 0, 0, arg->myrank); oid = dts_oid_set_rank(oid, ranks_to_kill[0]); oid = dts_oid_set_tgt(oid, tgt); @@ -282,13 +268,7 @@ rebuild_snap_update_recs(void **state) } ioreq_fini(&req); - if (arg->interactive_rebuild) { - arg->rebuild_cb = rebuild_stop_with_dmg; - arg->rebuild_post_cb = rebuild_resume_wait; - } rebuild_single_pool_target(arg, ranks_to_kill[0], tgt, false); - 
arg->rebuild_cb = NULL; - arg->rebuild_post_cb = NULL; for (i = 0; i < SNAP_CNT; i++) { rc = daos_obj_verify(arg->coh, oid, snap_epoch[i]); @@ -308,7 +288,7 @@ rebuild_snap_update_recs(void **state) rc = daos_obj_verify(arg->coh, oid, DAOS_EPOCH_MAX); if (rc != 0) assert_rc_equal(rc, -DER_NOSYS); - print_message("success\n"); + T_END(); } static void @@ -327,6 +307,7 @@ rebuild_snap_punch_recs(void **state) if (!test_runable(arg, 4)) return; + T_BEGIN(); oid = daos_test_oid_gen(arg->coh, arg->obj_class, 0, 0, arg->myrank); oid = dts_oid_set_rank(oid, ranks_to_kill[0]); oid = dts_oid_set_tgt(oid, tgt); @@ -348,13 +329,7 @@ rebuild_snap_punch_recs(void **state) } ioreq_fini(&req); - if (arg->interactive_rebuild) { - arg->rebuild_cb = rebuild_stop_with_dmg; - arg->rebuild_post_cb = rebuild_resume_wait; - } rebuild_single_pool_target(arg, ranks_to_kill[0], tgt, false); - arg->rebuild_cb = NULL; - arg->rebuild_post_cb = NULL; for (i = 0; i < SNAP_CNT; i++) { rc = daos_obj_verify(arg->coh, oid, snap_epoch[i]); @@ -374,7 +349,7 @@ rebuild_snap_punch_recs(void **state) rc = daos_obj_verify(arg->coh, oid, DAOS_EPOCH_MAX); if (rc != 0) assert_rc_equal(rc, -DER_NOSYS); - print_message("success\n"); + T_END(); } static void @@ -391,6 +366,7 @@ rebuild_snap_update_keys(void **state) if (!test_runable(arg, 4)) return; + T_BEGIN(); oid = daos_test_oid_gen(arg->coh, arg->obj_class, 0, 0, arg->myrank); oid = dts_oid_set_rank(oid, ranks_to_kill[0]); oid = dts_oid_set_tgt(oid, tgt); @@ -408,13 +384,7 @@ rebuild_snap_update_keys(void **state) insert_single("dkey", akey, 0, "data", 1, DAOS_TX_NONE, &req); } - if (arg->interactive_rebuild) { - arg->rebuild_cb = rebuild_stop_with_dmg; - arg->rebuild_post_cb = rebuild_resume_wait; - } rebuild_single_pool_target(arg, ranks_to_kill[0], tgt, false); - arg->rebuild_cb = NULL; - arg->rebuild_post_cb = NULL; daos_fail_loc_set(DAOS_OBJ_SPECIAL_SHARD); for (i = 0; i < OBJ_REPLICAS; i++) { @@ -464,7 +434,7 @@ rebuild_snap_update_keys(void **state) 
if (rc != 0) assert_rc_equal(rc, -DER_NOSYS); ioreq_fini(&req); - print_message("success\n"); + T_END(); } static void @@ -481,6 +451,7 @@ rebuild_snap_punch_keys(void **state) if (!test_runable(arg, 4)) return; + T_BEGIN(); oid = daos_test_oid_gen(arg->coh, arg->obj_class, 0, 0, arg->myrank); oid = dts_oid_set_rank(oid, ranks_to_kill[0]); oid = dts_oid_set_tgt(oid, tgt); @@ -516,13 +487,7 @@ rebuild_snap_punch_keys(void **state) punch_akey("dkey", akey, DAOS_TX_NONE, &req); } - if (arg->interactive_rebuild) { - arg->rebuild_cb = rebuild_stop_with_dmg; - arg->rebuild_post_cb = rebuild_resume_wait; - } rebuild_single_pool_target(arg, ranks_to_kill[0], tgt, false); - arg->rebuild_cb = NULL; - arg->rebuild_post_cb = NULL; daos_fail_loc_set(DAOS_OBJ_SPECIAL_SHARD); for (i = 0; i < OBJ_REPLICAS; i++) { @@ -573,7 +538,7 @@ rebuild_snap_punch_keys(void **state) if (rc != 0) assert_rc_equal(rc, -DER_NOSYS); ioreq_fini(&req); - print_message("success\n"); + T_END(); } static void @@ -590,6 +555,7 @@ rebuild_snap_punch_empty(void **state) if (!test_runable(arg, 4)) return; + T_BEGIN(); oid = daos_test_oid_gen(arg->coh, DAOS_OC_R3S_SPEC_RANK, 0, 0, arg->myrank); oid = dts_oid_set_rank(oid, ranks_to_kill[0]); @@ -603,17 +569,7 @@ rebuild_snap_punch_empty(void **state) punch_obj(DAOS_TX_NONE, &req); - /* stop exclude rebuild, but skip the start, then directly reintegrate */ - if (arg->interactive_rebuild) { - bool skip_restart = true; - arg->rebuild_cb = rebuild_stop_with_dmg; - arg->rebuild_post_cb = rebuild_resume_wait; - arg->rebuild_post_cb_arg = &skip_restart; - } rebuild_single_pool_target(arg, ranks_to_kill[0], tgt, false); - arg->rebuild_cb = NULL; - arg->rebuild_post_cb = NULL; - arg->rebuild_post_cb_arg = NULL; daos_fail_loc_set(DAOS_OBJ_SPECIAL_SHARD); for (i = 0; i < OBJ_REPLICAS; i++) { @@ -653,17 +609,12 @@ rebuild_snap_punch_empty(void **state) assert_int_equal(number, 0); } - /* from a stopped (not restarted) exclude, directly reintegrate (stop+start this 
rebuild) */ - if (arg->interactive_rebuild) { - arg->rebuild_cb = rebuild_stop_with_dmg; - arg->rebuild_post_cb = rebuild_resume_wait; - } reintegrate_single_pool_target(arg, ranks_to_kill[0], tgt); rc = daos_obj_verify(arg->coh, oid, DAOS_EPOCH_MAX); if (rc != 0) assert_rc_equal(rc, -DER_NOSYS); ioreq_fini(&req); - print_message("success\n"); + T_END(); } static void @@ -681,6 +632,7 @@ rebuild_multiple(void **state) if (!test_runable(arg, 4)) return; + T_BEGIN(); oid = daos_test_oid_gen(arg->coh, arg->obj_class, 0, 0, arg->myrank); oid = dts_oid_set_rank(oid, ranks_to_kill[0]); oid = dts_oid_set_tgt(oid, tgt); @@ -704,19 +656,13 @@ rebuild_multiple(void **state) } } - if (arg->interactive_rebuild) { - arg->rebuild_cb = rebuild_stop_with_dmg; - arg->rebuild_post_cb = rebuild_resume_wait; - } rebuild_single_pool_target(arg, ranks_to_kill[0], tgt, false); - arg->rebuild_cb = NULL; - arg->rebuild_post_cb = NULL; rc = daos_obj_verify(arg->coh, oid, DAOS_EPOCH_MAX); if (rc != 0) assert_rc_equal(rc, -DER_NOSYS); ioreq_fini(&req); - print_message("success\n"); + T_END(); } #define LARGE_BUFFER_SIZE (32 * 1024 * 4) @@ -734,6 +680,7 @@ rebuild_large_rec(void **state) if (!test_runable(arg, 4)) return; + T_BEGIN(); oid = daos_test_oid_gen(arg->coh, arg->obj_class, 0, 0, arg->myrank); oid = dts_oid_set_rank(oid, ranks_to_kill[0]); oid = dts_oid_set_tgt(oid, tgt); @@ -762,13 +709,7 @@ rebuild_large_rec(void **state) ioreq_fini(&req); - if (arg->interactive_rebuild) { - arg->rebuild_cb = rebuild_stop_with_dmg; - arg->rebuild_post_cb = rebuild_resume_wait; - } rebuild_single_pool_target(arg, ranks_to_kill[0], tgt, false); - arg->rebuild_cb = NULL; - arg->rebuild_post_cb = NULL; rc = daos_obj_verify(arg->coh, oid, DAOS_EPOCH_MAX); if (rc != 0) @@ -778,7 +719,7 @@ rebuild_large_rec(void **state) rc = daos_obj_verify(arg->coh, oid, DAOS_EPOCH_MAX); if (rc != 0) assert_rc_equal(rc, -DER_NOSYS); - print_message("success\n"); + T_END(); } static void @@ -793,6 +734,7 @@ 
rebuild_objects(void **state) if (!test_runable(arg, 4)) return; + T_BEGIN(); for (i = 0; i < OBJ_NR; i++) { oids[i] = daos_test_oid_gen(arg->coh, arg->obj_class, 0, 0, arg->myrank); @@ -802,13 +744,7 @@ rebuild_objects(void **state) rebuild_io(arg, oids, OBJ_NR); - if (arg->interactive_rebuild) { - arg->rebuild_cb = rebuild_stop_with_dmg; - arg->rebuild_post_cb = rebuild_resume_wait; - } rebuild_single_pool_target(arg, ranks_to_kill[0], tgt, false); - arg->rebuild_cb = NULL; - arg->rebuild_post_cb = NULL; for (i = 0; i < OBJ_NR; i++) { rc = daos_obj_verify(arg->coh, oids[i], DAOS_EPOCH_MAX); @@ -822,7 +758,7 @@ rebuild_objects(void **state) if (rc != 0) assert_rc_equal(rc, -DER_NOSYS); } - print_message("success\n"); + T_END(); } static void @@ -844,12 +780,6 @@ rebuild_sx_object_internal(void **state, daos_oclass_id_t oclass, if (!test_runable(arg, 4)) return; - if (arg->interactive_rebuild && !wait_rebuild) { - print_message("SKIP due to interactive_rebuild enabled, but not tested here\n"); - skip(); - return; - } - oid = daos_test_oid_gen(arg->coh, oclass, 0, 0, arg->myrank); ioreq_init(&req, arg->coh, oid, DAOS_IOD_ARRAY, arg); print_message("insert 100 dkeys\n"); @@ -885,15 +815,9 @@ rebuild_sx_object_internal(void **state, daos_oclass_id_t oclass, arg->group, rank, -1); assert_success(rc); - if (arg->interactive_rebuild) - rebuild_stop_with_dmg(arg); - /* wait until exclude rebuild done */ - if (wait_rebuild) { + if (wait_rebuild) test_rebuild_wait(&arg, 1); - if (arg->interactive_rebuild) - rebuild_resume_wait(arg); - } print_message("dmg pool reintegrate rank %u " DF_UUID "\n", rank, DP_UUID(arg->pool.pool_uuid)); @@ -901,15 +825,9 @@ rebuild_sx_object_internal(void **state, daos_oclass_id_t oclass, rank, -1); assert_success(rc); - if (arg->interactive_rebuild) - rebuild_stop_with_dmg(arg); - /* wait until reintegration rebuild is done */ - if (wait_rebuild) { + if (wait_rebuild) test_rebuild_wait(&arg, 1); - if (arg->interactive_rebuild) - 
rebuild_resume_wait(arg); - } print_message("lookup 100 dkeys\n"); for (i = 0; i < 100 && verify; i++) { @@ -928,22 +846,25 @@ rebuild_sx_object_internal(void **state, daos_oclass_id_t oclass, static void rebuild_sx_object(void **state) { + T_BEGIN(); rebuild_sx_object_internal(state, OC_SX, false, true); - print_message("success\n"); + T_END(); } static void rebuild_xsf_object(void **state) { + T_BEGIN(); rebuild_sx_object_internal(state, OC_RP_XSF, true, true); - print_message("success\n"); + T_END(); } static void rebuild_sx_object_no_data_sync(void **state) { + T_BEGIN(); rebuild_sx_object_internal(state, OC_SX, false, false); - print_message("success\n"); + T_END(); } static int @@ -978,6 +899,7 @@ rebuild_large_object(void **state) if (!test_runable(arg, 4)) return; + T_BEGIN(); for (i = 0; i < 5; i++) { oid = daos_test_oid_gen(arg->coh, OC_RP_2G8, 0, 0, arg->myrank); ioreq_init(&req, arg->coh, oid, DAOS_IOD_ARRAY, arg); @@ -994,13 +916,8 @@ rebuild_large_object(void **state) rank, -1); assert_success(rc); - if (arg->interactive_rebuild) - rebuild_stop_with_dmg(arg); - /* wait until exclude rebuild done */ test_rebuild_wait(&arg, 1); - if (arg->interactive_rebuild) - rebuild_resume_wait(arg); print_message("dmg pool reintegrate rank %u " DF_UUID "\n", rank, DP_UUID(arg->pool.pool_uuid)); @@ -1008,16 +925,10 @@ rebuild_large_object(void **state) rank, -1); assert_success(rc); - if (arg->interactive_rebuild) { - rebuild_stop_with_dmg(arg); - } - /* wait until reintegration rebuild is done */ test_rebuild_wait(&arg, 1); - if (arg->interactive_rebuild) - rebuild_resume_wait(arg); - print_message("success\n"); + T_END(); } int @@ -1092,6 +1003,7 @@ rebuild_large_snap(void **state) if (!test_runable(arg, 4)) return; + T_BEGIN(); oid = daos_test_oid_gen(arg->coh, arg->obj_class, 0, 0, arg->myrank); oid = dts_oid_set_rank(oid, ranks_to_kill[0]); oid = dts_oid_set_tgt(oid, tgt); @@ -1109,19 +1021,11 @@ rebuild_large_snap(void **state) insert_single("dkey", akey, 0, 
"data", 1, DAOS_TX_NONE, &req); } - /* stop and resume the exclude rebuild */ - if (arg->interactive_rebuild) { - arg->rebuild_cb = rebuild_stop_with_dmg; - arg->rebuild_post_cb = rebuild_resume_wait; - } rebuild_single_pool_target(arg, ranks_to_kill[0], tgt, false); ioreq_fini(&req); - /* stop and resume the reintegration rebuild (cb functions are still set in arg) */ reintegrate_single_pool_target(arg, ranks_to_kill[0], tgt); - arg->rebuild_cb = NULL; - arg->rebuild_post_cb = NULL; - print_message("success\n"); + T_END(); } static void @@ -1135,6 +1039,7 @@ rebuild_full_shards(void **state) if (!test_runable(arg, 4)) return; + T_BEGIN(); /* require 4 nodes and 8 targets per node */ if (arg->myrank == 0 && arg->srv_ntgts / arg->srv_nnodes != 8) { print_message("skip - require 4 nodes and 8 targets/node\n"); @@ -1158,26 +1063,12 @@ rebuild_full_shards(void **state) ioreq_fini(&req); /* rebuild and reintegration to use full shards */ - if (arg->interactive_rebuild) { - arg->rebuild_cb = rebuild_stop_with_dmg; - arg->rebuild_post_cb = rebuild_resume_wait; - } - /* stop and resume the first exclude rebuild (but not the second one) */ rebuild_single_pool_target(arg, 0, -1, false); - arg->rebuild_cb = NULL; - arg->rebuild_post_cb = NULL; rebuild_single_pool_target(arg, 3, -1, false); - /* stop and resume the first reintegrate rebuild (but not the second one) */ - if (arg->interactive_rebuild) { - arg->rebuild_cb = rebuild_stop_with_dmg; - arg->rebuild_post_cb = rebuild_resume_wait; - } reintegrate_single_pool_target(arg, 0, -1); - arg->rebuild_cb = NULL; - arg->rebuild_post_cb = NULL; reintegrate_single_pool_target(arg, 3, -1); - print_message("success\n"); + T_END(); } static void @@ -1194,6 +1085,7 @@ rebuild_punch_recs(void **state) if (!test_runable(arg, 4)) return; + T_BEGIN(); oid = daos_test_oid_gen(arg->coh, arg->obj_class, 0, 0, arg->myrank); oid = dts_oid_set_rank(oid, ranks_to_kill[0]); ioreq_init(&req, arg->coh, oid, DAOS_IOD_ARRAY, arg); @@ -1212,18 
+1104,12 @@ rebuild_punch_recs(void **state) } ioreq_fini(&req); - if (arg->interactive_rebuild) { - arg->rebuild_cb = rebuild_stop_with_dmg; - arg->rebuild_post_cb = rebuild_resume_wait; - } rebuild_single_pool_target(arg, ranks_to_kill[0], -1, false); - arg->rebuild_cb = NULL; - arg->rebuild_post_cb = NULL; rc = daos_obj_verify(arg->coh, oid, DAOS_EPOCH_MAX); if (rc != 0) assert_rc_equal(rc, -DER_NOSYS); - print_message("success\n"); + T_END(); } static void @@ -1240,6 +1126,7 @@ rebuild_multiple_group(void **state) if (!test_runable(arg, 7)) return; + T_BEGIN(); oid = daos_test_oid_gen(arg->coh, OC_RP_2G4, 0, 0, arg->myrank); ioreq_init(&req, arg->coh, oid, DAOS_IOD_ARRAY, arg); @@ -1269,13 +1156,7 @@ rebuild_multiple_group(void **state) get_killing_rank_by_oid(arg, oid, 1, 0, &kill_rank, &kill_rank_nr); ioreq_fini(&req); - if (arg->interactive_rebuild) { - arg->rebuild_cb = rebuild_stop_with_dmg; - arg->rebuild_post_cb = rebuild_resume_wait; - } rebuild_single_pool_target(arg, kill_rank, -1, false); - arg->rebuild_cb = NULL; - arg->rebuild_post_cb = NULL; rc = daos_obj_verify(arg->coh, oid, DAOS_EPOCH_MAX); if (rc != 0) @@ -1286,7 +1167,7 @@ rebuild_multiple_group(void **state) rc = daos_obj_verify(arg->coh, oid, DAOS_EPOCH_MAX); if (rc != 0) assert_rc_equal(rc, -DER_NOSYS); - print_message("success\n"); + T_END(); } /** i/o to variable idx offset */ @@ -1307,6 +1188,7 @@ rebuild_with_large_offset(void **state) if (!test_runable(arg, 4)) return; + T_BEGIN(); oid = daos_test_oid_gen(arg->coh, arg->obj_class, 0, 0, arg->myrank); ioreq_init(&req, arg->coh, oid, DAOS_IOD_ARRAY, arg); memset(data, 'a', 128); @@ -1323,13 +1205,7 @@ rebuild_with_large_offset(void **state) get_killing_rank_by_oid(arg, oid, 1, 0, &kill_rank, &kill_rank_nr); ioreq_fini(&req); - if (arg->interactive_rebuild) { - arg->rebuild_cb = rebuild_stop_with_dmg; - arg->rebuild_post_cb = rebuild_resume_wait; - } rebuild_single_pool_target(arg, kill_rank, -1, false); - arg->rebuild_cb = NULL; - 
arg->rebuild_post_cb = NULL; rc = daos_obj_verify(arg->coh, oid, DAOS_EPOCH_MAX); if (rc != 0) @@ -1340,7 +1216,7 @@ rebuild_with_large_offset(void **state) rc = daos_obj_verify(arg->coh, oid, DAOS_EPOCH_MAX); if (rc != 0) assert_rc_equal(rc, -DER_NOSYS); - print_message("success\n"); + T_END(); } #define LARGE_KEY_SIZE 1048576 @@ -1361,6 +1237,7 @@ rebuild_with_large_key(void **state) if (!test_runable(arg, 4)) return; + T_BEGIN(); oid = daos_test_oid_gen(arg->coh, arg->obj_class, 0, 0, arg->myrank); ioreq_init(&req, arg->coh, oid, DAOS_IOD_ARRAY, arg); dkey = calloc(LARGE_KEY_SIZE, 1); @@ -1375,13 +1252,7 @@ rebuild_with_large_key(void **state) get_killing_rank_by_oid(arg, oid, 1, 0, &kill_rank, &kill_rank_nr); ioreq_fini(&req); - if (arg->interactive_rebuild) { - arg->rebuild_cb = rebuild_stop_with_dmg; - arg->rebuild_post_cb = rebuild_resume_wait; - } rebuild_single_pool_target(arg, kill_rank, -1, false); - arg->rebuild_cb = NULL; - arg->rebuild_post_cb = NULL; rc = daos_obj_verify(arg->coh, oid, DAOS_EPOCH_MAX); if (rc != 0) @@ -1393,7 +1264,7 @@ rebuild_with_large_key(void **state) assert_rc_equal(rc, -DER_NOSYS); free(dkey); free(akey); - print_message("success\n"); + T_END(); } void @@ -1416,6 +1287,7 @@ rebuild_with_dfs_open_create_punch(void **state) if (!test_runable(arg, 6)) return; + T_BEGIN(); dfs_attr_t attr = {}; attr.da_props = daos_prop_alloc(1); @@ -1444,16 +1316,10 @@ rebuild_with_dfs_open_create_punch(void **state) dfs_obj2id(dir, &oid); - if (arg->interactive_rebuild) { - arg->rebuild_cb = rebuild_stop_with_dmg; - arg->rebuild_post_cb = rebuild_resume_wait; - } rank = get_rank_by_oid_shard(arg, oid, 0); rebuild_single_pool_rank(arg, rank, false); reintegrate_single_pool_rank(arg, rank, false); - arg->rebuild_cb = NULL; - arg->rebuild_post_cb = NULL; daos_cont_status_clear(co_hdl, NULL); for (i = 0; i < 20; i++) { @@ -1480,7 +1346,7 @@ rebuild_with_dfs_open_create_punch(void **state) uuid_unparse(co_uuid, str); rc = 
daos_cont_destroy(arg->pool.poh, str, 1, NULL); assert_rc_equal(rc, 0); - print_message("success\n"); + T_END(); } static int @@ -1488,8 +1354,7 @@ rebuild_wait_reset_fail_cb(void *data) { test_arg_t *arg = data; - print_message("wait 60 seconds for rebuild/reclaim%s\n", - arg->interactive_rebuild ? "" : "/retry"); + print_message("wait 60 seconds for rebuild/reclaim\n"); sleep(60); print_message("clearing fault injection on all engines\n"); @@ -1500,40 +1365,6 @@ rebuild_wait_reset_fail_cb(void *data) return 0; } -static int -rebuild_wait_error_reset_fail_cb(void *data) -{ - test_arg_t *arg = data; - int rc; - - print_message("wait until rebuild errors (and starts Fail_reclaim)\n"); - test_rebuild_wait_to_error(&arg, 1); - print_message("check rebuild errored, rs_errno=%d (expecting -DER_IO=%d)\n", - arg->pool.pool_info.pi_rebuild_st.rs_errno, -DER_IO); - assert_int_equal(arg->pool.pool_info.pi_rebuild_st.rs_errno, -DER_IO); - print_message("rebuild error code check passed\n"); - - print_message("clearing fault injection on all engines\n"); - daos_debug_set_params(arg->group, -1, DMG_KEY_FAIL_LOC, 0, 0, NULL); - daos_debug_set_params(arg->group, -1, DMG_KEY_FAIL_VALUE, 0, 0, NULL); - daos_debug_set_params(arg->group, -1, DMG_KEY_FAIL_NUM, 0, 0, NULL); - - /* Give time for transition from op:Rebuild into op:Fail_reclaim */ - sleep(2); - - print_message( - "send rebuild stop --force request during first/only Fail_reclaim operation\n"); - rc = rebuild_force_stop_with_dmg(data); - if (rc != 0) - print_message("rebuild_force_stop_with_dmg failed, rc=%d\n", rc); - - print_message("wait for rebuild to be stopped\n"); - test_rebuild_wait(&arg, 1); - /* Verifying rs_state/rs_errno will happen in post_cb rebuild_resume_wait() */ - - return rc; -} - static void rebuild_many_objects_with_failure(void **state) { @@ -1545,6 +1376,7 @@ rebuild_many_objects_with_failure(void **state) if (!test_runable(arg, 6)) return; + T_BEGIN(); D_ALLOC_ARRAY(oids, 8000); for (i = 0; i < 8000; 
i++) { char buffer[256]; @@ -1561,7 +1393,7 @@ rebuild_many_objects_with_failure(void **state) ioreq_fini(&req); } - /* Inject faults on engines. Special handling for interactive_rebuild case */ + /* Inject faults on engines */ if (arg->myrank == 0) { print_message("inject fault DAOS_REBUILD_OBJ_FAIL on all engines\n"); daos_debug_set_params(arg->group, -1, DMG_KEY_FAIL_LOC, @@ -1569,19 +1401,7 @@ rebuild_many_objects_with_failure(void **state) daos_debug_set_params(arg->group, -1, DMG_KEY_FAIL_VALUE, 50, 0, NULL); } - /* For interactive rebuild, we need: - * 1. trigger rebuild (which will fail), query pool reubild state until op:Rebuild fails - * and op:Fail_reclaim begins. See test_rebuild_wait_to_error(). - * 2. Then, while rebuild is in op:Fail_reclaim, issue dmg system stop to test that you - * can't stop during Fail_reclaim (though the command will take effect by not retrying - * rebuild). - */ - if (arg->interactive_rebuild) { - arg->rebuild_cb = rebuild_wait_error_reset_fail_cb; - arg->rebuild_post_cb = rebuild_resume_wait; - } else { - arg->rebuild_cb = rebuild_wait_reset_fail_cb; - } + arg->rebuild_cb = rebuild_wait_reset_fail_cb; rebuild_single_pool_target(arg, 3, -1, false); for (i = 0; i < 8000; i++) { @@ -1590,7 +1410,7 @@ rebuild_many_objects_with_failure(void **state) assert_rc_equal(rc, -DER_NOSYS); } D_FREE(oids); - print_message("success\n"); + T_END(); } #define KB 1024 @@ -1649,10 +1469,7 @@ rebuild_object_with_csum_error(void **state) skip(); } - if (arg->interactive_rebuild) { - print_message("SKIP due to interactive_rebuild enabled, but not tested here\n"); - skip(); - } + T_BEGIN(); /* setup pool to have scrubbing turned on */ assert_success(dmg_pool_set_prop(dmg_config_file, "scrub", "timed", pool_uuid)); @@ -1729,7 +1546,7 @@ rebuild_object_with_csum_error(void **state) assert_success(daos_cont_close(coh, NULL)); assert_success(daos_cont_destroy(poh, uuid_cont_str, false, NULL)); assert_success(dmg_pool_set_prop(dmg_config_file, "scrub", 
"off", arg->pool.pool_uuid)); - print_message("success\n"); + T_END(); } struct rebuild_cb_arg { @@ -1889,12 +1706,14 @@ rebuild_dfs_append_cb(void *data) rebuild_dfs_write(cb_arg->dfs_mt, cb_arg->dir, cb_arg->offset, cb_arg->size, O_RDWR | O_EXCL); + print_message("%s(): completed rebuild_dfs_write()\n", __FUNCTION__); if (arg->myrank == 0) { daos_debug_set_params(arg->group, -1, DMG_KEY_FAIL_LOC, 0, 0, NULL); daos_debug_set_params(arg->group, -1, DMG_KEY_FAIL_VALUE, 0, 0, NULL); } + return 0; } @@ -1905,12 +1724,14 @@ rebuild_dfs_punch_cb(void *data) struct rebuild_cb_arg *cb_arg = arg->rebuild_cb_arg; rebuild_dfs_remove(cb_arg->dfs_mt, cb_arg->dir); + print_message("%s(): completed rebuild_dfs_remove()\n", __FUNCTION__); if (arg->myrank == 0) { daos_debug_set_params(arg->group, -1, DMG_KEY_FAIL_LOC, 0, 0, NULL); daos_debug_set_params(arg->group, -1, DMG_KEY_FAIL_VALUE, 0, 0, NULL); } + return 0; } @@ -1929,11 +1750,7 @@ rebuild_with_dfs_inflight_append(void **state) if (!test_runable(arg, 6)) return; - if (arg->interactive_rebuild) { - print_message("SKIP due to interactive_rebuild enabled, but not tested here\n"); - skip(); - } - + T_BEGIN(); daos_pool_set_prop(arg->pool.pool_uuid, "reclaim", "disabled"); rebuild_dfs_prep(arg, &dfs_mt, &dir, &co_hdl, &co_uuid); @@ -1972,7 +1789,7 @@ rebuild_with_dfs_inflight_append(void **state) rebuild_dfs_read_check(dfs_mt, dir, 0, 1048576 * 3); rebuild_dfs_fini(arg, dfs_mt, dir, co_hdl, co_uuid); - print_message("success\n"); + T_END(); } void @@ -1994,11 +1811,7 @@ rebuild_with_dfs_inflight_punch(void **state) if (!test_runable(arg, 6)) return; - if (arg->interactive_rebuild) { - print_message("SKIP due to interactive_rebuild enabled, but not tested here\n"); - skip(); - } - + T_BEGIN(); daos_pool_set_prop(arg->pool.pool_uuid, "reclaim", "disabled"); rebuild_dfs_prep(arg, &dfs_mt, &dir, &co_hdl, &co_uuid); @@ -2041,7 +1854,7 @@ rebuild_with_dfs_inflight_punch(void **state) } rebuild_dfs_fini(arg, dfs_mt, dir, co_hdl, 
co_uuid); - print_message("success\n"); + T_END(); } static int @@ -2052,13 +1865,14 @@ rebuild_dfs_create_append_cb(void *data) rebuild_dfs_write(cb_arg->dfs_mt, cb_arg->dir, cb_arg->offset, cb_arg->size, O_RDWR | O_CREAT | O_EXCL); - + print_message("%s(): completed rebuild_dfs_write()\n", __FUNCTION__); if (arg->myrank == 0) { daos_debug_set_params(arg->group, -1, DMG_KEY_FAIL_LOC, 0, 0, NULL); daos_debug_set_params(arg->group, -1, DMG_KEY_FAIL_VALUE, 0, 0, NULL); } + return 0; } @@ -2081,11 +1895,7 @@ rebuild_with_dfs_inflight_append_punch(void **state) if (!test_runable(arg, 6)) return; - if (arg->interactive_rebuild) { - print_message("SKIP due to interactive_rebuild enabled, but not tested here\n"); - skip(); - } - + T_BEGIN(); daos_pool_set_prop(arg->pool.pool_uuid, "reclaim", "disabled"); rebuild_dfs_prep(arg, &dfs_mt, &dir, &co_hdl, &co_uuid); @@ -2124,7 +1934,7 @@ rebuild_with_dfs_inflight_append_punch(void **state) rebuild_dfs_read_check(dfs_mt, dir, 0, 1048576 + 10); rebuild_dfs_fini(arg, dfs_mt, dir, co_hdl, co_uuid); - print_message("success\n"); + T_END(); } static int @@ -2134,13 +1944,14 @@ rebuild_dfs_punch_create_cb(void *data) struct rebuild_cb_arg *cb_arg = arg->rebuild_cb_arg; int i; - print_message("start remove/update loop\n"); + print_message("%s(): start remove/update loop\n", __FUNCTION__); for (i = 0; i < 100; i++) { rebuild_dfs_remove(cb_arg->dfs_mt, cb_arg->dir); rebuild_dfs_write(cb_arg->dfs_mt, cb_arg->dir, cb_arg->offset, cb_arg->size, O_RDWR | O_CREAT | O_EXCL); } - print_message("end remove/update loop\n"); + print_message("%s() end remove/update loop\n", __FUNCTION__); + return 0; } @@ -2163,11 +1974,7 @@ rebuild_with_dfs_inflight_punch_create(void **state) if (!test_runable(arg, 6)) return; - if (arg->interactive_rebuild) { - print_message("SKIP due to interactive_rebuild enabled, but not tested here\n"); - skip(); - } - + T_BEGIN(); daos_pool_set_prop(arg->pool.pool_uuid, "reclaim", "disabled"); rebuild_dfs_prep(arg, &dfs_mt, 
&dir, &co_hdl, &co_uuid); @@ -2191,7 +1998,7 @@ rebuild_with_dfs_inflight_punch_create(void **state) rebuild_dfs_read_check(dfs_mt, dir, 0, 1048576 + 10); rebuild_dfs_fini(arg, dfs_mt, dir, co_hdl, co_uuid); - print_message("success\n"); + T_END(); } /** create a new pool/container for each test */ diff --git a/src/tests/suite/daos_test.c b/src/tests/suite/daos_test.c index d4f789c05cb..18c6fcbab7d 100644 --- a/src/tests/suite/daos_test.c +++ b/src/tests/suite/daos_test.c @@ -24,7 +24,7 @@ * These tests will only be run if explicitly specified. They don't get * run if no test is specified. */ -#define EXPLICIT_TESTS "x" +#define EXPLICIT_TESTS "xj" static const char *all_tests = TESTS; static const char *all_tests_defined = TESTS EXPLICIT_TESTS; @@ -32,7 +32,6 @@ enum { CHECKSUM_ARG_VAL_TYPE = 0x2713, CHECKSUM_ARG_VAL_CHUNKSIZE = 0x2714, CHECKSUM_ARG_VAL_SERVERVERIFY = 0x2715, - REBUILD_INTERACTIVE = 0x2716, }; static void @@ -89,7 +88,6 @@ print_usage(int rank) print_message("daos_test --csum_type CSUM_TYPE\n"); print_message("daos_test --csum_cs CHUNKSIZE\n"); print_message("daos_test --csum_sv\n"); - print_message("daos_test --rebuild_interactive\n"); print_message("\n=============================\n"); } @@ -318,6 +316,13 @@ run_specified_tests(const char *tests, int rank, int size, daos_test_print(rank, "================="); nr_failed += run_daos_inc_reint_test(rank, size, sub_tests, sub_tests_size); break; + case 'j': + daos_test_print(rank, "\n\n================="); + daos_test_print(rank, "DAOS interactive rebuild tests.."); + daos_test_print(rank, "================="); + nr_failed += + run_daos_int_rebuild_test(rank, size, sub_tests, sub_tests_size); + break; default: D_ASSERT(0); } @@ -402,7 +407,6 @@ main(int argc, char **argv) {"work_dir", required_argument, NULL, 'W'}, {"workload_file", required_argument, NULL, 'w'}, {"obj_class", required_argument, NULL, 'l'}, - {"rebuild_interactive", no_argument, NULL, REBUILD_INTERACTIVE}, {"help", no_argument, 
NULL, 'h'}, {NULL, 0, NULL, 0}}; @@ -415,7 +419,7 @@ main(int argc, char **argv) memset(tests, 0, sizeof(tests)); while ( - (opt = getopt_long(argc, argv, "amFpcCdtTViIzUZxADKeoROg:n:s:u:E:f:w:W:hrNvbBSXl:GPY", + (opt = getopt_long(argc, argv, "amFpcCdtTViIzUZxADKeoROg:n:s:u:E:f:w:W:hrNvbBSXl:GPYj", long_options, &index)) != -1) { if (strchr(all_tests_defined, opt) != NULL) { tests[ntests] = opt; @@ -477,9 +481,6 @@ main(int argc, char **argv) case CHECKSUM_ARG_VAL_SERVERVERIFY: dt_csum_server_verify = true; break; - case REBUILD_INTERACTIVE: - dt_rb_interactive = true; - break; default: daos_test_print(rank, "Unknown Option\n"); print_usage(rank); diff --git a/src/tests/suite/daos_test.h b/src/tests/suite/daos_test.h index 23de8a21501..50281309405 100644 --- a/src/tests/suite/daos_test.h +++ b/src/tests/suite/daos_test.h @@ -42,11 +42,22 @@ #include #include #include +#include #if D_HAS_WARNING(4, "-Wframe-larger-than=") #pragma GCC diagnostic ignored "-Wframe-larger-than=" #endif +#define T_BEGIN() \ + do { \ + printf("BEGIN %s()\n", __FUNCTION__); \ + } while (0) + +#define T_END() \ + do { \ + printf("END %s() success\n", __FUNCTION__); \ + } while (0) + /** Server crt group ID */ extern const char *server_group; @@ -54,9 +65,6 @@ extern const char *server_group; extern int dt_incr_reint; extern bool dt_no_punch; -/** pool interactive rebuild */ -extern bool dt_rb_interactive; - /** Pool service replicas */ extern unsigned int svc_nreplicas; extern const char *dmg_config_file; @@ -379,6 +387,7 @@ int run_daos_nvme_recov_test(int rank, int size, int *sub_tests, int run_daos_rebuild_simple_test(int rank, int size, int *tests, int test_size); int run_daos_drain_simple_test(int rank, int size, int *tests, int test_size); int run_daos_extend_simple_test(int rank, int size, int *tests, int test_size); +int run_daos_int_rebuild_test(int rank, int size, int *tests, int test_size); int run_daos_inc_reint_test(int rank, int size, int *tests, int test_size); int 
run_daos_rebuild_simple_ec_test(int rank, int size, int *tests, int test_size); @@ -411,6 +420,8 @@ bool test_rebuild_query(test_arg_t **args, int args_cnt); void test_rebuild_wait(test_arg_t **args, int args_cnt); void test_rebuild_wait_to_start(test_arg_t **args, int args_cnt); +void +test_rebuild_wait_to_start_after_ver(test_arg_t **args, int args_cnt, uint32_t rs_version); void test_rebuild_wait_to_error(test_arg_t **args, int args_cnt); int daos_pool_set_prop(const uuid_t pool_uuid, const char *name, @@ -500,6 +511,8 @@ int rebuild_start_with_dmg(void *data); int rebuild_resume_wait(void *data); +int + rebuild_resume_wait_to_start(void *data); int get_server_config(char *host, char *server_config_file); int get_log_file(char *host, char *server_config_file, @@ -539,8 +552,10 @@ void make_buffer(char *buffer, char start, int total); bool oid_is_ec(daos_obj_id_t oid, struct daos_oclass_attr **attr); uint32_t test_ec_get_parity_off(daos_key_t *dkey, struct daos_oclass_attr *oca); + int reintegrate_inflight_io(void *data); -int reintegrate_inflight_io_verify(void *data); +int +reintegrate_inflight_io_verify(void *data); static inline void daos_test_print(int rank, char *message) @@ -750,4 +765,72 @@ void test_set_engine_fail_num(test_arg_t *arg, d_rank_t engine_rank, uint64_t fa void test_verify_cont(test_arg_t *arg, struct test_pool *pool, struct test_cont *conts, int cont_nr); +/* Common types and functions for drain rebuild tests */ + +#define EXTEND_DRAIN_OBJ_NR 5 +#define WRITE_SIZE (1048576 * 5) + +struct extend_drain_cb_arg { + daos_obj_id_t *oids; + dfs_t *dfs_mt; + dfs_obj_t *dir; + d_rank_t rank; + uint32_t objclass; + int opc; +}; + +enum extend_drain_opc { + EXTEND_DRAIN_PUNCH, + EXTEND_DRAIN_STAT, + EXTEND_DRAIN_ENUMERATE, + EXTEND_DRAIN_FETCH, + EXTEND_DRAIN_UPDATE, + EXTEND_DRAIN_OVERWRITE, + EXTEND_DRAIN_WRITELOOP, +}; + +extern const char *extend_drain_opstrs[]; + +void +extend_drain_read_check(dfs_t *dfs_mt, dfs_obj_t *dir, uint32_t objclass, 
uint32_t objcnt, + daos_size_t total_size, char start_char); +void +extend_drain_write(dfs_t *dfs_mt, dfs_obj_t *dir, uint32_t objclass, uint32_t objcnt, + daos_size_t total_size, char write_char, daos_obj_id_t *oids); +void +extend_drain_check(dfs_t *dfs_mt, dfs_obj_t *dir, int objclass, int opc); +void +dfs_extend_drain_common(void **state, int opc, uint32_t objclass, + test_rebuild_cb_t extend_drain_cb_fn); + +/* Common types and functions for extend rebuild tests */ + +#define EXTEND_OBJ_NR 1000 + +struct extend_cb_arg { + daos_obj_id_t *oids; + dfs_t *dfs_mt; + dfs_obj_t *dir; + d_rank_t rank; + int opc; + bool kill; +}; + +enum extend_opc { + EXTEND_PUNCH, + EXTEND_STAT, + EXTEND_ENUMERATE, + EXTEND_FETCH, + EXTEND_UPDATE, +}; + +extern const char *extend_opstrs[]; + +void +dfs_extend_internal(void **state, int opc, test_rebuild_cb_t extend_cb, bool kill); +void +extend_read_check(dfs_t *dfs_mt, dfs_obj_t *dir); +void +extend_write(dfs_t *dfs_mt, dfs_obj_t *dir); + #endif diff --git a/src/tests/suite/daos_test_common.c b/src/tests/suite/daos_test_common.c index 31d6ce922a8..cc8b0e5b77c 100644 --- a/src/tests/suite/daos_test_common.c +++ b/src/tests/suite/daos_test_common.c @@ -38,9 +38,6 @@ int dt_redun_fac; int dt_incr_reint; bool dt_no_punch; /* will remove later */ -/** rebuild test variants */ -bool dt_rb_interactive = false; - /* Create or import a single pool with option to store info in arg->pool * or an alternate caller-specified test_pool structure. * ipool (optional): import pool: store info for an existing pool to arg->pool. @@ -407,12 +404,6 @@ test_setup(void **state, unsigned int step, bool multi_rank, } /** Look at variables set by test arguments and configure testing */ - if (dt_rb_interactive) { - print_message("\n-------\n" - "Interactive rebuild (stop|start) is enabled in some tests!" 
- "\n-------\n"); - arg->interactive_rebuild = 1; - } /** Look at variables set by test arguments and setup pool props */ if (dt_incr_reint) { @@ -790,8 +781,9 @@ test_pool_get_info(test_arg_t *arg, daos_pool_info_t *pinfo, d_rank_list_t **eng return rc; } +/* Determine if pool rebuild is busy, and the rebuild version is > rs_version */ static bool -rebuild_pool_started(test_arg_t *arg) +rebuild_pool_started(test_arg_t *arg, uint32_t rs_version) { daos_pool_info_t pinfo = {0}; struct daos_rebuild_status *rst; @@ -806,10 +798,12 @@ rebuild_pool_started(test_arg_t *arg) rc, DP_UUID(arg->pool.pool_uuid)); return false; } else { - bool started = (rst->rs_state == DRS_IN_PROGRESS); - print_message("rebuild for pool " DF_UUIDF "has %sstarted\n", - DP_UUID(arg->pool.pool_uuid), started ? "" : "not yet "); - return started; + bool in_progress = (rst->rs_state == DRS_IN_PROGRESS); + print_message("rebuild for pool " DF_UUIDF "has %sstarted, rs_version=%u " + "(waiting for > %d)\n", + DP_UUID(arg->pool.pool_uuid), in_progress ? "" : "not yet ", + rst->rs_version, rs_version); + return in_progress && (rst->rs_version > rs_version); } } @@ -830,11 +824,10 @@ rebuild_pool_erroring(test_arg_t *arg) return false; } else { bool started = (rst->rs_state == DRS_IN_PROGRESS); - bool erroring = started && (rst->rs_errno != 0); + bool erroring = (rst->rs_errno != 0); - print_message("rebuild for pool " DF_UUIDF "has %sstarted, rs_errno=%d\n", - DP_UUID(arg->pool.pool_uuid), started ? "" : "not yet ", - rst->rs_errno); + print_message("rebuild for pool " DF_UUIDF " is %scurrently running, rs_errno=%d\n", + DP_UUID(arg->pool.pool_uuid), started ? 
"" : "not ", rst->rs_errno); /* save final pool query info to be able to inspect rebuild status */ if (erroring) @@ -917,7 +910,7 @@ test_get_last_svr_rank(test_arg_t *arg) } bool -test_rebuild_started(test_arg_t **args, int args_cnt) +test_rebuild_started(test_arg_t **args, int args_cnt, uint32_t rs_version) { bool all_started = true; int i; @@ -926,7 +919,7 @@ test_rebuild_started(test_arg_t **args, int args_cnt) bool started = true; if (!args[i]->pool.destroyed) - started = rebuild_pool_started(args[i]); + started = rebuild_pool_started(args[i], rs_version); if (!started) all_started = false; @@ -937,7 +930,14 @@ test_rebuild_started(test_arg_t **args, int args_cnt) void test_rebuild_wait_to_start(test_arg_t **args, int args_cnt) { - while (!test_rebuild_started(args, args_cnt)) + while (!test_rebuild_started(args, args_cnt, 0 /* don't care rs_version */)) + sleep(2); +} + +void +test_rebuild_wait_to_start_after_ver(test_arg_t **args, int args_cnt, uint32_t rs_version) +{ + while (!test_rebuild_started(args, args_cnt, rs_version)) sleep(2); } From 57820c6b2815c77685133692eea2a9e1f491babf Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 2 Dec 2025 11:34:31 -0800 Subject: [PATCH 050/253] DAOS-18109 cq: update python pip versions (#16986) Updates `isort` from 6.1.0 to 7.0.0 Updates `pylint` from 3.3.9 to 4.0.1 Signed-off-by: dependabot[bot] Signed-off-by: Dalton Bohning --- .github/dependabot.yml | 2 +- site_scons/components/__init__.py | 4 +- src/client/pydaos/raw/daos_api.py | 129 ++++++++++--------- src/tests/ftest/cart/util/cart_logtest.py | 6 +- src/tests/ftest/nvme/enospace.py | 2 + src/tests/ftest/pool/create_all_hw.py | 4 + src/tests/ftest/pool/create_all_vm.py | 3 + src/tests/ftest/util/pool_create_all_base.py | 3 + utils/cq/d_logging_check.py | 2 +- utils/cq/requirements.txt | 4 +- 10 files changed, 85 insertions(+), 74 deletions(-) diff --git a/.github/dependabot.yml b/.github/dependabot.yml 
index cc3c1983fc3..a67faa6f34a 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -8,7 +8,7 @@ updates: ignore: - dependency-name: "avocado-framework*" schedule: - interval: daily + interval: weekly groups: python-packages: patterns: diff --git a/site_scons/components/__init__.py b/site_scons/components/__init__.py index 0d074dd3c24..da2518a9928 100644 --- a/site_scons/components/__init__.py +++ b/site_scons/components/__init__.py @@ -32,9 +32,7 @@ # Check if this is an ARM platform PROCESSOR = platform.machine() ARM_LIST = ["ARMv7", "armeabi", "aarch64", "arm64"] -ARM_PLATFORM = False -if PROCESSOR.lower() in [x.lower() for x in ARM_LIST]: - ARM_PLATFORM = True +ARM_PLATFORM = PROCESSOR.lower() in [x.lower() for x in ARM_LIST] class InstalledComps(): diff --git a/src/client/pydaos/raw/daos_api.py b/src/client/pydaos/raw/daos_api.py index e0207374828..76fb026b71d 100644 --- a/src/client/pydaos/raw/daos_api.py +++ b/src/client/pydaos/raw/daos_api.py @@ -403,23 +403,24 @@ class DaosObjClassOld(enum.IntEnum): DAOS_OC_R3S_SPEC_RANK = 21 -# pylint: disable=no-member +# pylint: disable=no-member,invalid-name + ConvertObjClass = { - DaosObjClassOld.DAOS_OC_TINY_RW: DaosObjClass.OC_S1, - DaosObjClassOld.DAOS_OC_SMALL_RW: DaosObjClass.OC_S4, - DaosObjClassOld.DAOS_OC_LARGE_RW: DaosObjClass.OC_SX, - DaosObjClassOld.DAOS_OC_R2S_RW: DaosObjClass.OC_RP_2G1, - DaosObjClassOld.DAOS_OC_R2_RW: DaosObjClass.OC_RP_2G2, - DaosObjClassOld.DAOS_OC_R2_MAX_RW: DaosObjClass.OC_RP_2GX, - DaosObjClassOld.DAOS_OC_R3S_RW: DaosObjClass.OC_RP_3G1, - DaosObjClassOld.DAOS_OC_R3_RW: DaosObjClass.OC_RP_3G2, - DaosObjClassOld.DAOS_OC_R3_MAX_RW: DaosObjClass.OC_RP_3GX, - DaosObjClassOld.DAOS_OC_R4S_RW: DaosObjClass.OC_RP_4G1, - DaosObjClassOld.DAOS_OC_R4_RW: DaosObjClass.OC_RP_4G2, - DaosObjClassOld.DAOS_OC_R4_MAX_RW: DaosObjClass.OC_RP_4GX, - DaosObjClassOld.DAOS_OC_REPL_MAX_RW: DaosObjClass.OC_RP_XSF + DaosObjClassOld.DAOS_OC_TINY_RW: DaosObjClass.OC_S1, # noqa: E241 + 
DaosObjClassOld.DAOS_OC_SMALL_RW: DaosObjClass.OC_S4, # noqa: E241 + DaosObjClassOld.DAOS_OC_LARGE_RW: DaosObjClass.OC_SX, # noqa: E241 + DaosObjClassOld.DAOS_OC_R2S_RW: DaosObjClass.OC_RP_2G1, # noqa: E241 + DaosObjClassOld.DAOS_OC_R2_RW: DaosObjClass.OC_RP_2G2, # noqa: E241 + DaosObjClassOld.DAOS_OC_R2_MAX_RW: DaosObjClass.OC_RP_2GX, # noqa: E241 + DaosObjClassOld.DAOS_OC_R3S_RW: DaosObjClass.OC_RP_3G1, # noqa: E241 + DaosObjClassOld.DAOS_OC_R3_RW: DaosObjClass.OC_RP_3G2, # noqa: E241 + DaosObjClassOld.DAOS_OC_R3_MAX_RW: DaosObjClass.OC_RP_3GX, # noqa: E241 + DaosObjClassOld.DAOS_OC_R4S_RW: DaosObjClass.OC_RP_4G1, # noqa: E241 + DaosObjClassOld.DAOS_OC_R4_RW: DaosObjClass.OC_RP_4G2, # noqa: E241 + DaosObjClassOld.DAOS_OC_R4_MAX_RW: DaosObjClass.OC_RP_4GX, # noqa: E241 + DaosObjClassOld.DAOS_OC_REPL_MAX_RW: DaosObjClass.OC_RP_XSF # noqa: E241 } -# pylint: enable=no-member +# pylint: enable=no-member,invalid-name def get_object_class(item): @@ -2307,55 +2308,55 @@ def __init__(self, path): # Note: action-subject format self.ftable = { - 'close-cont': self.libdaos.daos_cont_close, - 'close-obj': self.libdaos.daos_obj_close, - 'close-tx': self.libdaos.daos_tx_close, - 'commit-tx': self.libdaos.daos_tx_commit, - 'connect-pool': self.libdaos.daos_pool_connect, - 'convert-cglobal': self.libdaos.daos_cont_global2local, - 'convert-clocal': self.libdaos.daos_cont_local2global, - 'convert-pglobal': self.libdaos.daos_pool_global2local, - 'convert-plocal': self.libdaos.daos_pool_local2global, - 'create-cont': self.libdaos.daos_cont_create, - 'create-eq': self.libdaos.daos_eq_create, - 'create-snap': self.libdaos.daos_cont_create_snap, - 'destroy-cont': self.libdaos.daos_cont_destroy, - 'destroy-eq': self.libdaos.daos_eq_destroy, - 'destroy-snap': self.libdaos.daos_cont_destroy_snap, - 'destroy-tx': self.libdaos.daos_tx_abort, - 'disconnect-pool': self.libdaos.daos_pool_disconnect, - 'fetch-obj': self.libdaos.daos_obj_fetch, - 'generate-oid': 
self.libdaos.daos_obj_generate_oid, - 'get-cont-attr': self.libdaos.daos_cont_get_attr, - 'get-pool-attr': self.libdaos.daos_pool_get_attr, - 'get-layout': self.libdaos.daos_obj_layout_get, - 'init-event': self.libdaos.daos_event_init, - 'list-akey': self.libdaos.daos_obj_list_akey, - 'list-attr': self.libdaos.daos_cont_list_attr, - 'list-cont-attr': self.libdaos.daos_cont_list_attr, - 'list-dkey': self.libdaos.daos_obj_list_dkey, - 'list-pool-attr': self.libdaos.daos_pool_list_attr, - 'cont-aggregate': self.libdaos.daos_cont_aggregate, - 'list-snap': self.libdaos.daos_cont_list_snap, - 'open-cont': self.libdaos.daos_cont_open, - 'open-obj': self.libdaos.daos_obj_open, - 'open-snap': self.libdaos.daos_tx_open_snap, - 'open-tx': self.libdaos.daos_tx_open, - 'poll-eq': self.libdaos.daos_eq_poll, - 'punch-akeys': self.libdaos.daos_obj_punch_akeys, - 'punch-dkeys': self.libdaos.daos_obj_punch_dkeys, - 'punch-obj': self.libdaos.daos_obj_punch, - 'query-cont': self.libdaos.daos_cont_query, - 'query-obj': self.libdaos.daos_obj_query, - 'query-pool': self.libdaos.daos_pool_query, - 'query-target': self.libdaos.daos_pool_query_target, - 'restart-tx': self.libdaos.daos_tx_restart, - 'set-cont-attr': self.libdaos.daos_cont_set_attr, - 'set-pool-attr': self.libdaos.daos_pool_set_attr, - 'stop-service': self.libdaos.daos_pool_stop_svc, - 'test-event': self.libdaos.daos_event_test, - 'update-obj': self.libdaos.daos_obj_update, - 'oid_gen': self.libtest.dts_oid_gen if self.libtest else None} + 'close-cont': self.libdaos.daos_cont_close, # noqa: E241 + 'close-obj': self.libdaos.daos_obj_close, # noqa: E241 + 'close-tx': self.libdaos.daos_tx_close, # noqa: E241 + 'commit-tx': self.libdaos.daos_tx_commit, # noqa: E241 + 'connect-pool': self.libdaos.daos_pool_connect, # noqa: E241 + 'convert-cglobal': self.libdaos.daos_cont_global2local, # noqa: E241 + 'convert-clocal': self.libdaos.daos_cont_local2global, # noqa: E241 + 'convert-pglobal': self.libdaos.daos_pool_global2local, # noqa: 
E241 + 'convert-plocal': self.libdaos.daos_pool_local2global, # noqa: E241 + 'create-cont': self.libdaos.daos_cont_create, # noqa: E241 + 'create-eq': self.libdaos.daos_eq_create, # noqa: E241 + 'create-snap': self.libdaos.daos_cont_create_snap, # noqa: E241 + 'destroy-cont': self.libdaos.daos_cont_destroy, # noqa: E241 + 'destroy-eq': self.libdaos.daos_eq_destroy, # noqa: E241 + 'destroy-snap': self.libdaos.daos_cont_destroy_snap, # noqa: E241 + 'destroy-tx': self.libdaos.daos_tx_abort, # noqa: E241 + 'disconnect-pool': self.libdaos.daos_pool_disconnect, # noqa: E241 + 'fetch-obj': self.libdaos.daos_obj_fetch, # noqa: E241 + 'generate-oid': self.libdaos.daos_obj_generate_oid, # noqa: E241 + 'get-cont-attr': self.libdaos.daos_cont_get_attr, # noqa: E241 + 'get-pool-attr': self.libdaos.daos_pool_get_attr, # noqa: E241 + 'get-layout': self.libdaos.daos_obj_layout_get, # noqa: E241 + 'init-event': self.libdaos.daos_event_init, # noqa: E241 + 'list-akey': self.libdaos.daos_obj_list_akey, # noqa: E241 + 'list-attr': self.libdaos.daos_cont_list_attr, # noqa: E241 + 'list-cont-attr': self.libdaos.daos_cont_list_attr, # noqa: E241 + 'list-dkey': self.libdaos.daos_obj_list_dkey, # noqa: E241 + 'list-pool-attr': self.libdaos.daos_pool_list_attr, # noqa: E241 + 'cont-aggregate': self.libdaos.daos_cont_aggregate, # noqa: E241 + 'list-snap': self.libdaos.daos_cont_list_snap, # noqa: E241 + 'open-cont': self.libdaos.daos_cont_open, # noqa: E241 + 'open-obj': self.libdaos.daos_obj_open, # noqa: E241 + 'open-snap': self.libdaos.daos_tx_open_snap, # noqa: E241 + 'open-tx': self.libdaos.daos_tx_open, # noqa: E241 + 'poll-eq': self.libdaos.daos_eq_poll, # noqa: E241 + 'punch-akeys': self.libdaos.daos_obj_punch_akeys, # noqa: E241 + 'punch-dkeys': self.libdaos.daos_obj_punch_dkeys, # noqa: E241 + 'punch-obj': self.libdaos.daos_obj_punch, # noqa: E241 + 'query-cont': self.libdaos.daos_cont_query, # noqa: E241 + 'query-obj': self.libdaos.daos_obj_query, # noqa: E241 + 'query-pool': 
self.libdaos.daos_pool_query, # noqa: E241 + 'query-target': self.libdaos.daos_pool_query_target, # noqa: E241 + 'restart-tx': self.libdaos.daos_tx_restart, # noqa: E241 + 'set-cont-attr': self.libdaos.daos_cont_set_attr, # noqa: E241 + 'set-pool-attr': self.libdaos.daos_pool_set_attr, # noqa: E241 + 'stop-service': self.libdaos.daos_pool_stop_svc, # noqa: E241 + 'test-event': self.libdaos.daos_event_test, # noqa: E241 + 'update-obj': self.libdaos.daos_obj_update, # noqa: E241 + 'oid_gen': self.libtest.dts_oid_gen if self.libtest else None} # noqa: E241 def get_function(self, function): """Get a function handle by name. diff --git a/src/tests/ftest/cart/util/cart_logtest.py b/src/tests/ftest/cart/util/cart_logtest.py index e0cfe5bd3bb..decfa9fd015 100755 --- a/src/tests/ftest/cart/util/cart_logtest.py +++ b/src/tests/ftest/cart/util/cart_logtest.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 # # (C) Copyright 2018-2024 Intel Corporation +# (C) Copyright 2025 Hewlett Packard Enterprise Development LP # # SPDX-License-Identifier: BSD-2-Clause-Patent @@ -14,11 +15,10 @@ import cart_logparse -HAVE_TABULATE = True try: import tabulate except ImportError: - HAVE_TABULATE = False + tabulate = None class LogCheckError(Exception): @@ -766,7 +766,7 @@ def report(self): errors.append("ERROR: Opcode {}: Alloc'd Total = {}, Dealloc'd Total = {}". format(operation, counts['ALLOCATED'], counts['DEALLOCATED'])) - if HAVE_TABULATE: + if tabulate is not None: print('Opcode State Transition Tally') print(tabulate.tabulate(table, headers=headers, diff --git a/src/tests/ftest/nvme/enospace.py b/src/tests/ftest/nvme/enospace.py index 89376c9427d..1b85b26e1f4 100644 --- a/src/tests/ftest/nvme/enospace.py +++ b/src/tests/ftest/nvme/enospace.py @@ -461,6 +461,7 @@ def run_enospace_foreground(self, log_file): # Fill 75% of current SCM free space. Aggregation is Enabled so NVMe space will # start to fill up. 
+ # pylint: disable-next=logging-too-few-args self.log.info('--Filling 75% of the current SCM free space--') try: self.start_ior_load(storage='SCM', operation="Auto_Write", percent=75) @@ -477,6 +478,7 @@ def run_enospace_foreground(self, log_file): # Fill 60% of current SCM free space. This time, NVMe will be Full so data will # not be moved to NVMe and continue to fill up SCM. SCM will be full and this # command is expected to fail with DER_NOSPACE. + # pylint: disable-next=logging-too-few-args self.log.info('--Filling 60% of the current SCM free space--') try: self.start_ior_load( diff --git a/src/tests/ftest/pool/create_all_hw.py b/src/tests/ftest/pool/create_all_hw.py index 2762cd07407..66f0cec8080 100644 --- a/src/tests/ftest/pool/create_all_hw.py +++ b/src/tests/ftest/pool/create_all_hw.py @@ -1,5 +1,6 @@ """ (C) Copyright 2022-2023 Intel Corporation. +(C) Copyright 2025 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -108,17 +109,20 @@ def test_two_pools_hw(self): pool_half_deltas_bytes = self.get_deltas("test_two_pools_hw", "pool_half") pool_full_deltas_bytes = self.get_deltas("test_two_pools_hw", "pool_full") distribution_deltas_bytes = self.get_deltas("test_two_pools_hw", "distribution") + # pylint: disable-next=logging-too-few-args self.log.info( "Test pool creation of two pools with 50% and 100% of the available storage") for name in ('pool_half', 'pool_full', 'distribution'): val = locals()["{}_deltas_bytes".format(name)] self.log_deltas(*val, prefix=name) + # pylint: disable-next=logging-format-truncated self.log.info("Creating first pool with half of the available storage: size=50%") self.check_pool_half_storage(*pool_half_deltas_bytes) self.log.info("Checking data distribution among the different engines") self.check_pool_distribution(*distribution_deltas_bytes) + # pylint: disable-next=logging-format-truncated self.log.info("Creating second pool with all the available storage: size=100%") 
self.check_pool_full_storage(*pool_full_deltas_bytes) diff --git a/src/tests/ftest/pool/create_all_vm.py b/src/tests/ftest/pool/create_all_vm.py index eeaee46b9db..43830531712 100644 --- a/src/tests/ftest/pool/create_all_vm.py +++ b/src/tests/ftest/pool/create_all_vm.py @@ -156,6 +156,7 @@ def test_two_pools_vm(self): "distribution", "/run/test_two_pools_vm/deltas/*", 0) + # pylint: disable-next=logging-too-few-args self.log.info( "Test pool creation of two pools with 50% and 100% of the available storage") for name in ('pool_half', 'pool_full', 'distribution'): @@ -163,11 +164,13 @@ def test_two_pools_vm(self): self.log.info("\t- %s=%s (%d Bytes)", name, bytes_to_human(val), val) self.log.info("\t- scm_hugepages_enabled=%s", self.scm_hugepages_enabled) + # pylint: disable-next=logging-format-truncated self.log.info("Creating first pool with half of the available storage: size=50%") self.check_pool_half_storage(pool_half_delta_bytes) self.log.info("Checking data distribution among the different engines") self.check_pool_distribution(distribution_delta_bytes) + # pylint: disable-next=logging-format-truncated self.log.info("Creating second pool with all the available storage: size=100%") self.check_pool_full_storage(pool_full_delta_bytes) diff --git a/src/tests/ftest/util/pool_create_all_base.py b/src/tests/ftest/util/pool_create_all_base.py index ff72e2f077f..660626c2c8e 100644 --- a/src/tests/ftest/util/pool_create_all_base.py +++ b/src/tests/ftest/util/pool_create_all_base.py @@ -1,5 +1,6 @@ """ (C) Copyright 2022-2024 Intel Corporation. 
+(C) Copyright 2025 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -109,6 +110,7 @@ def check_pool_full_storage(self, scm_delta_bytes, nvme_delta_bytes=None, ranks= self.add_pool_qty(pool_count, create=False) pool_idx = len(self.pool) - pool_count + # pylint: disable-next=logging-format-truncated self.log.info("Creating a pool with all the available storage: size=100%") self.pool[pool_idx].size.update("100%", "pool[{}].size".format(pool_idx)) if ranks is not None: @@ -362,6 +364,7 @@ def check_pool_half_storage(self, scm_delta_bytes, nvme_delta_bytes=None): usable_bytes = self.get_usable_bytes() self.log.info("Usable bytes: scm_size=%d, nvme_size=%d", *usable_bytes) + # pylint: disable-next=logging-format-truncated self.log.info("Creating pool with half of the available storage: size=50%") self.pool[0].size.update("50%") self.pool[0].create() diff --git a/utils/cq/d_logging_check.py b/utils/cq/d_logging_check.py index 328f0155469..d3d2b3d84ce 100755 --- a/utils/cq/d_logging_check.py +++ b/utils/cq/d_logging_check.py @@ -18,7 +18,7 @@ import re import sys -ARGS = None +ARGS = None # pylint: disable=invalid-name class FileLine(): diff --git a/utils/cq/requirements.txt b/utils/cq/requirements.txt index b3fb1ab2f2f..dd4ce3ee6be 100644 --- a/utils/cq/requirements.txt +++ b/utils/cq/requirements.txt @@ -3,8 +3,8 @@ pyenchant ## flake8 6 removed --diff option which breaks flake precommit hook. ## https://github.com/pycqa/flake8/issues/1389 https://github.com/PyCQA/flake8/pull/1720 flake8==7.3.0 -isort==6.1.0 -pylint==3.3.9 +isort==7.0.0 +pylint==4.0.1 yamllint==1.37.1 codespell==2.4.1 # Used by ci/jira_query.py which pip installs it standalone. From 7ac9fa07600eedf1d512f668e55fd41b7f39d15c Mon Sep 17 00:00:00 2001 From: Cedric Koch-Hofer <94527853+knard38@users.noreply.github.com> Date: Wed, 3 Dec 2025 17:11:23 +0100 Subject: [PATCH 051/253] DAOS-17322 ddb: DTX aggregation with ddb (#17022) Fix spelling issue and code style. 
Signed-off-by: Cedric Koch-Hofer --- src/control/cmd/ddb/ddb_commands.go | 2 +- src/include/daos_srv/vos.h | 3 +- src/utils/ddb/ddb_commands.c | 55 ++++++++++++++++++----------- src/vos/vos_dtx.c | 48 +++++++++++++++---------- 4 files changed, 67 insertions(+), 41 deletions(-) diff --git a/src/control/cmd/ddb/ddb_commands.go b/src/control/cmd/ddb/ddb_commands.go index bee8aeddc4d..aa8bb7f968a 100644 --- a/src/control/cmd/ddb/ddb_commands.go +++ b/src/control/cmd/ddb/ddb_commands.go @@ -434,7 +434,7 @@ the path must include the extent, otherwise, it must not.`, a.String("path", "Optional, VOS tree path of a container to aggregate.", grumble.Default("")) }, Flags: func(f *grumble.Flags) { - f.Uint64("t", "cmt_time", math.MaxUint64, "Max aggregfation committed time in seconds") + f.Uint64("t", "cmt_time", math.MaxUint64, "Max aggregation committed time in seconds") f.String("d", "cmt_date", "", "Max aggregation committed date (format '1970-01-01 00:00:00')") }, Run: func(c *grumble.Context) error { diff --git a/src/include/daos_srv/vos.h b/src/include/daos_srv/vos.h index 3eef3f4702d..f152a11f39a 100644 --- a/src/include/daos_srv/vos.h +++ b/src/include/daos_srv/vos.h @@ -1799,7 +1799,8 @@ bool vos_oi_exist(daos_handle_t coh, daos_unit_oid_t oid); /* Timing statistic of DTX entries */ -#define DTX_TIME_STAT_COUNT 3 +enum { DTX_TIME_STAT_MIN = 0, DTX_TIME_STAT_MAX, DTX_TIME_STAT_MEAN, DTX_TIME_STAT_COUNT }; + struct dtx_time_stat { daos_epoch_t dts_epoch[DTX_TIME_STAT_COUNT]; uint64_t dts_cmt_time[DTX_TIME_STAT_COUNT]; diff --git a/src/utils/ddb/ddb_commands.c b/src/utils/ddb/ddb_commands.c index 0e26686a24c..705c0eaabda 100644 --- a/src/utils/ddb/ddb_commands.c +++ b/src/utils/ddb/ddb_commands.c @@ -1506,35 +1506,48 @@ dtx_stat_cont_cb(daos_handle_t ih, vos_iter_entry_t *entry, vos_iter_type_t type if (args->opt->details) { if (args->aggr_epoch < args_tmp.aggr_epoch) args->aggr_epoch = args_tmp.aggr_epoch; - if (args->time_stat.dts_cmt_time[0] > 
args_tmp.time_stat.dts_cmt_time[0]) - args->time_stat.dts_cmt_time[0] = args_tmp.time_stat.dts_cmt_time[0]; - if (args->time_stat.dts_cmt_time[1] < args_tmp.time_stat.dts_cmt_time[1]) - args->time_stat.dts_cmt_time[1] = args_tmp.time_stat.dts_cmt_time[1]; - if (args->time_stat.dts_cmt_time[2] == 0) - args->time_stat.dts_cmt_time[2] = args_tmp.time_stat.dts_cmt_time[2]; + if (args->time_stat.dts_cmt_time[DTX_TIME_STAT_MIN] > + args_tmp.time_stat.dts_cmt_time[DTX_TIME_STAT_MIN]) + args->time_stat.dts_cmt_time[DTX_TIME_STAT_MIN] = + args_tmp.time_stat.dts_cmt_time[DTX_TIME_STAT_MIN]; + if (args->time_stat.dts_cmt_time[DTX_TIME_STAT_MAX] < + args_tmp.time_stat.dts_cmt_time[DTX_TIME_STAT_MAX]) + args->time_stat.dts_cmt_time[DTX_TIME_STAT_MAX] = + args_tmp.time_stat.dts_cmt_time[DTX_TIME_STAT_MAX]; + if (args->time_stat.dts_cmt_time[DTX_TIME_STAT_MEAN] == 0) + args->time_stat.dts_cmt_time[DTX_TIME_STAT_MEAN] = + args_tmp.time_stat.dts_cmt_time[DTX_TIME_STAT_MEAN]; else { long double tmp_mean; - tmp_mean = args->time_stat.dts_cmt_time[2] * (long double)args->cmt_cnt; - tmp_mean += (long double)args_tmp.time_stat.dts_cmt_time[2] * - (long double)args_tmp.cmt_cnt; + tmp_mean = args->time_stat.dts_cmt_time[DTX_TIME_STAT_MEAN] * + (long double)args->cmt_cnt; + tmp_mean += + (long double)args_tmp.time_stat.dts_cmt_time[DTX_TIME_STAT_MEAN] * + (long double)args_tmp.cmt_cnt; tmp_mean /= (long double)(args->cmt_cnt + args_tmp.cmt_cnt); - args->time_stat.dts_cmt_time[2] = tmp_mean; + args->time_stat.dts_cmt_time[DTX_TIME_STAT_MEAN] = tmp_mean; } - if (args->time_stat.dts_epoch[0] > args_tmp.time_stat.dts_epoch[0]) - args->time_stat.dts_epoch[0] = args_tmp.time_stat.dts_epoch[0]; - if (args->time_stat.dts_epoch[1] < args_tmp.time_stat.dts_epoch[1]) - args->time_stat.dts_epoch[1] = args_tmp.time_stat.dts_epoch[1]; - if (args->time_stat.dts_epoch[2] == 0) - args->time_stat.dts_epoch[2] = args_tmp.time_stat.dts_epoch[2]; + if (args->time_stat.dts_epoch[DTX_TIME_STAT_MIN] > + 
args_tmp.time_stat.dts_epoch[DTX_TIME_STAT_MIN]) + args->time_stat.dts_epoch[DTX_TIME_STAT_MIN] = + args_tmp.time_stat.dts_epoch[DTX_TIME_STAT_MIN]; + if (args->time_stat.dts_epoch[DTX_TIME_STAT_MAX] < + args_tmp.time_stat.dts_epoch[DTX_TIME_STAT_MAX]) + args->time_stat.dts_epoch[DTX_TIME_STAT_MAX] = + args_tmp.time_stat.dts_epoch[DTX_TIME_STAT_MAX]; + if (args->time_stat.dts_epoch[DTX_TIME_STAT_MEAN] == 0) + args->time_stat.dts_epoch[DTX_TIME_STAT_MEAN] = + args_tmp.time_stat.dts_epoch[DTX_TIME_STAT_MEAN]; else { long double tmp_mean; - tmp_mean = args->time_stat.dts_epoch[2] * (long double)args->cmt_cnt; - tmp_mean += (long double)args_tmp.time_stat.dts_epoch[2] * + tmp_mean = args->time_stat.dts_epoch[DTX_TIME_STAT_MEAN] * + (long double)args->cmt_cnt; + tmp_mean += (long double)args_tmp.time_stat.dts_epoch[DTX_TIME_STAT_MEAN] * (long double)args_tmp.cmt_cnt; tmp_mean /= (long double)(args->cmt_cnt + args_tmp.cmt_cnt); - args->time_stat.dts_epoch[2] = tmp_mean; + args->time_stat.dts_epoch[DTX_TIME_STAT_MEAN] = tmp_mean; } } @@ -1592,8 +1605,8 @@ ddb_run_dtx_stat(struct ddb_ctx *ctx, struct dtx_stat_options *opt) } args.cmt_cnt = 0; - args.time_stat.dts_cmt_time[0] = UINT64_MAX; - args.time_stat.dts_epoch[0] = DAOS_EPOCH_MAX; + args.time_stat.dts_cmt_time[DTX_TIME_STAT_MIN] = UINT64_MAX; + args.time_stat.dts_epoch[DTX_TIME_STAT_MIN] = DAOS_EPOCH_MAX; param.ip_hdl = ctx->dc_poh; param.ip_epr.epr_hi = DAOS_EPOCH_MAX; do { diff --git a/src/vos/vos_dtx.c b/src/vos/vos_dtx.c index 86005bca3bc..c64adc12bb0 100644 --- a/src/vos/vos_dtx.c +++ b/src/vos/vos_dtx.c @@ -4070,10 +4070,12 @@ vos_dtx_local_end(struct dtx_handle *dth, int result) return result; } +enum { DTS_EPOCH_ACC = 0, DTS_CMT_TIME_ACC, DTS_ACC_COUNT }; + struct dtx_time_stat_priv { struct dtx_time_stat dts_pub; /* DAOS-17322: Use of floating point to avoid integer overflow issue */ - long double dts_mean[2]; + long double dts_mean[DTS_ACC_COUNT]; }; int @@ -4099,8 +4101,8 @@ 
vos_dtx_get_cmt_stat(daos_handle_t coh, uint64_t *cmt_cnt, struct dtx_time_stat cmt_cnt_tmp = 0; umm = vos_cont2umm(cont); dbd = umem_off2ptr(umm, cont->vc_cont_df->cd_dtx_committed_head); - dts_tmp.dts_pub.dts_epoch[0] = DAOS_EPOCH_MAX; - dts_tmp.dts_pub.dts_cmt_time[0] = UINT64_MAX; + dts_tmp.dts_pub.dts_epoch[DTX_TIME_STAT_MIN] = DAOS_EPOCH_MAX; + dts_tmp.dts_pub.dts_cmt_time[DTX_TIME_STAT_MIN] = UINT64_MAX; while (dbd != NULL) { if (dbd->dbd_magic != DTX_CMT_BLOB_MAGIC) { D_ERROR("Committed DTX blob with bad magic: container=" DF_UUID @@ -4119,17 +4121,25 @@ vos_dtx_get_cmt_stat(daos_handle_t coh, uint64_t *cmt_cnt, struct dtx_time_stat dce_df = &dbd->dbd_committed_data[i]; - if (dts_tmp.dts_pub.dts_epoch[0] > dce_df->dce_epoch) - dts_tmp.dts_pub.dts_epoch[0] = dce_df->dce_epoch; - if (dts_tmp.dts_pub.dts_epoch[1] < dce_df->dce_epoch) - dts_tmp.dts_pub.dts_epoch[1] = dce_df->dce_epoch; - dts_tmp.dts_mean[0] += dce_df->dce_epoch; - - if (dts_tmp.dts_pub.dts_cmt_time[0] > dce_df->dce_cmt_time) - dts_tmp.dts_pub.dts_cmt_time[0] = dce_df->dce_cmt_time; - if (dts_tmp.dts_pub.dts_cmt_time[1] < dce_df->dce_cmt_time) - dts_tmp.dts_pub.dts_cmt_time[1] = dce_df->dce_cmt_time; - dts_tmp.dts_mean[1] += dce_df->dce_cmt_time; + if (dts_tmp.dts_pub.dts_epoch[DTX_TIME_STAT_MIN] > + dce_df->dce_epoch) + dts_tmp.dts_pub.dts_epoch[DTX_TIME_STAT_MIN] = + dce_df->dce_epoch; + if (dts_tmp.dts_pub.dts_epoch[DTX_TIME_STAT_MAX] < + dce_df->dce_epoch) + dts_tmp.dts_pub.dts_epoch[DTX_TIME_STAT_MAX] = + dce_df->dce_epoch; + dts_tmp.dts_mean[DTS_EPOCH_ACC] += dce_df->dce_epoch; + + if (dts_tmp.dts_pub.dts_cmt_time[DTX_TIME_STAT_MIN] > + dce_df->dce_cmt_time) + dts_tmp.dts_pub.dts_cmt_time[DTX_TIME_STAT_MIN] = + dce_df->dce_cmt_time; + if (dts_tmp.dts_pub.dts_cmt_time[DTX_TIME_STAT_MAX] < + dce_df->dce_cmt_time) + dts_tmp.dts_pub.dts_cmt_time[DTX_TIME_STAT_MAX] = + dce_df->dce_cmt_time; + dts_tmp.dts_mean[DTS_CMT_TIME_ACC] += dce_df->dce_cmt_time; } } @@ -4140,11 +4150,13 @@ 
vos_dtx_get_cmt_stat(daos_handle_t coh, uint64_t *cmt_cnt, struct dtx_time_stat if (dts != NULL) { if (cmt_cnt_tmp != 0) { - dts_tmp.dts_mean[0] /= (long double)cmt_cnt_tmp; - dts_tmp.dts_pub.dts_epoch[2] = (daos_epoch_t)dts_tmp.dts_mean[0]; + dts_tmp.dts_mean[DTS_EPOCH_ACC] /= (long double)cmt_cnt_tmp; + dts_tmp.dts_pub.dts_epoch[DTX_TIME_STAT_MEAN] = + (daos_epoch_t)dts_tmp.dts_mean[DTS_EPOCH_ACC]; - dts_tmp.dts_mean[1] /= (long double)cmt_cnt_tmp; - dts_tmp.dts_pub.dts_cmt_time[2] = (uint64_t)dts_tmp.dts_mean[1]; + dts_tmp.dts_mean[DTS_CMT_TIME_ACC] /= (long double)cmt_cnt_tmp; + dts_tmp.dts_pub.dts_cmt_time[DTX_TIME_STAT_MEAN] = + (uint64_t)dts_tmp.dts_mean[DTS_CMT_TIME_ACC]; } memcpy(dts, &dts_tmp, sizeof(struct dtx_time_stat)); From a02d84f67d450d7ed7779fce344d25ef183c4bd2 Mon Sep 17 00:00:00 2001 From: Tom Nabarro Date: Thu, 4 Dec 2025 14:11:08 +0000 Subject: [PATCH 052/253] DAOS-18233 test: Fix missing self_heal_policy in list_verbose.py (#17207) Signed-off-by: Tom Nabarro --- src/tests/ftest/pool/list_verbose.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/tests/ftest/pool/list_verbose.py b/src/tests/ftest/pool/list_verbose.py index f75320b17e5..5cd429a748b 100644 --- a/src/tests/ftest/pool/list_verbose.py +++ b/src/tests/ftest/pool/list_verbose.py @@ -84,6 +84,7 @@ def create_expected(self, pool, scm_free, nvme_free, scm_imbalance, "records": 0, "total_objects": 0 }, + "self_heal_policy": "", # NB: tests should not expect min/max/mean values "tier_stats": [ { From 7bae05c920555fad7dae734e789db359b49a3fd1 Mon Sep 17 00:00:00 2001 From: Tom Nabarro Date: Thu, 4 Dec 2025 14:14:15 +0000 Subject: [PATCH 053/253] DAOS-18300 control: Set system name in SystemGetProp upcall (#17209) Signed-off-by: Tom Nabarro --- src/control/server/mgmt_drpc.go | 1 + 1 file changed, 1 insertion(+) diff --git a/src/control/server/mgmt_drpc.go b/src/control/server/mgmt_drpc.go index 9dc06d9f6cf..03bc64983b1 100644 --- a/src/control/server/mgmt_drpc.go +++ 
b/src/control/server/mgmt_drpc.go @@ -297,6 +297,7 @@ func (mod *srvModule) handleGetSysProps(reqb []byte) ([]byte, error) { msReq.Keys = append(msReq.Keys, t) } msReq.SetHostList(mod.msReplicas) + msReq.SetSystem(req.Sys) msResp, err := control.SystemGetProp(ctx, mod.client, msReq) if err != nil { From 317a7bd0cea23c77512cb99e23203ceae8276588 Mon Sep 17 00:00:00 2001 From: Cedric Koch-Hofer <94527853+knard38@users.noreply.github.com> Date: Thu, 4 Dec 2025 15:38:30 +0100 Subject: [PATCH 054/253] DAOS-18284 ddb: display epoch when listing akey contents with ddb (#17185) As illustrated by the following trace, several upload of the same RECX are displayed with the same info with the ls command. ``` ddb: ls /[0]/[1]/[0]/[0] Listing contents of 'AKEY: (/[0]/[1]/[0]/[0]) /32c0eb32-0f21-49c9-877e-d9b78e8bca03/281479271677952.999.0.2/dkey/akey' RECX: (/[0]/[1]/[0]/[0]/[0]) /32c0eb32-0f21-49c9-877e-d9b78e8bca03/281479271677952.999.0.2/dkey/akey/{128-191} RECX: (/[0]/[1]/[0]/[0]/[1]) /32c0eb32-0f21-49c9-877e-d9b78e8bca03/281479271677952.999.0.2/dkey/akey/{128-191} RECX: (/[0]/[1]/[0]/[0]/[2]) /32c0eb32-0f21-49c9-877e-d9b78e8bca03/281479271677952.999.0.2/dkey/akey/{128-191} RECX: (/[0]/[1]/[0]/[0]/[3]) /32c0eb32-0f21-49c9-877e-d9b78e8bca03/281479271677952.999.0.2/dkey/akey/{128-191} ddb: ls -d /[0]/[1]/[0]/[0] Listing contents of 'AKEY: (/[0]/[1]/[0]/[0]) /32c0eb32-0f21-49c9-877e-d9b78e8bca03/281479271677952.999.0.2/dkey/akey' [0] Array Value (Length: 64 records, Record Indexes: {128-191}, Record Size: 1) [1] Array Value (Length: 64 records, Record Indexes: {128-191}, Record Size: 1) [2] Array Value (Length: 64 records, Record Indexes: {128-191}, Record Size: 1) [3] Array Value (Length: 64 records, Record Indexes: {128-191}, Record Size: 1) ``` As illustrated by the following trace, this PR add a new `epoch` field allowing to easily find which RECX is visible. 
``` ddb: ls -d /[0]/[1]/[0]/[0] Listing contents of 'AKEY: (/[0]/[1]/[0]/[0]) /32c0eb32-0f21-49c9-877e-d9b78e8bca03/281479271677952.999.0.2/dkey/akey' [0] Array Value (Length: 64 records, Record Indexes: {128-191}, Record Size: 1, Epoch: 2475435709424205824) [1] Array Value (Length: 64 records, Record Indexes: {128-191}, Record Size: 1, Epoch: 2475435661897236480) [2] Array Value (Length: 64 records, Record Indexes: {128-191}, Record Size: 1, Epoch: 2475435607650205696) [3] Array Value (Length: 64 records, Record Indexes: {128-191}, Record Size: 1, Epoch: 2475435507796148224) ``` This new field will also be needed for being able to use efficiently the new checksum command introduced by the ticket [DAOS-17321](https://daosio.atlassian.net/browse/DAOS-17321) Signed-off-by: Cedric Koch-Hofer --- src/utils/ddb/ddb_printer.c | 19 +++++++++---------- src/utils/ddb/ddb_vos.c | 12 +++++++----- src/utils/ddb/ddb_vos.h | 17 +++++++++-------- .../ddb/tests/ddb_commands_print_tests.c | 16 +++++++++------- 4 files changed, 34 insertions(+), 30 deletions(-) diff --git a/src/utils/ddb/ddb_printer.c b/src/utils/ddb/ddb_printer.c index aadd7cd4b48..99302ece91c 100644 --- a/src/utils/ddb/ddb_printer.c +++ b/src/utils/ddb/ddb_printer.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2022-2024 Intel Corporation. 
+ * (C) Copyright 2025 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -135,22 +136,20 @@ void ddb_print_sv(struct ddb_ctx *ctx, struct ddb_sv *sv, uint32_t indent) { print_indent(ctx, indent); - ddb_printf(ctx, DF_IDX" Single Value (Length: "DF_U64" bytes)\n", - sv->ddbs_idx, - sv->ddbs_record_size); + ddb_printf(ctx, DF_IDX " Single Value (Length: " DF_U64 " bytes, Epoch: " DF_U64 ")\n", + sv->ddbs_idx, sv->ddbs_record_size, sv->ddbs_epoch); } void ddb_print_array(struct ddb_ctx *ctx, struct ddb_array *array, uint32_t indent) { print_indent(ctx, indent); - ddb_printf(ctx, DF_IDX" Array Value (Length: "DF_U64" records, Record Indexes: " - "{"DF_U64"-"DF_U64"}, Record Size: "DF_U64")\n", - array->ddba_idx, - array->ddba_recx.rx_nr, - array->ddba_recx.rx_idx, - array->ddba_recx.rx_idx + array->ddba_recx.rx_nr - 1, - array->ddba_record_size); + ddb_printf(ctx, + DF_IDX " Array Value (Length: " DF_U64 " records, Record Indexes: " + "{" DF_U64 "-" DF_U64 "}, Record Size: " DF_U64 ", Epoch: " DF_U64 ")\n", + array->ddba_idx, array->ddba_recx.rx_nr, array->ddba_recx.rx_idx, + array->ddba_recx.rx_idx + array->ddba_recx.rx_nr - 1, array->ddba_record_size, + array->ddba_epoch); } void diff --git a/src/utils/ddb/ddb_vos.c b/src/utils/ddb/ddb_vos.c index 5d53e8b3609..22160ae0e8f 100644 --- a/src/utils/ddb/ddb_vos.c +++ b/src/utils/ddb/ddb_vos.c @@ -778,8 +778,9 @@ handle_sv(struct ddb_iter_ctx *ctx, vos_iter_entry_t *entry) D_ASSERT(ctx && ctx->handlers && ctx->handlers->ddb_sv_handler); value.ddbs_record_size = entry->ie_rsize; - value.ddbs_idx = ctx->value_seen++; - value.ddbs_path = &ctx->itp; + value.ddbs_epoch = entry->ie_epoch; + value.ddbs_idx = ctx->value_seen++; + value.ddbs_path = &ctx->itp; return ctx->handlers->ddb_sv_handler(&value, ctx->handler_args); } @@ -791,10 +792,11 @@ handle_array(struct ddb_iter_ctx *ctx, vos_iter_entry_t *entry) D_ASSERT(ctx && ctx->handlers && ctx->handlers->ddb_array_handler); 
itp_set_recx(&ctx->itp, &entry->ie_orig_recx, ctx->value_seen); - value.ddba_path = &ctx->itp; + value.ddba_path = &ctx->itp; value.ddba_record_size = entry->ie_rsize; - value.ddba_recx = entry->ie_orig_recx; - value.ddba_idx = ctx->value_seen++; + value.ddba_recx = entry->ie_orig_recx; + value.ddba_epoch = entry->ie_epoch; + value.ddba_idx = ctx->value_seen++; return ctx->handlers->ddb_array_handler(&value, ctx->handler_args); } diff --git a/src/utils/ddb/ddb_vos.h b/src/utils/ddb/ddb_vos.h index 465ad12ccfc..3303c643340 100644 --- a/src/utils/ddb/ddb_vos.h +++ b/src/utils/ddb/ddb_vos.h @@ -36,17 +36,18 @@ struct ddb_key { }; struct ddb_sv { - uint64_t ddbs_record_size; - uint32_t ddbs_idx; - struct dv_indexed_tree_path *ddbs_path; + uint64_t ddbs_record_size; + daos_epoch_t ddbs_epoch; + uint32_t ddbs_idx; + struct dv_indexed_tree_path *ddbs_path; }; struct ddb_array { - uint64_t ddba_record_size; - daos_recx_t ddba_recx; - uint32_t ddba_idx; - struct dv_indexed_tree_path *ddba_path; - + uint64_t ddba_record_size; + daos_recx_t ddba_recx; + daos_epoch_t ddba_epoch; + uint32_t ddba_idx; + struct dv_indexed_tree_path *ddba_path; }; /* Open and close a pool for a ddb_ctx */ diff --git a/src/utils/ddb/tests/ddb_commands_print_tests.c b/src/utils/ddb/tests/ddb_commands_print_tests.c index 5b0d1673816..9b60b070dd5 100644 --- a/src/utils/ddb/tests/ddb_commands_print_tests.c +++ b/src/utils/ddb/tests/ddb_commands_print_tests.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2022-2023 Intel Corporation. 
+ * (C) Copyright 2025 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -146,25 +147,26 @@ print_key_test(void **state) static void print_sv_test(void **state) { - struct ddb_sv sv = {.ddbs_record_size = 19089555}; + struct ddb_sv sv = {.ddbs_record_size = 19089555, .ddbs_epoch = 49126485506073}; ddb_print_sv(&g_ctx, &sv, 0); - assert_printed_exact("[0] Single Value (Length: 19089555 bytes)\n"); + assert_printed_exact("[0] Single Value (Length: 19089555 bytes, Epoch: 49126485506073)\n"); } static void print_array_test(void **state) { struct ddb_array array = { - .ddba_recx.rx_idx = 64, - .ddba_recx.rx_nr = 128, - .ddba_record_size = 3, - .ddba_idx = 8, + .ddba_recx.rx_idx = 64, + .ddba_recx.rx_nr = 128, + .ddba_record_size = 3, + .ddba_idx = 8, + .ddba_epoch = 49126485506073, }; ddb_print_array(&g_ctx, &array, 0); assert_printed_exact("[8] Array Value (Length: 128 records, " - "Record Indexes: {64-191}, Record Size: 3)\n"); + "Record Indexes: {64-191}, Record Size: 3, Epoch: 49126485506073)\n"); } #define assert_hr_bytes(expected_str, bytes) \ From 4f57b0f4577725b3c53babed6f2d52008c69c313 Mon Sep 17 00:00:00 2001 From: Cedric Koch-Hofer <94527853+knard38@users.noreply.github.com> Date: Thu, 4 Dec 2025 15:39:42 +0100 Subject: [PATCH 055/253] DAOS-18283 build: Add C macro constant debug info (#17183) Add C macro constant debug information. 
Signed-off-by: Cedric Koch-Hofer --- site_scons/site_tools/compiler_setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/site_scons/site_tools/compiler_setup.py b/site_scons/site_tools/compiler_setup.py index 08e97f62d1f..812ef0632dd 100644 --- a/site_scons/site_tools/compiler_setup.py +++ b/site_scons/site_tools/compiler_setup.py @@ -92,7 +92,8 @@ def _base_setup(env): if build_type == 'debug': if compiler == 'gcc': - env.AppendUnique(CCFLAGS=['-Og']) + env['CCFLAGS'].remove('-g') + env.AppendUnique(CCFLAGS=['-g3', '-Og']) else: env.AppendUnique(CCFLAGS=['-O0']) else: From 84bd85a54df620ada2d70727aaaad51fa9bd17a3 Mon Sep 17 00:00:00 2001 From: Jan Michalski Date: Thu, 4 Dec 2025 15:09:43 +0000 Subject: [PATCH 056/253] DAOS-18271 dlck: preallocate VOS files (MD-on-SSD support) (#17190) Signed-off-by: Jan Michalski --- src/utils/dlck/cmds/dlck_cmd_check.c | 7 +++++++ src/utils/dlck/dlck_pool.c | 18 ++++++++++-------- src/utils/dlck/dlck_pool.h | 18 ++++++++++++++++++ 3 files changed, 35 insertions(+), 8 deletions(-) diff --git a/src/utils/dlck/cmds/dlck_cmd_check.c b/src/utils/dlck/cmds/dlck_cmd_check.c index a64ce81d2a4..b48116b6dd6 100644 --- a/src/utils/dlck/cmds/dlck_cmd_check.c +++ b/src/utils/dlck/cmds/dlck_cmd_check.c @@ -33,6 +33,13 @@ pool_process(struct xstream_arg *xa, struct dlck_file *file, struct checker *ck) daos_handle_t poh; int rc; + rc = dlck_pool_file_preallocate(xa->ctrl->engine.storage_path, file->po_uuid, + xa->xs->tgt_id); + CK_PRINTL_RC(ck, xa->rc, "VOS file allocation"); + if (rc != DER_SUCCESS) { + return rc; + } + /** generate a VOS file path */ rc = ds_mgmt_file(xa->ctrl->engine.storage_path, file->po_uuid, VOS_FILE, &xa->xs->tgt_id, &path); diff --git a/src/utils/dlck/dlck_pool.c b/src/utils/dlck/dlck_pool.c index a766d1ed292..f62cc5a7cbc 100644 --- a/src/utils/dlck/dlck_pool.c +++ b/src/utils/dlck/dlck_pool.c @@ -61,12 +61,17 @@ dlck_pool_mkdir_all(const char *storage_path, d_list_t *files, struct checker *c 
return DER_SUCCESS; } -static int -dlck_file_preallocate(const char *storage_path, uuid_t po_uuid, int tgt_id) +int +dlck_pool_file_preallocate(const char *storage_path, uuid_t po_uuid, int tgt_id) { struct smd_pool_info *pool_info = NULL; int rc; + /** no MD-on-SSD mode means no file preallocation is necessary */ + if (!bio_nvme_configured(SMD_DEV_TYPE_META)) { + return DER_SUCCESS; + } + rc = smd_pool_get_info(po_uuid, &pool_info); if (rc != 0) { return rc; @@ -90,12 +95,9 @@ dlck_pool_open(const char *storage_path, uuid_t po_uuid, int tgt_id, daos_handle return rc; } - /** no MD-on-SSD mode means no file preallocation is necessary */ - if (bio_nvme_configured(SMD_DEV_TYPE_META)) { - rc = dlck_file_preallocate(storage_path, po_uuid, tgt_id); - if (rc != 0) { - goto fail; - } + rc = dlck_pool_file_preallocate(storage_path, po_uuid, tgt_id); + if (rc != DER_SUCCESS) { + goto fail; } rc = vos_pool_open(path, po_uuid, DLCK_POOL_OPEN_FLAGS, poh); diff --git a/src/utils/dlck/dlck_pool.h b/src/utils/dlck/dlck_pool.h index 882cd3bf0f6..fbeea38f9a1 100644 --- a/src/utils/dlck/dlck_pool.h +++ b/src/utils/dlck/dlck_pool.h @@ -45,6 +45,24 @@ dlck_pool_mkdir(const char *storage_path, uuid_t po_uuid, struct checker *ck); int dlck_pool_mkdir_all(const char *storage_path, d_list_t *files, struct checker *ck); +/** + * Allocate the pool file if necessary (MD-on-SSD). + * + * \param[in] storage_path Storage path. + * \param[in] po_uuid Pool UUID. + * \param[in] tgt_id Target ID. + * + * \retval DER_SUCCESS Success. + * \retval -DER_NOMEM Out of memory. + * \retval -DER_NO_PERM Permission problem. Please see open(3) and fallocate(2). + * \retval -DER_EXIST The file already exists. Please see open(3). + * \retval -DER_NONEXIST The file does not exist. Please see open(3). + * \retval -DER_NOSPACE There is not enough space left on the device. + * \retval -DER_* Possibly other errors. 
+ */ +int +dlck_pool_file_preallocate(const char *storage_path, uuid_t po_uuid, int tgt_id); + /** * Open a pool. * From 03947d0a4730af357acb9ba7c2efa683bb30f567 Mon Sep 17 00:00:00 2001 From: Nasf-Fan Date: Thu, 4 Dec 2025 23:10:38 +0800 Subject: [PATCH 057/253] DAOS-18196 object: use large stack for collective object RPC ULT (#17165) To avoid potential ULT stack overflow. Signed-off-by: Fan Yong --- src/dtx/dtx_coll.c | 2 +- src/engine/ult.c | 33 +++++++++--------------------- src/include/daos_srv/daos_engine.h | 6 ++---- src/object/srv_coll.c | 2 +- 4 files changed, 14 insertions(+), 29 deletions(-) diff --git a/src/dtx/dtx_coll.c b/src/dtx/dtx_coll.c index 106205e0c5b..fa5b640cc5a 100644 --- a/src/dtx/dtx_coll.c +++ b/src/dtx/dtx_coll.c @@ -426,7 +426,7 @@ dtx_coll_local_exec(uuid_t po_uuid, uuid_t co_uuid, struct dtx_id *xid, daos_epo coll_args.ca_tgt_bitmap_sz = bitmap_sz; coll_args.ca_tgt_bitmap = bitmap; - rc = dss_thread_collective_reduce(&coll_ops, &coll_args, DSS_USE_CURRENT_ULT); + rc = dss_thread_collective_reduce(&coll_ops, &coll_args, 0); D_CDEBUG(rc < 0, DLOG_ERR, DB_TRACE, "Locally exec collective DTX PRC %u for "DF_DTI": "DF_RC"\n", opc, DP_DTI(xid), DP_RC(rc)); diff --git a/src/engine/ult.c b/src/engine/ult.c index 56dd36009b6..5e39533c202 100644 --- a/src/engine/ult.c +++ b/src/engine/ult.c @@ -92,18 +92,16 @@ dss_collective_reduce_internal(struct dss_coll_ops *ops, struct dss_coll_args *args, bool create_ult, unsigned int flags) { - struct collective_arg carg; - struct dss_coll_stream_args *stream_args; - struct dss_stream_arg_type *stream; - struct aggregator_arg_type aggregator; - struct dss_xstream *dx; - ABT_future future; - int xs_nr; - int rc; - int tid; - int tgt_id = dss_get_module_info()->dmi_tgt_id; - uint32_t bm_len; - bool self = false; + struct dss_coll_stream_args *stream_args; + struct dss_stream_arg_type *stream; + struct dss_xstream *dx; + struct collective_arg carg; + struct aggregator_arg_type aggregator; + ABT_future future; 
+ uint32_t bm_len; + int xs_nr; + int rc; + int tid; if (ops == NULL || args == NULL || ops->co_func == NULL) { D_DEBUG(DB_MD, "mandatory args missing dss_collective_reduce"); @@ -171,11 +169,6 @@ dss_collective_reduce_internal(struct dss_coll_ops *ops, D_ASSERTF(rc == ABT_SUCCESS, "%d\n", rc); continue; } - - if (tgt_id == tid && flags & DSS_USE_CURRENT_ULT) { - self = true; - continue; - } } dx = dss_get_xstream(DSS_MAIN_XS_ID(tid)); @@ -216,12 +209,6 @@ dss_collective_reduce_internal(struct dss_coll_ops *ops, } } - if (self) { - stream = &stream_args->csa_streams[tgt_id]; - stream->st_coll_args = &carg; - collective_func(stream); - } - ABT_future_wait(future); rc = aggregator.at_rc; diff --git a/src/include/daos_srv/daos_engine.h b/src/include/daos_srv/daos_engine.h index f6c14324155..94573b38bc7 100644 --- a/src/include/daos_srv/daos_engine.h +++ b/src/include/daos_srv/daos_engine.h @@ -453,11 +453,9 @@ int dss_parameters_set(unsigned int key_id, uint64_t value); enum dss_ult_flags { /* Periodically created ULTs */ - DSS_ULT_FL_PERIODIC = (1 << 0), + DSS_ULT_FL_PERIODIC = (1 << 0), /* Use DSS_DEEP_STACK_SZ as the stack size */ - DSS_ULT_DEEP_STACK = (1 << 1), - /* Use current ULT (instead of creating new one) for the task. 
 */ - DSS_USE_CURRENT_ULT = (1 << 2), + DSS_ULT_DEEP_STACK = (1 << 1), }; int dss_ult_create(void (*func)(void *), void *arg, int xs_type, int tgt_id, diff --git a/src/object/srv_coll.c b/src/object/srv_coll.c index bc72894dcaa..28e34dd619b 100644 --- a/src/object/srv_coll.c +++ b/src/object/srv_coll.c @@ -70,7 +70,7 @@ obj_coll_local(crt_rpc_t *rpc, struct daos_coll_shard *shards, struct dtx_coll_e coll_args.ca_tgt_bitmap = dce->dce_bitmap; coll_args.ca_tgt_bitmap_sz = dce->dce_bitmap_sz; - rc = dss_thread_collective_reduce(&coll_ops, &coll_args, DSS_USE_CURRENT_ULT); + rc = dss_thread_collective_reduce(&coll_ops, &coll_args, DSS_ULT_DEEP_STACK); out: if (octa.octa_versions != NULL) { From b5a1ba6af88a4d3e053f1b6e194f9285fd75b41b Mon Sep 17 00:00:00 2001 From: Jan Zarzycki Date: Thu, 4 Dec 2025 16:17:22 +0100 Subject: [PATCH 058/253] SRE-3446 ci: storage prep script uses interface name instead of full path (#17084) fix the script to pass the interface name (basename of the sysfs path) to `ip addr show` instead of the whole filepath Signed-off-by: Jan Zarzycki Signed-off-by: Tomasz Gromadzki --- ci/storage/test_main_storage_prepare_node.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ci/storage/test_main_storage_prepare_node.sh b/ci/storage/test_main_storage_prepare_node.sh index f87333327b8..a0b6811b5e8 100755 --- a/ci/storage/test_main_storage_prepare_node.sh +++ b/ci/storage/test_main_storage_prepare_node.sh @@ -61,11 +61,11 @@ if ipmctl show -dimm; then fi else counter=0 - for ib in /sys/class/net/ib*; do + for ib_path in /sys/class/net/ib*; do ((counter++)) || true - ip addr show "$ib" + ip addr show "$(basename "$ib_path")" done - if "$counter" -ge 2; then + if [ $counter -ge 2 ]; then # All of our CI nodes with two ib adapters should have PMEM DIMMs echo 'No PMEM DIMM devices found on CI node!' 
exit 1 From 50f7ab82ba81d839a86a76ee17534b796e3c7a72 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 4 Dec 2025 07:21:03 -0800 Subject: [PATCH 059/253] DAOS-18303 cq: bump GHA versions (#17213) Updates `actions/checkout` from 6.0.0 to 6.0.1 Updates `actions/setup-python` from 6.0.0 to 6.1.0 Updates `github/codeql-action` from 4.31.4 to 4.31.6 Updates `dorny/test-reporter` from 2.2.0 to 2.3.0 Signed-off-by: dependabot[bot] --- .github/workflows/bash_unit_testing.yml | 4 +-- .github/workflows/bullseye-coverage.yml | 8 ++--- .github/workflows/ci2.yml | 4 +-- .github/workflows/create_release.yml | 2 +- .github/workflows/landing-builds.yml | 10 +++---- .github/workflows/linting.yml | 30 +++++++++---------- .github/workflows/ossf-scorecard.yml | 4 +-- .github/workflows/pr-metadata.yml | 2 +- .../workflows/rpm-build-and-test-report.yml | 4 +-- .github/workflows/rpm-build-and-test.yml | 8 ++--- .github/workflows/trivy.yml | 4 +-- .github/workflows/unit-testing.yml | 2 +- 12 files changed, 41 insertions(+), 41 deletions(-) diff --git a/.github/workflows/bash_unit_testing.yml b/.github/workflows/bash_unit_testing.yml index 6cd7e554a5f..6210d3e99e6 100644 --- a/.github/workflows/bash_unit_testing.yml +++ b/.github/workflows/bash_unit_testing.yml @@ -20,11 +20,11 @@ jobs: runs-on: [self-hosted, light] steps: - name: Checkout code - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: ref: ${{ github.event.pull_request.head.sha }} - name: Checkout bash_unit project - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: repository: 'pgrange/bash_unit' path: bash_unit diff --git a/.github/workflows/bullseye-coverage.yml b/.github/workflows/bullseye-coverage.yml index e3a4dd5201f..942b34ae01b 100644 --- 
a/.github/workflows/bullseye-coverage.yml +++ b/.github/workflows/bullseye-coverage.yml @@ -109,7 +109,7 @@ jobs: matrix: ${{ steps.matrix.outputs.text }} steps: - name: Checkout code - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: ref: ${{ github.event.pull_request.head.sha }} - name: Import commit pragmas @@ -235,7 +235,7 @@ jobs: COMMIT_STATUS_DISTRO_VERSION: steps: - name: Checkout code - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: submodules: 'recursive' fetch-depth: 500 @@ -409,7 +409,7 @@ jobs: matrix: ${{ steps.matrix.outputs.text }} steps: - name: Checkout code - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: ref: ${{ github.event.pull_request.head.sha }} - name: Import commit pragmas @@ -519,7 +519,7 @@ jobs: SIZE: steps: - name: Checkout code - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: submodules: 'recursive' fetch-depth: 500 diff --git a/.github/workflows/ci2.yml b/.github/workflows/ci2.yml index 8533cea6fbd..1eea2010590 100644 --- a/.github/workflows/ci2.yml +++ b/.github/workflows/ci2.yml @@ -34,7 +34,7 @@ jobs: DOCKER_BASE: ${{ matrix.base }} steps: - name: Checkout code - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: submodules: true fetch-depth: 500 @@ -100,7 +100,7 @@ jobs: COMPILER: ${{ matrix.compiler }} steps: - name: Checkout code - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: submodules: true 
fetch-depth: 500 diff --git a/.github/workflows/create_release.yml b/.github/workflows/create_release.yml index 1c04bf70022..9d3795f8c26 100644 --- a/.github/workflows/create_release.yml +++ b/.github/workflows/create_release.yml @@ -18,7 +18,7 @@ jobs: permissions: contents: write steps: - - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 + - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: fetch-depth: 2 - uses: ./.github/actions/make_release diff --git a/.github/workflows/landing-builds.yml b/.github/workflows/landing-builds.yml index d9e2ff2aa32..80026a3c8e7 100644 --- a/.github/workflows/landing-builds.yml +++ b/.github/workflows/landing-builds.yml @@ -64,7 +64,7 @@ jobs: DOCKER_BASE: ${{ matrix.base }} steps: - name: Checkout code - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: submodules: 'recursive' fetch-depth: 500 @@ -112,7 +112,7 @@ jobs: COMPILER: clang steps: - name: Checkout code - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: submodules: 'recursive' fetch-depth: 500 @@ -181,7 +181,7 @@ jobs: COMPILER: ${{ matrix.compiler }} steps: - name: Checkout code - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: submodules: 'recursive' fetch-depth: 500 @@ -255,7 +255,7 @@ jobs: BASE_DISTRO: ${{ matrix.with }} steps: - name: Checkout code - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: submodules: 'recursive' fetch-depth: 500 @@ -344,7 +344,7 @@ jobs: COMPILER: ${{ matrix.compiler }} steps: - name: Checkout code - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 
+ uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: submodules: 'recursive' fetch-depth: 500 diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 4121d0f8653..9f11ab28228 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -25,11 +25,11 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout code - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: ref: ${{ github.event.pull_request.head.sha }} - name: Set up Python environment - uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0 + uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 with: python-version: '3' - name: Install extra python packages @@ -48,7 +48,7 @@ jobs: runs-on: ubuntu-24.04 steps: - name: Checkout code - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: ref: ${{ github.event.pull_request.head.sha }} - name: Run @@ -66,7 +66,7 @@ jobs: runs-on: ubuntu-24.04 steps: - name: Checkout code - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: ref: ${{ github.event.pull_request.head.sha }} - name: Check DAOS logging macro use. @@ -77,7 +77,7 @@ jobs: runs-on: ubuntu-24.04 steps: - name: Checkout code - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 - name: Check DAOS ftest tags. run: \[ ! 
-x src/tests/ftest/tags.py \] || ./src/tests/ftest/tags.py lint --verbose @@ -86,11 +86,11 @@ jobs: name: Flake8 check steps: - name: Check out source repository - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: ref: ${{ github.event.pull_request.head.sha }} - name: Set up Python environment - uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0 + uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 with: python-version: '3' - name: Add parser @@ -119,7 +119,7 @@ jobs: runs-on: ubuntu-24.04 steps: - name: Checkout code - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: ref: ${{ github.event.pull_request.head.sha }} - name: Install doxygen @@ -140,10 +140,10 @@ jobs: runs-on: ubuntu-24.04 steps: - name: Checkout code - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: ref: ${{ github.event.pull_request.head.sha }} - - uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0 + - uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 with: python-version: '3.11' - name: Install python packages @@ -160,7 +160,7 @@ jobs: runs-on: ubuntu-24.04 steps: - name: Checkout code - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 - name: Install extra python packages run: python3 -m pip install --requirement utils/cq/requirements.txt - name: Run check @@ -175,7 +175,7 @@ jobs: runs-on: ubuntu-24.04 steps: - name: Check out source repository - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # 
v6.0.1 with: ref: ${{ github.event.pull_request.head.sha }} fetch-depth: 0 @@ -202,11 +202,11 @@ jobs: runs-on: ubuntu-24.04 steps: - name: Check out source repository - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: ref: ${{ github.event.pull_request.head.sha }} - name: Set up Python environment - uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0 + uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 with: python-version: '3' - name: Install extra python packages @@ -219,7 +219,7 @@ jobs: runs-on: ubuntu-24.04 steps: - name: Check out source repository - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: ref: ${{ github.event.pull_request.head.sha }} fetch-depth: 0 diff --git a/.github/workflows/ossf-scorecard.yml b/.github/workflows/ossf-scorecard.yml index 9fcb09cfa87..72451b65db6 100644 --- a/.github/workflows/ossf-scorecard.yml +++ b/.github/workflows/ossf-scorecard.yml @@ -33,7 +33,7 @@ jobs: steps: - name: "Checkout code" - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: persist-credentials: false @@ -71,6 +71,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard (optional). 
# Commenting out will disable upload of results to your repo's Code Scanning dashboard - name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@e12f0178983d466f2f6028f5cc7a6d786fd97f4b # v4.31.4 + uses: github/codeql-action/upload-sarif@fe4161a26a8629af62121b670040955b330f9af2 # v4.31.6 with: sarif_file: results.sarif diff --git a/.github/workflows/pr-metadata.yml b/.github/workflows/pr-metadata.yml index 511de31eb15..d563f31162e 100644 --- a/.github/workflows/pr-metadata.yml +++ b/.github/workflows/pr-metadata.yml @@ -19,7 +19,7 @@ jobs: name: Report Jira data to PR comment steps: - name: Checkout - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 - name: install jira run: python3 -m pip install jira - name: Load jira metadata diff --git a/.github/workflows/rpm-build-and-test-report.yml b/.github/workflows/rpm-build-and-test-report.yml index 6801977ecc4..3fd674e1144 100644 --- a/.github/workflows/rpm-build-and-test-report.yml +++ b/.github/workflows/rpm-build-and-test-report.yml @@ -93,7 +93,7 @@ jobs: esac echo "STAGE_NAME=Build RPM on $DISTRO_NAME $DISTRO_VERSION" >> $GITHUB_ENV - name: Test Report - uses: dorny/test-reporter@7b7927aa7da8b82e81e755810cb51f39941a2cc7 # v2.2.0 + uses: dorny/test-reporter@fe45e9537387dac839af0d33ba56eed8e24189e8 # v2.3.0 with: artifact: ${{ env.STAGE_NAME }} test-results name: ${{ env.STAGE_NAME }} Test Results (dorny) @@ -112,7 +112,7 @@ jobs: - name: Set variables run: echo "STAGE_NAME=Functional Hardware ${{ matrix.stage }}" >> $GITHUB_ENV - name: Test Report - uses: dorny/test-reporter@7b7927aa7da8b82e81e755810cb51f39941a2cc7 # v2.2.0 + uses: dorny/test-reporter@fe45e9537387dac839af0d33ba56eed8e24189e8 # v2.3.0 with: artifact: ${{ env.STAGE_NAME }} test-results name: ${{ env.STAGE_NAME }} Test Results (dorny) diff --git a/.github/workflows/rpm-build-and-test.yml b/.github/workflows/rpm-build-and-test.yml index 
ad3ac5ad2a2..f83e45e65ce 100644 --- a/.github/workflows/rpm-build-and-test.yml +++ b/.github/workflows/rpm-build-and-test.yml @@ -118,7 +118,7 @@ jobs: matrix: ${{ steps.matrix.outputs.text }} steps: - name: Checkout code - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: ref: ${{ github.event.pull_request.head.sha }} - name: Import commit pragmas @@ -244,7 +244,7 @@ jobs: COMMIT_STATUS_DISTRO_VERSION: steps: - name: Checkout code - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: submodules: recursive fetch-depth: 500 @@ -418,7 +418,7 @@ jobs: matrix: ${{ steps.matrix.outputs.text }} steps: - name: Checkout code - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: ref: ${{ github.event.pull_request.head.sha }} - name: Import commit pragmas @@ -528,7 +528,7 @@ jobs: SIZE: steps: - name: Checkout code - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: submodules: recursive fetch-depth: 500 diff --git a/.github/workflows/trivy.yml b/.github/workflows/trivy.yml index 38ce98f92d6..8e264d4b923 100644 --- a/.github/workflows/trivy.yml +++ b/.github/workflows/trivy.yml @@ -33,7 +33,7 @@ jobs: security-events: write steps: - name: Checkout code - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 - name: Run Trivy vulnerability scanner in filesystem mode (table format) uses: aquasecurity/trivy-action@b6643a29fecd7f34b3597bc6acb0a98b03d33ff8 # 0.33.1 @@ -68,7 +68,7 @@ jobs: trivy-config: 'utils/trivy/trivy.yaml' - name: Upload Trivy scan results to GitHub Security tab 
- uses: github/codeql-action/upload-sarif@e12f0178983d466f2f6028f5cc7a6d786fd97f4b # v4.31.4 + uses: github/codeql-action/upload-sarif@fe4161a26a8629af62121b670040955b330f9af2 # v4.31.6 with: sarif_file: 'trivy-results.sarif' diff --git a/.github/workflows/unit-testing.yml b/.github/workflows/unit-testing.yml index 425fa8079eb..a2b5e2ddedf 100644 --- a/.github/workflows/unit-testing.yml +++ b/.github/workflows/unit-testing.yml @@ -15,7 +15,7 @@ jobs: runs-on: [self-hosted, docker] steps: - name: Checkout code - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 with: submodules: 'recursive' - name: Build deps in Docker From f6a77b2e9ca12ff702df4fa731490eaae291f802 Mon Sep 17 00:00:00 2001 From: Ravindran Padmanabhan Date: Thu, 4 Dec 2025 10:27:25 -0500 Subject: [PATCH 060/253] DAOS-17851 test: Pass ranks as parameters to test functions. (#16902) Signed-off-by: rpadma2 --- src/tests/ftest/osa/offline_drain.py | 42 +++++++++-------- src/tests/ftest/osa/offline_drain.yaml | 2 - src/tests/ftest/osa/offline_reintegration.py | 48 +++++++++++--------- src/tests/ftest/osa/online_drain.py | 28 ++++++------ src/tests/ftest/osa/online_reintegration.py | 35 +++++++------- src/tests/ftest/util/osa_utils.py | 19 ++++++++ 6 files changed, 102 insertions(+), 72 deletions(-) diff --git a/src/tests/ftest/osa/offline_drain.py b/src/tests/ftest/osa/offline_drain.py index 04cb98f67e8..e7ba3b1b25d 100644 --- a/src/tests/ftest/osa/offline_drain.py +++ b/src/tests/ftest/osa/offline_drain.py @@ -23,23 +23,21 @@ def setUp(self): """Set up for test case.""" super().setUp() self.dmg_command = self.get_dmg_command() - self.ranks = self.params.get("rank_list", '/run/test_ranks/*') self.test_oclass = self.params.get("oclass", '/run/test_obj_class/*') self.ior_test_sequence = self.params.get( "ior_test_sequence", '/run/ior/iorflags/*') # Recreate the client hostfile without slots defined 
self.hostfile_clients = write_host_file(self.hostlist_clients, self.workdir) - def run_offline_drain_test(self, num_pool, data=False, oclass=None, pool_fillup=0, - num_ranks=1): + def run_offline_drain_test(self, num_pool, ranks, data=False, oclass=None, pool_fillup=0): """Run the offline drain without data. Args: num_pool (int) : total pools to create for testing purposes. + ranks (list) : Ranks to drain. data (bool) : whether pool has no data or to create some data in pool. Defaults to False. oclass (str): DAOS object class (eg: RP_2G1,etc) - num_ranks (int): Number of ranks to drain. Defaults to 1. """ # Create a pool pool = {} @@ -48,12 +46,6 @@ def run_offline_drain_test(self, num_pool, data=False, oclass=None, pool_fillup= if oclass is None: oclass = self.ior_cmd.dfs_oclass.value - # Get a random rank(s) based on num_ranks input. - ranklist = list(self.server_managers[0].ranks.keys()) - # For tests which uses num_ranks equal to 1, use the YAML file information. - if num_ranks > 1: - self.ranks = [",".join(map(str, self.random.sample(ranklist, k=num_ranks)))] - # Exclude target : random two targets (target idx : 0-7) exc = self.random.randint(0, 6) target_list.append(exc) @@ -92,7 +84,7 @@ def run_offline_drain_test(self, num_pool, data=False, oclass=None, pool_fillup= # Drain ranks and targets for val in range(0, num_pool): # Drain ranks provided in YAML file - for index, rank in enumerate(self.ranks): + for index, rank in enumerate(ranks): self.pool = pool[val] # If we are testing using multiple pools, reintegrate # the rank back and then drain. @@ -164,7 +156,8 @@ def test_osa_offline_drain(self): :avocado: tags=OSAOfflineDrain,test_osa_offline_drain """ self.log.info("Offline Drain : Basic Drain") - self.run_offline_drain_test(1, True) + ranks = self.get_random_test_ranks() + self.run_offline_drain_test(num_pool=1, data=True, ranks=ranks) def test_osa_offline_drain_without_checksum(self): """Test ID: DAOS-7159. 
@@ -178,7 +171,8 @@ def test_osa_offline_drain_without_checksum(self): """ self.test_with_checksum = self.params.get("test_with_checksum", "/run/checksum/*") self.log.info("Offline Drain : Without Checksum") - self.run_offline_drain_test(1, data=True) + ranks = self.get_random_test_ranks() + self.run_offline_drain_test(num_pool=1, data=True, ranks=ranks) def test_osa_offline_drain_during_aggregation(self): """Test ID: DAOS-7159. @@ -193,7 +187,8 @@ def test_osa_offline_drain_during_aggregation(self): self.test_during_aggregation = self.params.get( "test_with_aggregation", "/run/aggregation/*") self.log.info("Offline Drain : During Aggregation") - self.run_offline_drain_test(1, data=True) + ranks = self.get_random_test_ranks() + self.run_offline_drain_test(num_pool=1, data=True, ranks=ranks) def test_osa_offline_drain_oclass(self): """Test ID: DAOS-7159. @@ -207,8 +202,9 @@ def test_osa_offline_drain_oclass(self): """ self.test_with_checksum = self.params.get("test_with_checksum", "/run/checksum/*") self.log.info("Offline Drain : Oclass") + ranks = self.get_random_test_ranks() for oclass in self.test_oclass: - self.run_offline_drain_test(1, data=True, oclass=oclass) + self.run_offline_drain_test(num_pool=1, data=True, ranks=ranks, oclass=oclass) def test_osa_offline_drain_multiple_pools(self): """Test ID: DAOS-7159. @@ -221,7 +217,8 @@ def test_osa_offline_drain_multiple_pools(self): :avocado: tags=OSAOfflineDrain,test_osa_offline_drain_multiple_pools """ self.log.info("Offline Drain : Multiple Pools") - self.run_offline_drain_test(2, data=True) + ranks = self.get_random_test_ranks() + self.run_offline_drain_test(num_pool=2, data=True, ranks=ranks) def test_osa_offline_drain_during_rebuild(self): """Test ID: DAOS-7159. 
@@ -235,7 +232,8 @@ def test_osa_offline_drain_during_rebuild(self): """ self.test_during_rebuild = self.params.get("test_with_rebuild", "/run/rebuild/*") self.log.info("Offline Drain : During Rebuild") - self.run_offline_drain_test(1, data=True) + ranks = self.get_random_test_ranks() + self.run_offline_drain_test(num_pool=1, data=True, ranks=ranks) def test_osa_offline_drain_after_snapshot(self): """Test ID: DAOS-8057. @@ -249,7 +247,8 @@ def test_osa_offline_drain_after_snapshot(self): """ self.test_with_snapshot = self.params.get("test_with_snapshot", "/run/snapshot/*") self.log.info("Offline Drain : After taking snapshot") - self.run_offline_drain_test(1, data=True) + ranks = self.get_random_test_ranks() + self.run_offline_drain_test(num_pool=1, data=True, ranks=ranks) def test_osa_offline_drain_with_less_pool_space(self): """Test ID: DAOS-7160. @@ -264,7 +263,9 @@ def test_osa_offline_drain_with_less_pool_space(self): self.log.info("Offline Drain : Test with less pool space") oclass = self.params.get("pool_test_oclass", '/run/pool_capacity/*') pool_fillup = self.params.get("pool_fillup", '/run/pool_capacity/*') - self.run_offline_drain_test(1, data=True, oclass=oclass, pool_fillup=pool_fillup) + ranks = self.get_random_test_ranks() + self.run_offline_drain_test(num_pool=1, data=True, ranks=ranks, oclass=oclass, + pool_fillup=pool_fillup) def test_osa_offline_drain_with_multiple_ranks(self): """Test ID: DAOS-4753. 
@@ -277,4 +278,5 @@ def test_osa_offline_drain_with_multiple_ranks(self): :avocado: tags=OSAOfflineDrain,test_osa_offline_drain_with_multiple_ranks """ self.log.info("Offline Drain : Test with multiple ranks") - self.run_offline_drain_test(1, data=True, num_ranks=2) + ranks = self.get_random_test_ranks(join_ranks=False) + self.run_offline_drain_test(num_pool=1, data=True, ranks=ranks) diff --git a/src/tests/ftest/osa/offline_drain.yaml b/src/tests/ftest/osa/offline_drain.yaml index a8776edd4a4..a9fccca5aee 100644 --- a/src/tests/ftest/osa/offline_drain.yaml +++ b/src/tests/ftest/osa/offline_drain.yaml @@ -100,8 +100,6 @@ checksum: test_with_checksum: false snapshot: test_with_snapshot: true -test_ranks: - rank_list: ["2", "5"] pool_capacity: pool_fillup: 10 pool_test_oclass: RP_2GX diff --git a/src/tests/ftest/osa/offline_reintegration.py b/src/tests/ftest/osa/offline_reintegration.py index 7f39b161f73..825dc17702b 100644 --- a/src/tests/ftest/osa/offline_reintegration.py +++ b/src/tests/ftest/osa/offline_reintegration.py @@ -32,13 +32,14 @@ def setUp(self): self.hostfile_clients = write_host_file(self.hostlist_clients, self.workdir) self.dmg_command.exit_status_exception = True - def run_offline_reintegration_test(self, num_pool, data=False, server_boot=False, oclass=None, - pool_fillup=0, num_ranks=1): + def run_offline_reintegration_test(self, num_pool, ranks, data=False, server_boot=False, + oclass=None, pool_fillup=0): # pylint: disable=too-many-branches """Run the offline reintegration without data. Args: num_pool (int) : total pools to create for testing purposes. + ranks (list) : Ranks to exclude and reintegrate during the testing. data (bool) : whether pool has no data or to create some data in pool. Defaults to False. server_boot (bool) : Perform system stop/start on a rank. Defaults to False. 
@@ -81,14 +82,6 @@ def run_offline_reintegration_test(self, num_pool, data=False, server_boot=False if self.test_during_aggregation is True: self.run_ior_thread("Write", oclass, test_seq) - if num_ranks > 1: - # Exclude ranks from a random pool - ranklist = list(self.server_managers[0].ranks.keys()) - ranks = [",".join(map(str, self.random.sample(ranklist, k=num_ranks)))] - else: - # Exclude ranks 0 and 3 from a random pool (when num_ranks equal to 1) - ranks = ["0", "3"] - self.pool = self.random.choice(pools) # nosec for loop in range(0, self.loop_test_cnt): self.log.info( @@ -195,7 +188,8 @@ def test_osa_offline_reintegration_without_checksum(self): """ self.test_with_checksum = self.params.get("test_with_checksum", '/run/checksum/*') self.log.info("Offline Reintegration : Without Checksum") - self.run_offline_reintegration_test(1, data=True) + ranks = self.get_random_test_ranks() + self.run_offline_reintegration_test(num_pool=1, data=True, ranks=ranks) def test_osa_offline_reintegration_multiple_pools(self): """Test ID: DAOS-6923. @@ -208,7 +202,8 @@ def test_osa_offline_reintegration_multiple_pools(self): :avocado: tags=OSAOfflineReintegration,test_osa_offline_reintegration_multiple_pools """ self.log.info("Offline Reintegration : Multiple Pools") - self.run_offline_reintegration_test(5, data=True) + ranks = self.get_random_test_ranks() + self.run_offline_reintegration_test(num_pool=5, data=True, ranks=ranks) def test_osa_offline_reintegration_server_stop(self): """Test ID: DAOS-6748. 
@@ -221,7 +216,8 @@ def test_osa_offline_reintegration_server_stop(self): :avocado: tags=OSAOfflineReintegration,test_osa_offline_reintegration_server_stop """ self.log.info("Offline Reintegration : System Start/Stop") - self.run_offline_reintegration_test(1, data=True, server_boot=True) + ranks = self.get_random_test_ranks() + self.run_offline_reintegration_test(num_pool=1, data=True, server_boot=True, ranks=ranks) def test_osa_offline_reintegrate_during_rebuild(self): """Test ID: DAOS-6923. @@ -236,7 +232,8 @@ def test_osa_offline_reintegrate_during_rebuild(self): self.loop_test_cnt = self.params.get("iterations", '/run/loop_test/*') self.test_during_rebuild = self.params.get("test_with_rebuild", '/run/rebuild/*') self.log.info("Offline Reintegration : Rebuild") - self.run_offline_reintegration_test(1, data=True) + ranks = self.get_random_test_ranks() + self.run_offline_reintegration_test(num_pool=1, data=True, ranks=ranks) def test_osa_offline_reintegration_oclass(self): """Test ID: DAOS-6923. @@ -249,8 +246,10 @@ def test_osa_offline_reintegration_oclass(self): :avocado: tags=OSAOfflineReintegration,test_osa_offline_reintegration_oclass """ self.log.info("Offline Reintegration : Object Class") + ranks = self.get_random_test_ranks() for oclass in self.test_oclass: - self.run_offline_reintegration_test(1, data=True, server_boot=False, oclass=oclass) + self.run_offline_reintegration_test(num_pool=1, data=True, server_boot=False, + oclass=oclass, ranks=ranks) def test_osa_offline_reintegrate_during_aggregation(self): """Test ID: DAOS-6923. 
@@ -265,7 +264,8 @@ def test_osa_offline_reintegrate_during_aggregation(self): self.test_during_aggregation = self.params.get("test_with_aggregation", '/run/aggregation/*') self.log.info("Offline Reintegration : Aggregation") - self.run_offline_reintegration_test(1, data=True) + ranks = self.get_random_test_ranks() + self.run_offline_reintegration_test(num_pool=1, data=True, ranks=ranks) def test_osa_offline_reintegration_with_rf(self): """Test ID: DAOS-6923. @@ -280,7 +280,8 @@ def test_osa_offline_reintegration_with_rf(self): """ self.log.info("Offline Reintegration : RF") self.test_with_rf = self.params.get("test_with_rf", '/run/test_rf/*') - self.run_offline_reintegration_test(1, data=True) + ranks = self.get_random_test_ranks() + self.run_offline_reintegration_test(num_pool=1, data=True, ranks=ranks) def test_osa_offline_reintegrate_with_blank_node(self): """Test ID: DAOS-6923. @@ -294,7 +295,8 @@ def test_osa_offline_reintegrate_with_blank_node(self): """ self.test_with_blank_node = self.params.get("test_with_blank_node", '/run/blank_node/*') self.log.info("Offline Reintegration : Test with blank node") - self.run_offline_reintegration_test(1, data=True) + ranks = self.get_random_test_ranks() + self.run_offline_reintegration_test(num_pool=1, data=True, ranks=ranks) def test_osa_offline_reintegrate_after_snapshot(self): """Test ID: DAOS-8057. @@ -308,7 +310,8 @@ def test_osa_offline_reintegrate_after_snapshot(self): """ self.test_with_snapshot = self.params.get("test_with_snapshot", '/run/snapshot/*') self.log.info("Offline Reintegration : Test with snapshot") - self.run_offline_reintegration_test(1, data=True) + ranks = self.get_random_test_ranks() + self.run_offline_reintegration_test(num_pool=1, data=True, ranks=ranks) def test_osa_offline_reintegrate_with_less_pool_space(self): """Test ID: DAOS-7160. 
@@ -323,7 +326,9 @@ def test_osa_offline_reintegrate_with_less_pool_space(self): self.log.info("Offline Reintegration : Test with less pool space") oclass = self.params.get("pool_test_oclass", '/run/pool_capacity/*') pool_fillup = self.params.get("pool_fillup", '/run/pool_capacity/*') - self.run_offline_reintegration_test(1, data=True, oclass=oclass, pool_fillup=pool_fillup) + ranks = self.get_random_test_ranks() + self.run_offline_reintegration_test(num_pool=1, data=True, oclass=oclass, + pool_fillup=pool_fillup, ranks=ranks) def test_osa_offline_reintegrate_with_multiple_ranks(self): """Test ID: DAOS-4753. @@ -336,4 +341,5 @@ def test_osa_offline_reintegrate_with_multiple_ranks(self): :avocado: tags=OSAOfflineReintegration,test_osa_offline_reintegrate_with_multiple_ranks """ self.log.info("Offline Reintegration : Test with multiple ranks") - self.run_offline_reintegration_test(1, data=True, num_ranks=2) + ranks = self.get_random_test_ranks(join_ranks=False) + self.run_offline_reintegration_test(num_pool=1, data=True, ranks=ranks) diff --git a/src/tests/ftest/osa/online_drain.py b/src/tests/ftest/osa/online_drain.py index 98cba3dc94f..f044ad38af5 100644 --- a/src/tests/ftest/osa/online_drain.py +++ b/src/tests/ftest/osa/online_drain.py @@ -32,14 +32,14 @@ def setUp(self): self.dmg_command.exit_status_exception = True self.pool = None - def run_online_drain_test(self, num_pool, oclass=None, app_name="ior", num_ranks=1): + def run_online_drain_test(self, num_pool, ranks, oclass=None, app_name="ior"): """Run the Online drain without data. Args: num_pool (int) : total pools to create for testing purposes. + ranks (list) : list of ranks to drain oclass (str) : Object class type (RP_2G1, etc) app_name (str) : application to run on parallel (ior or mdtest). Defaults to ior. - num_ranks (int): Number of ranks to drain. Defaults to 1. 
""" # Create a pool pool = {} @@ -51,10 +51,6 @@ def run_online_drain_test(self, num_pool, oclass=None, app_name="ior", num_ranks targets = int(self.server_managers[-1].get_config_value('targets')) t_string = ','.join(map(str, self.random.sample(range(targets), 2))) - # Get random rank(s) from the rank list. - ranklist = list(self.server_managers[0].ranks.keys()) - rank = ",".join(map(str, self.random.sample(ranklist, k=num_ranks))) - for val in range(0, num_pool): pool[val] = add_pool(self, connect=False) pool[val].set_property("reclaim", "disabled") @@ -89,7 +85,7 @@ def run_online_drain_test(self, num_pool, oclass=None, app_name="ior", num_ranks self.log.info("Pool Version at the beginning %s", pver_begin) # Get initial total space (scm+nvme) initial_total_space = self.pool.get_total_space(refresh=True) - output = self.pool.drain(rank, t_string) + output = self.pool.drain(ranks, t_string) self.print_and_assert_on_rebuild_failure(output) total_space_after_drain = self.pool.get_total_space(refresh=True) @@ -127,7 +123,8 @@ def test_osa_online_drain(self): :avocado: tags=OSAOnlineDrain,test_osa_online_drain """ self.log.info("Online Drain : With Checksum") - self.run_online_drain_test(1) + ranks = self.get_random_test_ranks() + self.run_online_drain_test(num_pool=1, ranks=ranks) def test_osa_online_drain_no_csum(self): """Test ID: DAOS-6909 @@ -143,7 +140,8 @@ def test_osa_online_drain_no_csum(self): self.log.info("Online Drain : No Checksum") self.test_with_checksum = self.params.get("test_with_checksum", '/run/checksum/*') - self.run_online_drain_test(1) + ranks = self.get_random_test_ranks() + self.run_online_drain_test(num_pool=1, ranks=ranks) def test_osa_online_drain_oclass(self): """Test ID: DAOS-6909 @@ -157,8 +155,9 @@ def test_osa_online_drain_oclass(self): :avocado: tags=OSAOnlineDrain,test_osa_online_drain_oclass """ self.log.info("Online Drain : Oclass") + ranks = self.get_random_test_ranks() for oclass in self.test_oclass: - 
self.run_online_drain_test(1, oclass=oclass) + self.run_online_drain_test(num_pool=1, oclass=oclass, ranks=ranks) def test_osa_online_drain_with_aggregation(self): """Test ID: DAOS-6909 @@ -174,7 +173,8 @@ def test_osa_online_drain_with_aggregation(self): self.log.info("Online Drain : Aggregation") self.test_during_aggregation = self.params.get("test_with_aggregation", '/run/aggregation/*') - self.run_online_drain_test(1) + ranks = self.get_random_test_ranks() + self.run_online_drain_test(num_pool=1, ranks=ranks) def test_osa_online_drain_mdtest(self): """Test ID: DAOS-4750 @@ -188,7 +188,8 @@ def test_osa_online_drain_mdtest(self): :avocado: tags=OSAOnlineDrain,test_osa_online_drain_mdtest """ self.log.info("Online Drain : With Mdtest") - self.run_online_drain_test(1, app_name="mdtest") + ranks = self.get_random_test_ranks() + self.run_online_drain_test(1, app_name="mdtest", ranks=ranks) def test_osa_online_drain_with_multiple_ranks(self): """Test ID: DAOS-4753. @@ -201,4 +202,5 @@ def test_osa_online_drain_with_multiple_ranks(self): :avocado: tags=OSAOnlineDrain,test_osa_online_drain_with_multiple_ranks """ self.log.info("Online Drain : Test with multiple ranks") - self.run_online_drain_test(1, num_ranks=2) + ranks = self.get_random_test_ranks(join_ranks=False) + self.run_online_drain_test(num_pool=1, ranks=ranks) diff --git a/src/tests/ftest/osa/online_reintegration.py b/src/tests/ftest/osa/online_reintegration.py index 332e93a871f..4420d12ed56 100644 --- a/src/tests/ftest/osa/online_reintegration.py +++ b/src/tests/ftest/osa/online_reintegration.py @@ -43,17 +43,17 @@ def daos_racer_thread(self): self.daos_racer.get_params(self) self.daos_racer.run() - def run_online_reintegration_test(self, num_pool, racer=False, server_boot=False, oclass=None, - num_ranks=1): + def run_online_reintegration_test(self, num_pool, ranks, racer=False, server_boot=False, + oclass=None): """Run the Online reintegration without data. 
Args: num_pool (int) : total pools to create for testing purposes. + ranks (list) : list of ranks to reintegrate. racer (bool) : whether pool has no data or to create some data in pool. Defaults to False. server_boot (bool) : Perform system stop/start on a rank. Defaults to False. oclass (str) : daos object class string (eg: "RP_2G8"). Defaults to None. - num_ranks (int): Number of ranks to drain. Defaults to 1. """ if oclass is None: oclass = self.ior_cmd.dfs_oclass.value @@ -61,9 +61,6 @@ def run_online_reintegration_test(self, num_pool, racer=False, server_boot=False # Create a pool pool = {} - ranklist = list(self.server_managers[0].ranks.keys()) - rank = ",".join(map(str, self.random.sample(ranklist, k=num_ranks))) - # Start the daos_racer thread if racer is True: daos_racer_thread = threading.Thread(target=self.daos_racer_thread) @@ -100,13 +97,13 @@ def run_online_reintegration_test(self, num_pool, racer=False, server_boot=False # Get initial total free space (scm+nvme) initial_free_space = self.pool.get_total_free_space(refresh=True) if server_boot is False: - output = self.pool.exclude(rank) + output = self.pool.exclude(ranks) else: - output = self.dmg_command.system_stop(ranks=rank, force=True) + output = self.dmg_command.system_stop(ranks=ranks, force=True) self.pool.wait_for_rebuild_to_start() self.pool.wait_for_rebuild_to_end() self.log.info(output) - output = self.dmg_command.system_start(ranks=rank) + output = self.dmg_command.system_start(ranks=ranks) self.pool.wait_for_rebuild_to_start() self.print_and_assert_on_rebuild_failure(output) @@ -120,7 +117,7 @@ def run_online_reintegration_test(self, num_pool, racer=False, server_boot=False self.assertTrue(pver_exclude > (pver_begin + 8), "Pool Version Error: After exclude") self.assertTrue(initial_free_space > free_space_after_exclude, "Expected space after exclude is less than initial") - output = self.pool.reintegrate(rank) + output = self.pool.reintegrate(ranks) 
self.print_and_assert_on_rebuild_failure(output) free_space_after_reintegration = self.pool.get_total_free_space(refresh=True) @@ -164,7 +161,8 @@ def test_osa_online_reintegration(self): :avocado: tags=OSAOnlineReintegration,test_osa_online_reintegration """ self.log.info("Online Reintegration : Basic test") - self.run_online_reintegration_test(1) + ranks = self.get_random_test_ranks(total_ranks=1) + self.run_online_reintegration_test(num_pool=1, ranks=ranks) def test_osa_online_reintegration_server_stop(self): """Test ID: DAOS-5920. @@ -177,7 +175,8 @@ def test_osa_online_reintegration_server_stop(self): :avocado: tags=OSAOnlineReintegration,test_osa_online_reintegration_server_stop """ self.log.info("Online Reintegration : System stop/start") - self.run_online_reintegration_test(1, server_boot=True) + ranks = self.get_random_test_ranks(total_ranks=1) + self.run_online_reintegration_test(num_pool=1, server_boot=True, ranks=ranks) def test_osa_online_reintegration_without_csum(self): """Test ID: DAOS-5075. @@ -191,7 +190,8 @@ def test_osa_online_reintegration_without_csum(self): """ self.log.info("Online Reintegration : No Checksum") self.test_with_checksum = self.params.get("test_with_checksum", "/run/checksum/*") - self.run_online_reintegration_test(1) + ranks = self.get_random_test_ranks(total_ranks=1) + self.run_online_reintegration_test(num_pool=1, ranks=ranks) def test_osa_online_reintegration_with_aggregation(self): """Test ID: DAOS-6715. @@ -206,7 +206,8 @@ def test_osa_online_reintegration_with_aggregation(self): self.test_during_aggregation = self.params.get("test_with_aggregation", '/run/aggregation/*') self.log.info("Online Reintegration : Aggregation") - self.run_online_reintegration_test(1) + ranks = self.get_random_test_ranks(total_ranks=1) + self.run_online_reintegration_test(num_pool=1, ranks=ranks) def test_osa_online_reintegration_oclass(self): """Test ID: DAOS-6715. 
@@ -219,8 +220,9 @@ def test_osa_online_reintegration_oclass(self): :avocado: tags=OSAOnlineReintegration,test_osa_online_reintegration_oclass """ self.log.info("Online Reintegration : Object Class") + ranks = self.get_random_test_ranks(total_ranks=1) for oclass in self.test_oclass: - self.run_online_reintegration_test(1, oclass=oclass) + self.run_online_reintegration_test(num_pool=1, oclass=oclass, ranks=ranks) def test_osa_online_reintegration_with_multiple_ranks(self): """Test ID: DAOS-4753. @@ -233,4 +235,5 @@ def test_osa_online_reintegration_with_multiple_ranks(self): :avocado: tags=OSAOnlineReintegration,test_osa_online_reintegration_with_multiple_ranks """ self.log.info("Online Reintegration : Multiple ranks") - self.run_online_reintegration_test(1, oclass="RP_3G1", num_ranks=2) + ranks = self.get_random_test_ranks(join_ranks=False) + self.run_online_reintegration_test(num_pool=1, oclass="RP_3G1", ranks=ranks) diff --git a/src/tests/ftest/util/osa_utils.py b/src/tests/ftest/util/osa_utils.py index 410b6ce46a2..90a78fa898a 100644 --- a/src/tests/ftest/util/osa_utils.py +++ b/src/tests/ftest/util/osa_utils.py @@ -1,5 +1,6 @@ """ (C) Copyright 2020-2024 Intel Corporation. + (C) Copyright 2025 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -290,6 +291,24 @@ def set_cont_class_properties(self, oclass="S1"): self.ior_cmd.dfs_dir_oclass.update(None, "ior.dfs_dir_oclass") self.container.oclass.update(None) + def get_random_test_ranks(self, total_ranks=2, join_ranks=True): + """Get random list of ranks for OSA tests. + + Args: + total_ranks (list): Random rank list for testing. Defaults to 2. + join_ranks (bool): Stop ranks individual ranks. Defaults to True. + + Returns: + list: a list of random ranks either as individual strings, + or one comma-separated string. + + """ + # Get a random rank(s) based on num_ranks input. 
+ ranklist = list(self.server_managers[0].ranks.keys()) + if join_ranks is True: + return list(map(str, self.random.sample(ranklist, k=total_ranks))) + return [",".join(map(str, self.random.sample(ranklist, k=total_ranks)))] + def assert_on_exception(self, out_queue=None): """Assert on exception while executing an application. From bcefb537677e0c2a219d974f5d9f2c019aae0f8b Mon Sep 17 00:00:00 2001 From: wiliamhuang Date: Thu, 4 Dec 2025 09:55:36 -0600 Subject: [PATCH 061/253] DAOS-13589 client: return error code in init_fd_list() (#17202) and replace error code EAGAIN with EIO in retrieve_handles_from_fuse Signed-off-by: Lei Huang --- src/client/dfuse/pil4dfs/int_dfs.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/client/dfuse/pil4dfs/int_dfs.c b/src/client/dfuse/pil4dfs/int_dfs.c index 3eee719c4ae..0c85addcd03 100644 --- a/src/client/dfuse/pil4dfs/int_dfs.c +++ b/src/client/dfuse/pil4dfs/int_dfs.c @@ -845,7 +845,7 @@ retrieve_handles_from_fuse(int idx) fclose(tmp_file); unlink(fname); if (read_size != hs_reply.fsr_pool_size) { - errno_saved = EAGAIN; + errno_saved = EIO; D_DEBUG(DB_ANY, "fread expected %zu bytes, read %d bytes : %d (%s)\n", hs_reply.fsr_pool_size, read_size, errno_saved, strerror(errno_saved)); @@ -1468,16 +1468,16 @@ init_fd_list(void) rc = D_MUTEX_INIT(&lock_fd, NULL); if (rc) - return 1; + return rc; rc = D_MUTEX_INIT(&lock_dirfd, NULL); if (rc) - return 1; + return rc; rc = D_MUTEX_INIT(&lock_mmap, NULL); if (rc) - return 1; + return rc; rc = D_RWLOCK_INIT(&lock_fd_dup2ed, NULL); if (rc) - return 1; + return rc; /* fatal error above: failure to create mutexes. 
*/ From 4119c4ed6bb646ddb7eef1bf382e608c49a101c8 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 4 Dec 2025 11:52:39 -0800 Subject: [PATCH 062/253] DAOS-18303 cq: bump pylint from 4.0.1 to 4.0.4 (#17212) Updates `pylint` from 4.0.1 to 4.0.4 Signed-off-by: dependabot[bot] --- utils/cq/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/cq/requirements.txt b/utils/cq/requirements.txt index dd4ce3ee6be..f98e5c16946 100644 --- a/utils/cq/requirements.txt +++ b/utils/cq/requirements.txt @@ -4,7 +4,7 @@ pyenchant ## https://github.com/pycqa/flake8/issues/1389 https://github.com/PyCQA/flake8/pull/1720 flake8==7.3.0 isort==7.0.0 -pylint==4.0.1 +pylint==4.0.4 yamllint==1.37.1 codespell==2.4.1 # Used by ci/jira_query.py which pip installs it standalone. From 22d10e2f3a24c75c64be00521ce40c00bc897183 Mon Sep 17 00:00:00 2001 From: wiliamhuang Date: Fri, 5 Dec 2025 08:36:42 -0600 Subject: [PATCH 063/253] DAOS-16638 client: support NULL times in utimensat() (#17204) Signed-off-by: Lei Huang --- src/client/dfuse/pil4dfs/int_dfs.c | 24 +++++++++++++++++++----- src/tests/suite/dfuse_test.c | 9 +++++++++ 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/src/client/dfuse/pil4dfs/int_dfs.c b/src/client/dfuse/pil4dfs/int_dfs.c index 0c85addcd03..0276c8fa4f4 100644 --- a/src/client/dfuse/pil4dfs/int_dfs.c +++ b/src/client/dfuse/pil4dfs/int_dfs.c @@ -6106,8 +6106,10 @@ utimens_timespec(const char *path, const struct timespec times[2], int flags) int utimensat(int dirfd, const char *path, const struct timespec times[2], int flags) { - int idx_dfs, error = 0, rc; - char *full_path = NULL; + int idx_dfs, error = 0, rc; + char *full_path = NULL; + struct timespec times_loc[2]; + struct timespec *times_ptr; if (next_utimensat == NULL) { next_utimensat = dlsym(RTLD_NEXT, "utimensat"); @@ -6125,18 +6127,30 @@ utimensat(int dirfd, const char *path, const struct timespec times[2], int 
flags } _Pragma("GCC diagnostic pop") + /* clang-format off */ + + if (times == NULL) { + clock_gettime(CLOCK_REALTIME, ×_loc[0]); + times_loc[1].tv_sec = times_loc[0].tv_sec; + times_loc[1].tv_nsec = times_loc[0].tv_nsec; + times_ptr = times_loc; + } else { + times_ptr = (struct timespec *)times; + } + /* clang-format on */ + /* absolute path, dirfd is ignored */ if (path[0] == '/') - return utimens_timespec(path, times, flags); + return utimens_timespec(path, times_ptr, flags); idx_dfs = check_path_with_dirfd(dirfd, &full_path, path, &error); if (error) goto out_err; if (idx_dfs >= 0) - rc = utimens_timespec(full_path, times, flags); + rc = utimens_timespec(full_path, times_ptr, flags); else - rc = next_utimensat(dirfd, path, times, flags); + rc = next_utimensat(dirfd, path, times_ptr, flags); error = errno; if (full_path) { diff --git a/src/tests/suite/dfuse_test.c b/src/tests/suite/dfuse_test.c index b963bb0ec9b..bdae8da0db9 100644 --- a/src/tests/suite/dfuse_test.c +++ b/src/tests/suite/dfuse_test.c @@ -586,6 +586,15 @@ do_mtime(void **state) rc = close(fd); assert_return_code(rc, errno); + usleep(10000); + prev_ts.tv_sec = stbuf.st_mtim.tv_sec; + prev_ts.tv_nsec = stbuf.st_mtim.tv_nsec; + rc = utimensat(root, "mtime_file", NULL, 0); + assert_return_code(rc, errno); + rc = fstatat(root, "mtime_file", &stbuf, 0); + assert_return_code(rc, errno); + assert_true(timespec_gt(stbuf.st_mtim, prev_ts)); + rc = unlinkat(root, "mtime_file", 0); assert_return_code(rc, errno); From fa66297d9d1ff228ae5afcc42e3ca96f8ef96753 Mon Sep 17 00:00:00 2001 From: Cedric Koch-Hofer <94527853+knard38@users.noreply.github.com> Date: Mon, 8 Dec 2025 14:21:59 +0100 Subject: [PATCH 064/253] DAOS-18289 ddb: DDB command file with args (#17195) Add/fix following feature of ddb command file parsing: - support of command with arguments - support of comment starting with # - support of blank line Signed-off-by: Cedric Koch-Hofer --- src/control/cmd/ddb/main.go | 24 ++++++++++++++++-------- 1 
file changed, 16 insertions(+), 8 deletions(-) diff --git a/src/control/cmd/ddb/main.go b/src/control/cmd/ddb/main.go index 41fa0b6021f..b7ed3672f81 100644 --- a/src/control/cmd/ddb/main.go +++ b/src/control/cmd/ddb/main.go @@ -17,6 +17,7 @@ import ( "strings" "unsafe" + "github.com/desertbit/go-shlex" "github.com/desertbit/grumble" "github.com/jessevdk/go-flags" "github.com/pkg/errors" @@ -96,24 +97,31 @@ func (cmdStr *ddbCmdStr) UnmarshalFlag(fv string) error { func runFileCmds(log logging.Logger, app *grumble.App, fileName string) error { file, err := os.Open(fileName) if err != nil { - return errors.Wrapf(err, "Error opening file: %s", fileName) + return errors.Wrapf(err, "Error opening file %q", fileName) } defer func() { err = file.Close() if err != nil { - log.Errorf("Error closing %s: %s\n", fileName, err) + log.Errorf("Error closing %q: %s\n", fileName, err) } }() - log.Debugf("Running commands in: %s\n", fileName) + log.Debugf("Running commands in %q\n", fileName) scanner := bufio.NewScanner(file) for scanner.Scan() { - fileCmd := scanner.Text() - log.Debugf("Running Command: %s\n", fileCmd) - err := runCmdStr(app, fileCmd) + lineStr := scanner.Text() + lineCmd, err := shlex.Split(lineStr, true) if err != nil { - return errors.Wrapf(err, "Failed running command %q", fileCmd) + return errors.Wrapf(err, "Failed running command %q", lineStr) + } + if len(lineCmd) == 0 || strings.HasPrefix(lineCmd[0], "#") { + continue + } + log.Debugf("Running Command %q\n", lineStr) + err = runCmdStr(app, lineCmd[0], lineCmd[1:]...) 
+ if err != nil { + return errors.Wrapf(err, "Failed running command %q", lineStr) } } @@ -213,7 +221,7 @@ Example Paths: } else { err := runFileCmds(log, app, opts.CmdFile) if err != nil { - log.Error("Error running command file\n") + log.Errorf("Error running command file: %s\n", err) } } From 436128475ee08d5abfbab6ae9518fd50d77d8c5d Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Mon, 8 Dec 2025 14:10:22 -0600 Subject: [PATCH 065/253] DAOS-18295 pydaos: add destroy call for ddict (#17205) Signed-off-by: Mohamad Chaarawi --- src/client/pydaos/pydaos_core.py | 13 ++++++ src/client/pydaos/pydaos_shim.c | 73 +++++++++++++++++++++++++++++++- utils/node_local_test.py | 1 + 3 files changed, 85 insertions(+), 2 deletions(-) diff --git a/src/client/pydaos/pydaos_core.py b/src/client/pydaos/pydaos_core.py index 384639c1d31..22d296c415a 100644 --- a/src/client/pydaos/pydaos_core.py +++ b/src/client/pydaos/pydaos_core.py @@ -1,4 +1,5 @@ # (C) Copyright 2019-2024 Intel Corporation. +# (C) Copyright 2025 Hewlett Packard Enterprise Development LP # # SPDX-License-Identifier: BSD-2-Clause-Patent # @@ -73,6 +74,10 @@ class DCont(): array(name, kwargs): Create new DArray object. + + destroy() + Destroy the DDict or DArray object. This does not invalidate open objects and using those + objects after destroying it will result in undefined behavior. 
""" def __init__(self, pool=None, cont=None, path=None, open_mode='RW'): @@ -164,6 +169,14 @@ def array(self, name, v: list = None, cid="0"): return da + def destroy(self, name): + """ Destroy an existing Dict or Array object """ + + # Remove the entry for the container root object and destroy the kv + ret = pydaos_shim.cont_destroyobj(DAOS_MAGIC, self._hdl, name) + if ret != pydaos_shim.DER_SUCCESS: + raise PyDError("failed to destroy DAOS dict", ret) + def __str__(self): return '{}/{}'.format(self.pool, self.cont) diff --git a/src/client/pydaos/pydaos_shim.c b/src/client/pydaos/pydaos_shim.c index 4436e056f9d..370deedf1d6 100644 --- a/src/client/pydaos/pydaos_shim.c +++ b/src/client/pydaos/pydaos_shim.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2019-2024 Intel Corporation. + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -286,8 +287,10 @@ __shim_handle__cont_get(PyObject *self, PyObject *args) struct open_handle *hdl; char *name; struct pydaos_df entry; - size_t size = sizeof(entry); - daos_obj_id_t oid = {0, }; + size_t size = sizeof(entry); + daos_obj_id_t oid = { + 0, + }; unsigned int otype = 0; int rc; @@ -388,6 +391,68 @@ __shim_handle__cont_newobj(PyObject *self, PyObject *args) return return_list; } +static PyObject * +__shim_handle__cont_destroyobj(PyObject *self, PyObject *args) +{ + struct open_handle *hdl; + char *name; + struct pydaos_df entry; + size_t size = sizeof(entry); + daos_obj_id_t oid = { + 0, + }; + daos_handle_t oh; + unsigned int otype = 0; + int rc; + + /* Parse arguments */ + RETURN_NULL_IF_FAILED_TO_PARSE(args, "Ks", &hdl, &name); + + /** Lookup name in root kv */ + rc = daos_kv_get(hdl->oh, DAOS_TX_NONE, 0, name, &size, &entry, NULL); + if (rc != -DER_SUCCESS) + goto out; + + /** Check if entry actually exists */ + if (size == 0) { + rc = -DER_NONEXIST; + goto out; + } + + /** If we fetched a value which isn't an entry ... 
we have a problem */ + if (size != sizeof(entry)) { + rc = -DER_INVAL; + goto out; + } + + oid = entry.oid; + otype = entry.otype; + + /** we do not support arrays anyway, so we would not be here */ + if (otype == PYDAOS_ARRAY) { + rc = -DER_INVAL; + goto out; + } + + /* Remove name from root kv, use conditional to fail if not exist */ + rc = daos_kv_remove(hdl->oh, DAOS_TX_NONE, DAOS_COND_PUNCH, name, NULL); + if (rc != -DER_SUCCESS) + goto out; + + rc = daos_kv_open(hdl->coh, oid, DAOS_OO_RW, &oh, NULL); + if (rc != -DER_SUCCESS) + goto out; + rc = daos_kv_destroy(oh, DAOS_TX_NONE, NULL); + if (rc != -DER_SUCCESS) { + daos_kv_close(oh, NULL); + goto out; + } + rc = daos_kv_close(oh, NULL); + +out: + return PyLong_FromLong(rc); +} + static PyObject * __shim_handle__cont_close(PyObject *self, PyObject *args) { @@ -740,10 +805,13 @@ do { \ DEFINE_OC_EXPL(EC_2P2G); /** OC_EC_2P2G1, OC_EC_2P2G2, ... */ DEFINE_OC_EXPL(EC_4P1G); /** OC_EC_4P1G1, OC_EC_4P1G2, ... */ DEFINE_OC_EXPL(EC_4P2G); /** OC_EC_4P2G1, OC_EC_4P2G2, ... */ + DEFINE_OC_EXPL(EC_4P3G); /** OC_EC_4P3G1, OC_EC_4P3G2, ... */ DEFINE_OC_EXPL(EC_8P1G); /** OC_EC_8P1G1, OC_EC_8P1G2, ... */ DEFINE_OC_EXPL(EC_8P2G); /** OC_EC_8P2G1, OC_EC_8P2G2, ... */ + DEFINE_OC_EXPL(EC_8P3G); /** OC_EC_8P3G1, OC_EC_8P3G2, ... */ DEFINE_OC_EXPL(EC_16P1G); /** OC_EC_16P1G1, OC_EC_16P1G2, ... */ DEFINE_OC_EXPL(EC_16P2G); /** OC_EC_16P2G1, OC_EC_16P2G2, ... */ + DEFINE_OC_EXPL(EC_16P3G); /** OC_EC_16P3G1, OC_EC_16P3G2, ... 
*/ #define DEFINE_OC_INTERNAL(name)\ do { \ @@ -1393,6 +1461,7 @@ static PyMethodDef daosMethods[] = { EXPORT_PYTHON_METHOD(cont_open_by_path), EXPORT_PYTHON_METHOD(cont_get), EXPORT_PYTHON_METHOD(cont_newobj), + EXPORT_PYTHON_METHOD(cont_destroyobj), EXPORT_PYTHON_METHOD(cont_close), EXPORT_PYTHON_METHOD(cont_check), EXPORT_PYTHON_METHOD(cont_check_by_path), diff --git a/utils/node_local_test.py b/utils/node_local_test.py index 7d4b0ff4dc2..8b6bffa0056 100755 --- a/utils/node_local_test.py +++ b/utils/node_local_test.py @@ -5428,6 +5428,7 @@ def test_pydaos_kv(server, conf): print("That's not good") del kv + container.destroy('my_test_kv') del container print('Running PyDAOS container checker') From 6ec648bdda83c07b20c0fb7fafcdd3bbff46f505 Mon Sep 17 00:00:00 2001 From: Nasf-Fan Date: Tue, 9 Dec 2025 12:45:42 +0800 Subject: [PATCH 066/253] DAOS-18261 vos: save container checksum property copy in vos (#17167) DDB needs checksum property to do checksum related verification under offline mode. Since container checksum property is immutable for user after creating the container, we can save its copy in every vos shard locally. That will much simplify related logic in DDB. 
Signed-off-by: Fan Yong --- src/container/srv_target.c | 17 ++++++++- src/include/daos_srv/vos.h | 11 ++++++ src/vos/vos_container.c | 78 ++++++++++++++++++++++++++++++++++++++ src/vos/vos_layout.h | 16 ++++++-- 4 files changed, 117 insertions(+), 5 deletions(-) diff --git a/src/container/srv_target.c b/src/container/srv_target.c index d3bc9710632..e29c3750a7e 100644 --- a/src/container/srv_target.c +++ b/src/container/srv_target.c @@ -148,7 +148,6 @@ ds_cont_csummer_init(struct ds_cont_child *cont) /* Check again since IV fetch yield */ if (cont->sc_props_fetched) goto done; - cont->sc_props_fetched = 1; csum_val = cont_props->dcp_csum_type; if (!daos_cont_csum_prop_is_enabled(csum_val)) { @@ -162,9 +161,25 @@ ds_cont_csummer_init(struct ds_cont_child *cont) daos_contprop2hashtype(csum_val), cont_props->dcp_chunksize, cont_props->dcp_srv_verify); + if (rc != 0) + goto done; + if (dedup_only) dedup_configure_csummer(cont->sc_csummer, cont_props); } + + rc = vos_cont_save_props(cont->sc_hdl, cont_props); + if (rc != 0) { + /* + * The failure of saving checksum property copy only potentially affect ddb, but + * it is not fatal for current caller. Let's go ahead with some warning message. + */ + D_WARN("Cannot locally save container property for " DF_UUID ": " DF_RC "\n", + DP_UUID(cont->sc_uuid), DP_RC(rc)); + rc = 0; + } + cont->sc_props_fetched = 1; + done: return rc; } diff --git a/src/include/daos_srv/vos.h b/src/include/daos_srv/vos.h index f152a11f39a..75bbd598bc0 100644 --- a/src/include/daos_srv/vos.h +++ b/src/include/daos_srv/vos.h @@ -1054,6 +1054,17 @@ vos_cont_set_global_stable_epoch(daos_handle_t coh, daos_epoch_t epoch); int vos_cont_set_mod_bound(daos_handle_t coh, uint64_t epoch); +/** + * Save property for the given container. + * + * \param coh [IN] Container open handle + * \param props [IN] Pointer to container property to be saved. + * + * \return Zero on success, negative value if error. 
+ */ +int +vos_cont_save_props(daos_handle_t coh, struct cont_props *props); + /** * Query the gap between the max allowed aggregation epoch and current HLC. * diff --git a/src/vos/vos_container.c b/src/vos/vos_container.c index ad76bb8f6fe..ef359462b68 100644 --- a/src/vos/vos_container.c +++ b/src/vos/vos_container.c @@ -1029,3 +1029,81 @@ vos_cont_set_mod_bound(daos_handle_t coh, uint64_t epoch) return 0; } + +int +vos_cont_save_props(daos_handle_t coh, struct cont_props *props) +{ + struct umem_instance *umm; + struct vos_container *cont; + struct vos_cont_ext_df *ced; + int rc = 0; + + cont = vos_hdl2cont(coh); + D_ASSERT(cont != NULL); + + umm = vos_cont2umm(cont); + ced = umem_off2ptr(umm, cont->vc_cont_df->cd_ext); + + /* Do not allow to save property against old container without extension. */ + if (ced == NULL) + D_GOTO(out, rc = -DER_NOTSUPPORTED); + + /* Currently we only save chunksize and csum_type in vos_container. Maybe more in future. */ + + if (ced->ced_chunksize == props->dcp_chunksize && ced->ced_valid_bits & VCEB_CSUM && + ((props->dcp_csum_enabled == 1 && ced->ced_csum_type == props->dcp_csum_type) || + (props->dcp_csum_enabled == 0 && ced->ced_csum_type == DAOS_PROP_CO_CSUM_OFF))) + D_GOTO(out, rc = 0); + + rc = umem_tx_begin(umm, NULL); + if (rc != 0) + goto out; + + if (ced->ced_chunksize != props->dcp_chunksize) { + rc = umem_tx_add_ptr(umm, &ced->ced_chunksize, sizeof(ced->ced_chunksize)); + if (rc != 0) + goto abort; + + ced->ced_chunksize = props->dcp_chunksize; + } + + if (!(ced->ced_valid_bits & VCEB_CSUM)) { + rc = umem_tx_add_ptr(umm, &ced->ced_valid_bits, sizeof(ced->ced_valid_bits)); + if (rc != 0) + goto abort; + + ced->ced_valid_bits |= VCEB_CSUM; + } + + if (props->dcp_csum_enabled == 1) { + if (ced->ced_csum_type != props->dcp_csum_type) { + rc = umem_tx_add_ptr(umm, &ced->ced_csum_type, sizeof(ced->ced_csum_type)); + if (rc != 0) + goto abort; + + ced->ced_csum_type = props->dcp_csum_type; + } + } else { + if 
(ced->ced_csum_type != DAOS_PROP_CO_CSUM_OFF) { + rc = umem_tx_add_ptr(umm, &ced->ced_csum_type, sizeof(ced->ced_csum_type)); + if (rc != 0) + goto abort; + + ced->ced_csum_type = DAOS_PROP_CO_CSUM_OFF; + } + } + +abort: + if (rc != 0) + rc = umem_tx_abort(umm, rc); + else + rc = umem_tx_commit(umm); + +out: + DL_CDEBUG(rc != 0, DLOG_ERR, DB_MGMT, rc, + "Save property (csum %s, hash_type %d, chunksize %u) for container " DF_UUID, + props->dcp_csum_enabled == 1 ? "enabled" : "disabled", props->dcp_csum_type, + props->dcp_chunksize, DP_UUID(cont->vc_id)); + + return rc; +} diff --git a/src/vos/vos_layout.h b/src/vos/vos_layout.h index aaae854327b..19335f3df6e 100644 --- a/src/vos/vos_layout.h +++ b/src/vos/vos_layout.h @@ -272,19 +272,27 @@ enum vos_io_stream { VOS_IOS_CNT }; +enum vos_cont_ext_bits { + VCEB_CSUM = (1 << 0), +}; + /* VOS container durable format extension */ struct vos_cont_ext_df { /* GC bucket extension */ - struct vos_gc_bkt_df ced_gc_bkt; + struct vos_gc_bkt_df ced_gc_bkt; + uint32_t ced_valid_bits; + uint32_t ced_padding0; /* * Any modification involved in current target (container shard) under the global * stable epoch have already been persistently stored globally. 
*/ - uint64_t ced_global_stable_epoch; + uint64_t ced_global_stable_epoch; + uint32_t ced_csum_type; + uint32_t ced_chunksize; /* Reserved for potential new features */ - uint64_t ced_paddings[37]; + uint64_t ced_padding1[35]; /* Reserved for future extension */ - uint64_t ced_reserve; + uint64_t ced_reserve; }; /* VOS Container Value */ From 5549deba5dc21bb86c1489eec447b8d0bfcd65b6 Mon Sep 17 00:00:00 2001 From: Alexander Oganezov Date: Tue, 9 Dec 2025 14:07:53 -0800 Subject: [PATCH 067/253] DAOS-18248 cart: Handle failures of crt_rpc_priv_alloc() (#17155) - handle all errors from crt_rpc_priv_alloc() Signed-off-by: Alexander A Oganezov --- src/cart/crt_hg.c | 35 +++++++++++++---------------------- 1 file changed, 13 insertions(+), 22 deletions(-) diff --git a/src/cart/crt_hg.c b/src/cart/crt_hg.c index 62d6c171f89..cdd25909ca1 100644 --- a/src/cart/crt_hg.c +++ b/src/cart/crt_hg.c @@ -1189,33 +1189,24 @@ crt_rpc_handler_common(hg_handle_t hg_hdl) HG_Destroy(rpc_tmp.crp_hg_hdl); D_GOTO(out, hg_ret = HG_SUCCESS); } - D_ASSERT(proc != NULL); - opc = rpc_tmp.crp_req_hdr.cch_opc; - /** - * Set the opcode in the temp RPC so that it can be correctly logged. - */ + D_ASSERT(proc != NULL); + opc = rpc_tmp.crp_req_hdr.cch_opc; rpc_tmp.crp_pub.cr_opc = opc; + /* allocate rpc struct for a given opcode; in/out size will vary per opc */ rc = crt_rpc_priv_alloc(opc, &rpc_priv, false /* forward */); if (unlikely(rc != 0)) { - if (rc == -DER_UNREG) { - D_ERROR("opc: %#x, lookup failed.\n", opc); - /* - * The RPC is not registered on the server, we don't know how to - * process the RPC request, so we send a CART - * level error message to the client. 
- */ - crt_hg_reply_error_send(&rpc_tmp, rc); - crt_hg_unpack_cleanup(proc); - HG_Destroy(rpc_tmp.crp_hg_hdl); - D_GOTO(out, hg_ret = HG_SUCCESS); - } else if (rc == -DER_NOMEM) { - crt_hg_reply_error_send(&rpc_tmp, -DER_DOS); - crt_hg_unpack_cleanup(proc); - HG_Destroy(rpc_tmp.crp_hg_hdl); - D_GOTO(out, hg_ret = HG_SUCCESS); - } + /* set client rc to denial of service if server is out of mem */ + if (rc == -DER_NOMEM) + rc = -DER_DOS; /* don't log as we are oom already */ + else + D_ERROR("crt_rpc_priv_alloc() failed, rc: %d.\n", rc); + + crt_hg_reply_error_send(&rpc_tmp, rc); + crt_hg_unpack_cleanup(proc); + HG_Destroy(rpc_tmp.crp_hg_hdl); + D_GOTO(out, hg_ret = HG_SUCCESS); } opc_info = rpc_priv->crp_opc_info; From 85615ee7bede1e5a444be83db2bda8228107292e Mon Sep 17 00:00:00 2001 From: Jan Michalski Date: Tue, 9 Dec 2025 22:09:23 +0000 Subject: [PATCH 068/253] DAOS-18281 dlck: make --storage argument obligatory (#17206) Signed-off-by: Jan Michalski --- ci/test_files_to_stash.txt | 1 + src/utils/dlck/dlck_args_engine.c | 4 + src/utils/dlck/tests/SConscript | 24 +++++ src/utils/dlck/tests/dlck_args_ut.c | 148 ++++++++++++++++++++++++++++ utils/utest.yaml | 4 + 5 files changed, 181 insertions(+) create mode 100644 src/utils/dlck/tests/dlck_args_ut.c diff --git a/ci/test_files_to_stash.txt b/ci/test_files_to_stash.txt index feeb0e64992..88680ff14f6 100755 --- a/ci/test_files_to_stash.txt +++ b/ci/test_files_to_stash.txt @@ -43,6 +43,7 @@ build/*/*/src/bio/smd/tests/smd_ut, build/*/*/src/tests/rpc/rpc_tests, build/*/*/src/engine/tests/abt_perf, build/*/*/src/engine/tests/abt_stack, +build/*/*/src/utils/dlck/tests/dlck_args_ut, src/common/tests/btree.sh, src/control/run_go_tests.sh, src/rdb/raft_tests/raft_tests.py, diff --git a/src/utils/dlck/dlck_args_engine.c b/src/utils/dlck/dlck_args_engine.c index f80cc9926ca..ea6d4d263e3 100644 --- a/src/utils/dlck/dlck_args_engine.c +++ b/src/utils/dlck/dlck_args_engine.c @@ -37,6 +37,10 @@ args_engine_init(struct 
dlck_args_engine *args) static int args_engine_check(struct argp_state *state, struct dlck_args_engine *args) { + if (args->storage_path == NULL) { + RETURN_FAIL(state, EINVAL, "Storage path not provided"); + } + return 0; } diff --git a/src/utils/dlck/tests/SConscript b/src/utils/dlck/tests/SConscript index b447ca3e210..d83805c5390 100644 --- a/src/utils/dlck/tests/SConscript +++ b/src/utils/dlck/tests/SConscript @@ -31,11 +31,35 @@ def build_dlck_test_helper(henv): henv.d_test_program('dlck_test_helper', srcs, LIBS=libs) +def build_dlck_args_ut(henv): + """Build dlck_args_ut""" + henv.Append(OBJPREFIX="dlck_args_ut_") + henv.Append(CPPPATH=[Dir('../../../').srcnode()]) + henv.AppendUnique(LINKFLAGS=['-Wl,--wrap=argp_failure']) + henv.AppendUnique(RPATH_FULL=['$PREFIX/lib64/daos_srv']) + henv.require('cmocka') + + libs = [ + 'gurt', 'daos_common_pmem', 'cmocka', 'uuid' + ] + + srcs = [ + 'dlck_args_ut.c', + '../dlck_args_common.c', + '../dlck_args_engine.c', + '../dlck_args_files.c', + '../dlck_args_parse.c', + '../dlck_args.c', + ] + henv.d_test_program('dlck_args_ut', srcs, LIBS=libs) + + def scons(): """Execute build""" Import('env') build_dlck_test_helper(env.Clone()) + build_dlck_args_ut(env.Clone()) if __name__ == "SCons.Script": diff --git a/src/utils/dlck/tests/dlck_args_ut.c b/src/utils/dlck/tests/dlck_args_ut.c new file mode 100644 index 00000000000..e9bc4f79efc --- /dev/null +++ b/src/utils/dlck/tests/dlck_args_ut.c @@ -0,0 +1,148 @@ +/** + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP. 
+ * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ +#define D_LOGFAC DD_FAC(tests) + +#include +#include +#include +#include + +#include + +#include "../dlck_args.h" + +/** globals */ + +#define APP_NAME_MOCK "app_name" +#define PARSER_FAILURE EINVAL + +extern struct argp argp_common; +extern struct argp argp_file; +extern struct argp argp_engine; + +struct dlck_control Ctrl; + +argp_parser_t Argp_engine_parser_real; + +/** wrappers and mocks */ + +void +__wrap_argp_failure(const struct argp_state *__restrict __state, int __status, int __errnum, + const char *__restrict __fmt, ...) +{ + check_expected(__state); + assert_int_equal(__status, PARSER_FAILURE); + assert_int_equal(__errnum, PARSER_FAILURE); +} + +static error_t +argp_common_parser_mock(int key, char *arg, struct argp_state *state) +{ + check_expected(key); + assert_non_null(state); + assert_ptr_equal(state->input, &Ctrl.common); + + return 0; +} + +static error_t +argp_file_parser_mock(int key, char *arg, struct argp_state *state) +{ + check_expected(key); + assert_non_null(state); + assert_ptr_equal(state->input, &Ctrl.files); + + return 0; +} + +static error_t +argp_engine_parser_mock(int key, char *arg, struct argp_state *state) +{ + check_expected_ptr(key); + assert_non_null(state); + assert_ptr_equal(state->input, &Ctrl.engine); + + return 0; +} + +/** setups & teardowns */ + +static int +setup_engine_args_default(void **state_ptr) +{ + static struct dlck_args_engine args = {0}; + static struct argp_state state = {0}; + error_t ret; + + /** bind the input */ + state.input = &args; + + /** set defaults */ + ret = Argp_engine_parser_real(ARGP_KEY_INIT, NULL, &state); + assert_int_equal(ret, 0); + + *state_ptr = &state; + + return 0; +} + +/** tests */ + +/** + * Test if all the children parsers are connected properly and if each of them receives all of + * the expected special key values. 
+ */ +static void +test_parser_children_connection(void **unused) +{ + /** special keys as they are provided for each of the parsers in order */ + int keys[] = {ARGP_KEY_INIT, ARGP_KEY_NO_ARGS, ARGP_KEY_END, ARGP_KEY_SUCCESS, + ARGP_KEY_FINI}; + + /** empty argument list */ + int argc = 1; + char *argv[] = {APP_NAME_MOCK}; + + for (int i = 0; i < ARRAY_SIZE(keys); ++i) { + expect_value(argp_common_parser_mock, key, keys[i]); + expect_value(argp_file_parser_mock, key, keys[i]); + expect_value(argp_engine_parser_mock, key, keys[i]); + } + + dlck_args_parse(argc, argv, &Ctrl); +} + +static void +test_engine_parser_END_no_storage_path_fail(void **state_ptr) +{ + struct argp_state *state = *state_ptr; + error_t ret; + + expect_value(__wrap_argp_failure, __state, state); + + ret = Argp_engine_parser_real(ARGP_KEY_END, NULL, state); + assert_int_equal(ret, PARSER_FAILURE); +} + +static const struct CMUnitTest dlck_args_tests[] = { + {"DLCK_ARGS100: parser - children connection", test_parser_children_connection, NULL, NULL}, + {"DLCK_ARGS200: engine parser + ARGP_KEY_END + no storage path", + test_engine_parser_END_no_storage_path_fail, setup_engine_args_default, NULL}, +}; + +int +main(int argc, char **argv) +{ + /** collect function pointers to real parsers */ + Argp_engine_parser_real = argp_engine.parser; + + /** overwrite real parsers with mocks */ + argp_common.parser = argp_common_parser_mock; + argp_file.parser = argp_file_parser_mock; + argp_engine.parser = argp_engine_parser_mock; + + return cmocka_run_group_tests_name("dlck_args_ut", dlck_args_tests, NULL, NULL); +} diff --git a/utils/utest.yaml b/utils/utest.yaml index 8710a1bae46..0f2f4e3131a 100644 --- a/utils/utest.yaml +++ b/utils/utest.yaml @@ -190,6 +190,10 @@ tests: - cmd: ["bin/ddb_tests"] - cmd: ["bin/ddb_ut"] +- name: dlck + base: "BUILD_DIR" + tests: + - cmd: ["src/utils/dlck/tests/dlck_args_ut"] - name: Source metadata testing gha: True memcheck: False From c88e51845d9835fe73714d2b1fe6983e22f1a894 
Mon Sep 17 00:00:00 2001 From: Tom Nabarro Date: Tue, 9 Dec 2025 22:10:50 +0000 Subject: [PATCH 069/253] DAOS-18300 control: Increase code coverage for srvModule upcalls (#17233) Signed-off-by: Tom Nabarro --- src/control/server/mgmt_drpc.go | 8 +- src/control/server/mgmt_drpc_test.go | 420 +++++++++++++++++++++++---- 2 files changed, 371 insertions(+), 57 deletions(-) diff --git a/src/control/server/mgmt_drpc.go b/src/control/server/mgmt_drpc.go index 03bc64983b1..3789b0dec8c 100644 --- a/src/control/server/mgmt_drpc.go +++ b/src/control/server/mgmt_drpc.go @@ -79,20 +79,20 @@ type srvModule struct { checkerDB checker.FindingStore engines []Engine events *events.PubSub - client *control.Client + rpcClient control.UnaryInvoker msReplicas []string } // newSrvModule creates a new srv module references to the system database, // resident EngineInstances and event publish subscribe reference. -func newSrvModule(log logging.Logger, pdb poolDatabase, cdb checker.FindingStore, engines []Engine, events *events.PubSub, client *control.Client, msReplicas []string) *srvModule { +func newSrvModule(log logging.Logger, pdb poolDatabase, cdb checker.FindingStore, engines []Engine, events *events.PubSub, client control.UnaryInvoker, msReplicas []string) *srvModule { return &srvModule{ log: log, poolDB: pdb, checkerDB: cdb, engines: engines, events: events, - client: client, + rpcClient: client, msReplicas: msReplicas, } } @@ -299,7 +299,7 @@ func (mod *srvModule) handleGetSysProps(reqb []byte) ([]byte, error) { msReq.SetHostList(mod.msReplicas) msReq.SetSystem(req.Sys) - msResp, err := control.SystemGetProp(ctx, mod.client, msReq) + msResp, err := control.SystemGetProp(ctx, mod.rpcClient, msReq) if err != nil { return nil, errors.Wrap(err, "failed to get system properties from MS") } diff --git a/src/control/server/mgmt_drpc_test.go b/src/control/server/mgmt_drpc_test.go index d6d5cff2139..f2a229d6b67 100644 --- a/src/control/server/mgmt_drpc_test.go +++ 
b/src/control/server/mgmt_drpc_test.go @@ -13,12 +13,16 @@ import ( "testing" "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" "github.com/pkg/errors" "google.golang.org/protobuf/proto" + "google.golang.org/protobuf/testing/protocmp" + mgmtpb "github.com/daos-stack/daos/src/control/common/proto/mgmt" srvpb "github.com/daos-stack/daos/src/control/common/proto/srv" "github.com/daos-stack/daos/src/control/common/test" "github.com/daos-stack/daos/src/control/drpc" + "github.com/daos-stack/daos/src/control/lib/control" "github.com/daos-stack/daos/src/control/lib/daos" "github.com/daos-stack/daos/src/control/lib/ranklist" "github.com/daos-stack/daos/src/control/logging" @@ -60,7 +64,7 @@ func addEngineInstances(mod *srvModule, numInstances int, log logging.Logger) { } } -func TestSrvModule_HandleNotifyReady_Invalid(t *testing.T) { +func TestSrvModule_handleNotifyReady_Invalid(t *testing.T) { log, buf := logging.NewTestLogger(t.Name()) defer test.ShowBufferOnFailure(t, buf) @@ -86,7 +90,7 @@ func TestSrvModule_HandleNotifyReady_Invalid(t *testing.T) { } } -func TestSrvModule_HandleNotifyReady_BadSockPath(t *testing.T) { +func TestSrvModule_handleNotifyReady_BadSockPath(t *testing.T) { log, buf := logging.NewTestLogger(t.Name()) defer test.ShowBufferOnFailure(t, buf) @@ -108,7 +112,7 @@ func TestSrvModule_HandleNotifyReady_BadSockPath(t *testing.T) { } } -func TestSrvModule_HandleNotifyReady_Success_Single(t *testing.T) { +func TestSrvModule_handleNotifyReady_Success_Single(t *testing.T) { log, buf := logging.NewTestLogger(t.Name()) defer test.ShowBufferOnFailure(t, buf) @@ -134,7 +138,7 @@ func TestSrvModule_HandleNotifyReady_Success_Single(t *testing.T) { waitForEngineReady(t, mod.engines[0].(*EngineInstance)) } -func TestSrvModule_HandleNotifyReady_Success_Multi(t *testing.T) { +func TestSrvModule_handleNotifyReady_Success_Multi(t *testing.T) { log, buf := logging.NewTestLogger(t.Name()) defer test.ShowBufferOnFailure(t, buf) @@ -171,7 +175,7 @@ 
func TestSrvModule_HandleNotifyReady_Success_Multi(t *testing.T) { } } -func TestSrvModule_HandleNotifyReady_IdxOutOfRange(t *testing.T) { +func TestSrvModule_handleNotifyReady_IdxOutOfRange(t *testing.T) { log, buf := logging.NewTestLogger(t.Name()) defer test.ShowBufferOnFailure(t, buf) @@ -204,7 +208,7 @@ func TestSrvModule_HandleNotifyReady_IdxOutOfRange(t *testing.T) { } } -func TestSrvModule_HandleClusterEvent_Invalid(t *testing.T) { +func TestSrvModule_handleClusterEvent_Invalid(t *testing.T) { log, buf := logging.NewTestLogger(t.Name()) defer test.ShowBufferOnFailure(t, buf) @@ -238,64 +242,76 @@ func getTestBytes(t *testing.T, msg proto.Message) []byte { return testBytes } -func TestSrvModule_handleGetPoolServiceRanks(t *testing.T) { - log, buf := logging.NewTestLogger(t.Name()) - defer test.ShowBufferOnFailure(t, buf) +func cmpTestResp(t *testing.T, respBytes []byte, resp, expResp proto.Message) { + t.Helper() + + if err := proto.Unmarshal(respBytes, resp); err != nil { + t.Fatal(err) + } + if diff := cmp.Diff(expResp, resp, protocmp.Transform()); diff != "" { + t.Fatalf("unexpected response (-want, +got):\n%s\n", diff) + } +} +func TestSrvModule_handleGetPoolServiceRanks(t *testing.T) { for name, tc := range map[string]struct { - reqBytes []byte + req *srvpb.GetPoolSvcReq + badReq bool testPool *system.PoolService - expResp []byte + expResp *srvpb.GetPoolSvcResp expErr error }{ "bad request bytes": { - reqBytes: []byte("bad bytes"), - expErr: drpc.UnmarshalingPayloadFailure(), + badReq: true, + expErr: drpc.UnmarshalingPayloadFailure(), }, "bad pool uuid in request": { - reqBytes: getTestBytes(t, &srvpb.GetPoolSvcReq{ + req: &srvpb.GetPoolSvcReq{ Uuid: "bad-uuid", - }), + }, expErr: errors.New("invalid pool uuid"), }, "not found": { - reqBytes: getTestBytes(t, &srvpb.GetPoolSvcReq{ + req: &srvpb.GetPoolSvcReq{ Uuid: test.MockUUID(), - }), - expResp: getTestBytes(t, &srvpb.GetPoolSvcResp{ + }, + expResp: &srvpb.GetPoolSvcResp{ Status: 
int32(daos.Nonexistent), - }), + }, }, "found, but not Ready": { - reqBytes: getTestBytes(t, &srvpb.GetPoolSvcReq{ + req: &srvpb.GetPoolSvcReq{ Uuid: test.MockUUID(), - }), + }, testPool: &system.PoolService{ PoolUUID: test.MockPoolUUID(), PoolLabel: "testlabel", State: system.PoolServiceStateCreating, Replicas: []ranklist.Rank{0, 1, 2}, }, - expResp: getTestBytes(t, &srvpb.GetPoolSvcResp{ + expResp: &srvpb.GetPoolSvcResp{ Status: int32(daos.Nonexistent), - }), + }, }, "success": { - reqBytes: getTestBytes(t, &srvpb.GetPoolSvcReq{ + req: &srvpb.GetPoolSvcReq{ Uuid: test.MockUUID(), - }), + }, testPool: &system.PoolService{ PoolUUID: test.MockPoolUUID(), PoolLabel: "testlabel", State: system.PoolServiceStateReady, Replicas: []ranklist.Rank{0, 1, 2}, }, - expResp: getTestBytes(t, &srvpb.GetPoolSvcResp{ + expResp: &srvpb.GetPoolSvcResp{ Svcreps: []uint32{0, 1, 2}, - }), + }, }, } { t.Run(name, func(t *testing.T) { + log, buf := logging.NewTestLogger(t.Name()) + defer test.ShowBufferOnFailure(t, buf) + ctx := test.Context(t) db := raft.MockDatabase(t, log) @@ -314,72 +330,76 @@ func TestSrvModule_handleGetPoolServiceRanks(t *testing.T) { } } - resp, err := mod.handleGetPoolServiceRanks(tc.reqBytes) + reqBytes := []byte("bad bytes") + if !tc.badReq { + reqBytes = getTestBytes(t, tc.req) + } + + respBytes, err := mod.handleGetPoolServiceRanks(reqBytes) test.CmpErr(t, tc.expErr, err) if err != nil { return } - if diff := cmp.Diff(tc.expResp, resp); diff != "" { - t.Fatalf("unexpected response (-want, +got):\n%s\n", diff) - } + cmpTestResp(t, respBytes, new(srvpb.GetPoolSvcResp), tc.expResp) }) } } func TestSrvModule_handlePoolFindByLabel(t *testing.T) { - log, buf := logging.NewTestLogger(t.Name()) - defer test.ShowBufferOnFailure(t, buf) - for name, tc := range map[string]struct { - reqBytes []byte + req *srvpb.PoolFindByLabelReq + badReq bool testPool *system.PoolService - expResp []byte + expResp *srvpb.PoolFindByLabelResp expErr error }{ "bad request bytes": { - 
reqBytes: []byte("bad bytes"), - expErr: drpc.UnmarshalingPayloadFailure(), + badReq: true, + expErr: drpc.UnmarshalingPayloadFailure(), }, "not found": { - reqBytes: getTestBytes(t, &srvpb.PoolFindByLabelReq{ + req: &srvpb.PoolFindByLabelReq{ Label: "testlabel", - }), - expResp: getTestBytes(t, &srvpb.PoolFindByLabelResp{ + }, + expResp: &srvpb.PoolFindByLabelResp{ Status: int32(daos.Nonexistent), - }), + }, }, "found, but not Ready": { - reqBytes: getTestBytes(t, &srvpb.PoolFindByLabelReq{ + req: &srvpb.PoolFindByLabelReq{ Label: "testlabel", - }), + }, testPool: &system.PoolService{ PoolUUID: test.MockPoolUUID(), PoolLabel: "testlabel", State: system.PoolServiceStateCreating, Replicas: []ranklist.Rank{0, 1, 2}, }, - expResp: getTestBytes(t, &srvpb.PoolFindByLabelResp{ + expResp: &srvpb.PoolFindByLabelResp{ Status: int32(daos.Nonexistent), - }), + }, }, "success": { - reqBytes: getTestBytes(t, &srvpb.PoolFindByLabelReq{ + req: &srvpb.PoolFindByLabelReq{ Label: "testlabel", - }), + }, testPool: &system.PoolService{ PoolUUID: test.MockPoolUUID(), PoolLabel: "testlabel", State: system.PoolServiceStateReady, Replicas: []ranklist.Rank{0, 1, 2}, }, - expResp: getTestBytes(t, &srvpb.PoolFindByLabelResp{ + expResp: &srvpb.PoolFindByLabelResp{ Uuid: test.MockPoolUUID().String(), Svcreps: []uint32{0, 1, 2}, - }), + }, }, } { t.Run(name, func(t *testing.T) { + log, buf := logging.NewTestLogger(t.Name()) + defer test.ShowBufferOnFailure(t, buf) + ctx := test.Context(t) db := raft.MockDatabase(t, log) @@ -398,14 +418,308 @@ func TestSrvModule_handlePoolFindByLabel(t *testing.T) { } } - resp, err := mod.handlePoolFindByLabel(tc.reqBytes) + reqBytes := []byte("bad bytes") + if !tc.badReq { + reqBytes = getTestBytes(t, tc.req) + } + + respBytes, err := mod.handlePoolFindByLabel(reqBytes) + test.CmpErr(t, tc.expErr, err) + if err != nil { + return + } + + cmpTestResp(t, respBytes, new(srvpb.PoolFindByLabelResp), tc.expResp) + }) + } +} + +func TestSrvModule_handleListPools(t 
*testing.T) { + for name, tc := range map[string]struct { + req *srvpb.ListPoolsReq + badReq bool + testPools []*system.PoolService + expResp *srvpb.ListPoolsResp + expErr error + }{ + "bad request bytes": { + badReq: true, + expErr: drpc.UnmarshalingPayloadFailure(), + }, + "no pools": { + req: &srvpb.ListPoolsReq{ + IncludeAll: false, + }, + expResp: &srvpb.ListPoolsResp{ + Pools: []*srvpb.ListPoolsResp_Pool{}, + }, + }, + "single pool": { + req: &srvpb.ListPoolsReq{ + IncludeAll: false, + }, + testPools: []*system.PoolService{ + { + PoolUUID: test.MockPoolUUID(1), + PoolLabel: "pool1", + State: system.PoolServiceStateReady, + Replicas: []ranklist.Rank{0, 1, 2}, + }, + }, + expResp: &srvpb.ListPoolsResp{ + Pools: []*srvpb.ListPoolsResp_Pool{ + { + Uuid: test.MockPoolUUID(1).String(), + Label: "pool1", + Svcreps: []uint32{0, 1, 2}, + }, + }, + }, + }, + "multiple pools": { + req: &srvpb.ListPoolsReq{ + IncludeAll: true, + }, + testPools: []*system.PoolService{ + { + PoolUUID: test.MockPoolUUID(1), + PoolLabel: "pool1", + State: system.PoolServiceStateReady, + Replicas: []ranklist.Rank{0, 1, 2}, + }, + { + PoolUUID: test.MockPoolUUID(2), + PoolLabel: "pool2", + State: system.PoolServiceStateCreating, + Replicas: []ranklist.Rank{3, 4, 5}, + }, + }, + expResp: &srvpb.ListPoolsResp{ + Pools: []*srvpb.ListPoolsResp_Pool{ + { + Uuid: test.MockPoolUUID(1).String(), + Label: "pool1", + Svcreps: []uint32{0, 1, 2}, + }, + { + Uuid: test.MockPoolUUID(2).String(), + Label: "pool2", + Svcreps: []uint32{3, 4, 5}, + }, + }, + }, + }, + } { + t.Run(name, func(t *testing.T) { + log, buf := logging.NewTestLogger(t.Name()) + defer test.ShowBufferOnFailure(t, buf) + + ctx := test.Context(t) + + db := raft.MockDatabase(t, log) + mod := &srvModule{ + log: log, + poolDB: db, + } + for _, pool := range tc.testPools { + lock, err := db.TakePoolLock(ctx, pool.PoolUUID) + if err != nil { + t.Fatal(err) + } + if err := db.AddPoolService(lock.InContext(ctx), pool); err != nil { + 
lock.Release() + t.Fatal(err) + } + lock.Release() + } + + reqBytes := []byte("bad bytes") + if !tc.badReq { + reqBytes = getTestBytes(t, tc.req) + } + + respBytes, err := mod.handleListPools(reqBytes) + test.CmpErr(t, tc.expErr, err) + if err != nil { + return + } + + resp := new(srvpb.ListPoolsResp) + if err := proto.Unmarshal(respBytes, resp); err != nil { + t.Fatal(err) + } + + if len(tc.expResp.Pools) != len(resp.Pools) { + t.Fatal("unexpected number of pools returned") + } + for _, pool := range tc.expResp.Pools { + found := false + for _, expPool := range resp.Pools { + if pool.Uuid != expPool.Uuid { + continue + } + if diff := cmp.Diff(expPool, pool, protocmp.Transform()); diff != "" { + t.Fatalf("unexpected pool in response (-want, +got):\n%s\n", diff) + } + found = true + break + } + if !found { + t.Fatalf("pool %v not found", pool) + } + } + }) + } +} + +func TestSrvModule_handleGetSysProps(t *testing.T) { + mockMSReplicas := []string{"host1:10001"} + + for name, tc := range map[string]struct { + req *mgmtpb.SystemGetPropReq + badReq bool + mic *control.MockInvokerConfig // For control-API SystemGetProp + expCtlCall *control.SystemGetPropReq + expResp *mgmtpb.SystemGetPropResp + expErr error + }{ + "bad request bytes": { + badReq: true, + expErr: drpc.UnmarshalingPayloadFailure(), + }, + "invalid system property key": { + req: &mgmtpb.SystemGetPropReq{ + Sys: "daos_server", + Keys: []string{"invalid-key"}, + }, + expErr: errors.New("invalid system property key"), + }, + "control API error": { + req: &mgmtpb.SystemGetPropReq{ + Sys: "daos_server", + Keys: []string{"self_heal"}, + }, + mic: &control.MockInvokerConfig{ + UnaryError: errors.New("control API failed"), + }, + expCtlCall: &control.SystemGetPropReq{}, + expErr: errors.New("failed to get system properties from MS"), + }, + "success with single property": { + req: &mgmtpb.SystemGetPropReq{ + Sys: "daos_server", + Keys: []string{"self_heal"}, + }, + mic: &control.MockInvokerConfig{ + UnaryResponse: 
control.MockMSResponse("host1:10001", nil, + &mgmtpb.SystemGetPropResp{ + Properties: map[string]string{ + "self_heal": "exclude", + }, + }), + }, + expCtlCall: &control.SystemGetPropReq{ + Keys: []daos.SystemPropertyKey{ + daos.SystemPropertySelfHeal, + }, + }, + expResp: &mgmtpb.SystemGetPropResp{ + Properties: map[string]string{ + "self_heal": "exclude", + }, + }, + }, + "success with multiple properties": { + req: &mgmtpb.SystemGetPropReq{ + Sys: "marigolds", + Keys: []string{"self_heal", "pool_scrub_thresh"}, + }, + mic: &control.MockInvokerConfig{ + UnaryResponse: control.MockMSResponse("host1:10001", nil, + &mgmtpb.SystemGetPropResp{ + Properties: map[string]string{ + "self_heal": "exclude", + "pool_scrub_thresh": "0", + }, + }), + }, + expCtlCall: &control.SystemGetPropReq{ + Keys: []daos.SystemPropertyKey{ + daos.SystemPropertySelfHeal, + daos.SystemPropertyPoolScrubThresh, + }, + }, + expResp: &mgmtpb.SystemGetPropResp{ + Properties: map[string]string{ + "self_heal": "exclude", + "pool_scrub_thresh": "0", + }, + }, + }, + "empty request returns empty response": { + req: &mgmtpb.SystemGetPropReq{ + Sys: "daos_server", + Keys: []string{}, + }, + mic: &control.MockInvokerConfig{ + UnaryResponse: control.MockMSResponse("host1:10001", nil, + &mgmtpb.SystemGetPropResp{ + Properties: map[string]string{}, + }), + }, + expCtlCall: &control.SystemGetPropReq{ + Keys: []daos.SystemPropertyKey{}, + }, + expResp: &mgmtpb.SystemGetPropResp{ + Properties: map[string]string{}, + }, + }, + } { + t.Run(name, func(t *testing.T) { + log, buf := logging.NewTestLogger(t.Name()) + defer test.ShowBufferOnFailure(t, buf) + + mi := control.NewMockInvoker(log, tc.mic) + mod := &srvModule{ + log: log, + rpcClient: mi, + msReplicas: mockMSReplicas, + } + + reqBytes := []byte("bad bytes") + if !tc.badReq { + reqBytes = getTestBytes(t, tc.req) + } + + respBytes, err := mod.handleGetSysProps(reqBytes) test.CmpErr(t, tc.expErr, err) if err != nil { return } - if diff := 
cmp.Diff(tc.expResp, resp); diff != "" { - t.Fatalf("unexpected response (-want, +got):\n%s\n", diff) + cmpTestResp(t, respBytes, new(mgmtpb.SystemGetPropResp), tc.expResp) + + switch mi.GetInvokeCount() { + case 0: + if tc.expCtlCall != nil { + t.Fatal("expected control API call but got none") + } + case 1: + if tc.expCtlCall == nil { + t.Fatal("unexpected control API call") + } + getPropReqSent := mi.SentReqs[0].(*control.SystemGetPropReq) + cmpOpt := cmpopts.IgnoreFields(control.SystemGetPropReq{}, + "unaryRequest", "msRequest") + if diff := cmp.Diff(tc.expCtlCall, getPropReqSent, cmpOpt); diff != "" { + t.Fatalf("unexpected control API call (-want, +got):\n%s\n", + diff) + } + test.AssertEqual(t, tc.req.Sys, getPropReqSent.Sys, + "system name mismatch") + default: + t.Fatalf("unexpected number of control API calls: %d", + mi.GetInvokeCount()) } }) } From a3fa2b871cbd205ea4f346faae1024097e7290d4 Mon Sep 17 00:00:00 2001 From: Tom Nabarro Date: Tue, 9 Dec 2025 22:17:22 +0000 Subject: [PATCH 070/253] DAOS-18223 control: Align control-plane and engine rebuild_state enums (#17189) Match enum assignments in control-plane and engine so that dmg -j/json output state IDs match the engine side assignments. 
Signed-off-by: Tom Nabarro --- src/control/common/proto/mgmt/pool.pb.go | 29 ++++++++++++------------ src/mgmt/pool.pb-c.c | 12 +++++----- src/mgmt/pool.pb-c.h | 20 ++++++++++++---- src/mgmt/srv_drpc.c | 14 ++++++------ src/mgmt/tests/srv_drpc_tests.c | 5 ++-- src/proto/mgmt/pool.proto | 9 ++++---- 6 files changed, 50 insertions(+), 39 deletions(-) diff --git a/src/control/common/proto/mgmt/pool.pb.go b/src/control/common/proto/mgmt/pool.pb.go index 18cf729a41d..67a85f294c6 100644 --- a/src/control/common/proto/mgmt/pool.pb.go +++ b/src/control/common/proto/mgmt/pool.pb.go @@ -131,22 +131,22 @@ func (PoolServiceState) EnumDescriptor() ([]byte, []int) { type PoolRebuildStatus_State int32 const ( - PoolRebuildStatus_IDLE PoolRebuildStatus_State = 0 - PoolRebuildStatus_DONE PoolRebuildStatus_State = 1 - PoolRebuildStatus_BUSY PoolRebuildStatus_State = 2 + PoolRebuildStatus_BUSY PoolRebuildStatus_State = 0 // DRS_IN_PROGRESS + PoolRebuildStatus_IDLE PoolRebuildStatus_State = 1 // DRS_NOT_STARTED + PoolRebuildStatus_DONE PoolRebuildStatus_State = 2 // DRS_COMPLETED ) // Enum value maps for PoolRebuildStatus_State. var ( PoolRebuildStatus_State_name = map[int32]string{ - 0: "IDLE", - 1: "DONE", - 2: "BUSY", + 0: "BUSY", + 1: "IDLE", + 2: "DONE", } PoolRebuildStatus_State_value = map[string]int32{ - "IDLE": 0, - "DONE": 1, - "BUSY": 2, + "BUSY": 0, + "IDLE": 1, + "DONE": 2, } ) @@ -1778,7 +1778,8 @@ func (x *StorageUsageStats) GetMediaType() StorageMediaType { return StorageMediaType_SCM } -// PoolRebuildStatus represents a pool's rebuild status. +// PoolRebuildStatus represents a pool's rebuild status, translates to enum daos_rebuild_state_t +// IN_PROGRESS/NOT_STARTED/COMPLETED states. 
type PoolRebuildStatus struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -1833,7 +1834,7 @@ func (x *PoolRebuildStatus) GetState() PoolRebuildStatus_State { if x != nil { return x.State } - return PoolRebuildStatus_IDLE + return PoolRebuildStatus_BUSY } func (x *PoolRebuildStatus) GetObjects() uint64 { @@ -3289,9 +3290,9 @@ var file_mgmt_pool_proto_rawDesc = []byte{ 0x65, 0x63, 0x74, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x04, 0x52, 0x07, 0x6f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x72, 0x65, 0x63, 0x6f, 0x72, 0x64, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x04, 0x52, 0x07, 0x72, 0x65, 0x63, 0x6f, 0x72, 0x64, 0x73, 0x22, 0x25, 0x0a, - 0x05, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x08, 0x0a, 0x04, 0x49, 0x44, 0x4c, 0x45, 0x10, 0x00, - 0x12, 0x08, 0x0a, 0x04, 0x44, 0x4f, 0x4e, 0x45, 0x10, 0x01, 0x12, 0x08, 0x0a, 0x04, 0x42, 0x55, - 0x53, 0x59, 0x10, 0x02, 0x22, 0x89, 0x07, 0x0a, 0x0d, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, + 0x05, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x08, 0x0a, 0x04, 0x42, 0x55, 0x53, 0x59, 0x10, 0x00, + 0x12, 0x08, 0x0a, 0x04, 0x49, 0x44, 0x4c, 0x45, 0x10, 0x01, 0x12, 0x08, 0x0a, 0x04, 0x44, 0x4f, + 0x4e, 0x45, 0x10, 0x02, 0x22, 0x89, 0x07, 0x0a, 0x0d, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x75, 0x75, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x75, 0x75, diff --git a/src/mgmt/pool.pb-c.c b/src/mgmt/pool.pb-c.c index c2ecb62f393..aecd13dd3f1 100644 --- a/src/mgmt/pool.pb-c.c +++ b/src/mgmt/pool.pb-c.c @@ -3329,18 +3329,18 @@ const ProtobufCMessageDescriptor mgmt__storage_usage_stats__descriptor = }; static const ProtobufCEnumValue mgmt__pool_rebuild_status__state__enum_values_by_number[3] = { - { "IDLE", "MGMT__POOL_REBUILD_STATUS__STATE__IDLE", 0 }, - { "DONE", 
"MGMT__POOL_REBUILD_STATUS__STATE__DONE", 1 }, - { "BUSY", "MGMT__POOL_REBUILD_STATUS__STATE__BUSY", 2 }, + { "BUSY", "MGMT__POOL_REBUILD_STATUS__STATE__BUSY", 0 }, + { "IDLE", "MGMT__POOL_REBUILD_STATUS__STATE__IDLE", 1 }, + { "DONE", "MGMT__POOL_REBUILD_STATUS__STATE__DONE", 2 }, }; static const ProtobufCIntRange mgmt__pool_rebuild_status__state__value_ranges[] = { {0, 0},{0, 3} }; static const ProtobufCEnumValueIndex mgmt__pool_rebuild_status__state__enum_values_by_name[3] = { - { "BUSY", 2 }, - { "DONE", 1 }, - { "IDLE", 0 }, + { "BUSY", 0 }, + { "DONE", 2 }, + { "IDLE", 1 }, }; const ProtobufCEnumDescriptor mgmt__pool_rebuild_status__state__descriptor = { diff --git a/src/mgmt/pool.pb-c.h b/src/mgmt/pool.pb-c.h index cdabee8b51e..fbfd62b33ae 100644 --- a/src/mgmt/pool.pb-c.h +++ b/src/mgmt/pool.pb-c.h @@ -57,9 +57,18 @@ typedef struct _Mgmt__PoolSelfHealEvalReq Mgmt__PoolSelfHealEvalReq; /* --- enums --- */ typedef enum _Mgmt__PoolRebuildStatus__State { - MGMT__POOL_REBUILD_STATUS__STATE__IDLE = 0, - MGMT__POOL_REBUILD_STATUS__STATE__DONE = 1, - MGMT__POOL_REBUILD_STATUS__STATE__BUSY = 2 + /* + * DRS_IN_PROGRESS + */ + MGMT__POOL_REBUILD_STATUS__STATE__BUSY = 0, + /* + * DRS_NOT_STARTED + */ + MGMT__POOL_REBUILD_STATUS__STATE__IDLE = 1, + /* + * DRS_COMPLETED + */ + MGMT__POOL_REBUILD_STATUS__STATE__DONE = 2 PROTOBUF_C__FORCE_ENUM_TO_BE_INT_SIZE(MGMT__POOL_REBUILD_STATUS__STATE) } Mgmt__PoolRebuildStatus__State; typedef enum _Mgmt__PoolQueryTargetInfo__TargetType { @@ -777,7 +786,8 @@ struct _Mgmt__StorageUsageStats /* - * PoolRebuildStatus represents a pool's rebuild status. + * PoolRebuildStatus represents a pool's rebuild status, translates to enum daos_rebuild_state_t + * IN_PROGRESS/NOT_STARTED/COMPLETED states. 
*/ struct _Mgmt__PoolRebuildStatus { @@ -792,7 +802,7 @@ struct _Mgmt__PoolRebuildStatus }; #define MGMT__POOL_REBUILD_STATUS__INIT \ { PROTOBUF_C_MESSAGE_INIT (&mgmt__pool_rebuild_status__descriptor) \ - , 0, MGMT__POOL_REBUILD_STATUS__STATE__IDLE, 0, 0 } + , 0, MGMT__POOL_REBUILD_STATUS__STATE__BUSY, 0, 0 } /* diff --git a/src/mgmt/srv_drpc.c b/src/mgmt/srv_drpc.c index 568aa87755a..d71f44d1c66 100644 --- a/src/mgmt/srv_drpc.c +++ b/src/mgmt/srv_drpc.c @@ -1756,14 +1756,14 @@ pool_rebuild_status_from_info(Mgmt__PoolRebuildStatus *rebuild, if (rebuild->status == 0) { rebuild->objects = info->rs_obj_nr; rebuild->records = info->rs_rec_nr; - - if (info->rs_version == 0) - rebuild->state = MGMT__POOL_REBUILD_STATUS__STATE__IDLE; - else if (info->rs_state == DRS_COMPLETED) - rebuild->state = MGMT__POOL_REBUILD_STATUS__STATE__DONE; - else - rebuild->state = MGMT__POOL_REBUILD_STATUS__STATE__BUSY; } + + if ((info->rs_version == 0) || (info->rs_state == DRS_NOT_STARTED)) + rebuild->state = MGMT__POOL_REBUILD_STATUS__STATE__IDLE; + else if (info->rs_state == DRS_COMPLETED) + rebuild->state = MGMT__POOL_REBUILD_STATUS__STATE__DONE; + else + rebuild->state = MGMT__POOL_REBUILD_STATUS__STATE__BUSY; } static void diff --git a/src/mgmt/tests/srv_drpc_tests.c b/src/mgmt/tests/srv_drpc_tests.c index 36c6535b9a2..821f9ab5765 100644 --- a/src/mgmt/tests/srv_drpc_tests.c +++ b/src/mgmt/tests/srv_drpc_tests.c @@ -1503,6 +1503,7 @@ test_drpc_pool_query_success_rebuild_err(void **state) init_test_pool_info(&exp_info); exp_info.pi_rebuild_st.rs_version = 1; exp_info.pi_rebuild_st.rs_errno = -DER_MISC; + exp_info.pi_rebuild_st.rs_state = DRS_IN_PROGRESS; ds_mgmt_pool_query_info_out = exp_info; ds_mgmt_pool_query_mem_bytes = 11; @@ -1517,9 +1518,7 @@ test_drpc_pool_query_success_rebuild_err(void **state) ds_mgmt_drpc_pool_query(&call, &resp); - expect_query_resp_with_info(&exp_info, - MGMT__POOL_REBUILD_STATUS__STATE__IDLE, - &resp); + expect_query_resp_with_info(&exp_info, 
MGMT__POOL_REBUILD_STATUS__STATE__BUSY, &resp); D_FREE(call.body.data); D_FREE(resp.body.data); diff --git a/src/proto/mgmt/pool.proto b/src/proto/mgmt/pool.proto index 8a67ef1f825..bc6d041aa12 100644 --- a/src/proto/mgmt/pool.proto +++ b/src/proto/mgmt/pool.proto @@ -199,13 +199,14 @@ message StorageUsageStats { StorageMediaType media_type = 6; } -// PoolRebuildStatus represents a pool's rebuild status. +// PoolRebuildStatus represents a pool's rebuild status, translates to enum daos_rebuild_state_t +// IN_PROGRESS/NOT_STARTED/COMPLETED states. message PoolRebuildStatus { int32 status = 1; // DAOS error code enum State { - IDLE = 0; - DONE = 1; - BUSY = 2; + BUSY = 0; // DRS_IN_PROGRESS + IDLE = 1; // DRS_NOT_STARTED + DONE = 2; // DRS_COMPLETED } State state = 2; uint64 objects = 3; From 5a208edadc4514ab5c24861690b5b5d78859d8e7 Mon Sep 17 00:00:00 2001 From: Dalton Bohning Date: Tue, 9 Dec 2025 16:30:55 -0800 Subject: [PATCH 071/253] DAOS-623 cq: change assignees to reviewers in dependabot cfg (#17210) A team cannot be an assignee but can be a reviewer, which was the original intention. Signed-off-by: Dalton Bohning --- .github/dependabot.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/dependabot.yml b/.github/dependabot.yml index a67faa6f34a..a9bc4ac0020 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -22,7 +22,7 @@ updates: gha-versions: patterns: - "*" - assignees: + reviewers: - daos-stack/actions-watchers commit-message: prefix: "Doc-only: true \n" @@ -36,7 +36,7 @@ updates: gha-versions: patterns: - "*" - assignees: + reviewers: - daos-stack/actions-watchers commit-message: prefix: "Doc-only: true \n" From 10397498c83e7a5239997f4d01ada7b006c0ed3d Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Wed, 10 Dec 2025 08:41:00 +0800 Subject: [PATCH 072/253] DAOS-18263 pool: fix version compatibility issue (#17218) 2.6 client might connect to 2.8 server, resulting in rpc version being DAOS_POOL_VERSION - 1. 
Updated assertion to support version downgrade compatibility. Additional cleanup performed: Initialized each array item explicitly in daos_rpc_protocol_tables for better code clarity Signed-off-by: Wang Shilong --- src/include/daos/rpc.h | 44 +++++++++++++++++++++++++++++------------- src/pool/srv_pool.c | 5 +---- 2 files changed, 32 insertions(+), 17 deletions(-) diff --git a/src/include/daos/rpc.h b/src/include/daos/rpc.h index 26860fb6664..f0f967c388f 100644 --- a/src/include/daos/rpc.h +++ b/src/include/daos/rpc.h @@ -87,21 +87,39 @@ struct daos_protocol_table { }; static const struct daos_protocol_table daos_rpc_protocol_tables[] = { - { - /* Latest protocol */ - .protocol = DAOS_VERSION_PROTOCAL, - .versions = {DAOS_VOS_VERSION, DAOS_MGMT_VERSION, DAOS_POOL_VERSION, DAOS_CONT_VERSION, - DAOS_OBJ_VERSION, DAOS_REBUILD_VERSION, DAOS_RSVC_VERSION, DAOS_RDB_VERSION, - DAOS_RDBT_VERSION, DAOS_SEC_VERSION, DAOS_DTX_VERSION, DAOS_PIPELINE_VERSION, - DAOS_CHK_VERSION}, - }, + {/* Latest protocol */ + .protocol = DAOS_VERSION_PROTOCAL, + .versions = {[DAOS_VOS_MODULE] = DAOS_VOS_VERSION, + [DAOS_MGMT_MODULE] = DAOS_MGMT_VERSION, + [DAOS_POOL_MODULE] = DAOS_POOL_VERSION, + [DAOS_CONT_MODULE] = DAOS_CONT_VERSION, + [DAOS_OBJ_MODULE] = DAOS_OBJ_VERSION, + [DAOS_REBUILD_MODULE] = DAOS_REBUILD_VERSION, + [DAOS_RSVC_MODULE] = DAOS_RSVC_VERSION, + [DAOS_RDB_MODULE] = DAOS_RDB_VERSION, + [DAOS_RDBT_MODULE] = DAOS_RDBT_VERSION, + [DAOS_SEC_MODULE] = DAOS_SEC_VERSION, + [DAOS_DTX_MODULE] = DAOS_DTX_VERSION, + [DAOS_PIPELINE_MODULE] = DAOS_PIPELINE_VERSION, + [DAOS_CHK_MODULE] = DAOS_CHK_VERSION}}, /* Please update DAOS_VERSION_PROTOCOL - 1 table when rolling upgrade is supported. 
{ - .protocol = DAOS_VERSION_PROTOCAL - 1; - .versions = {DAOS_VOS_VERSION, DAOS_MGMT_VERSION, DAOS_POOL_VERSION, DAOS_CONT_VERSION, - DAOS_OBJ_VERSION, DAOS_REBUILD_VERSION, DAOS_RSVC_VERSION, DAOS_RDB_VERSION, - DAOS_RDBT_VERSION, DAOS_SEC_VERSION, DAOS_DTX_VERSION, DAOS_PIPELINE_VERSION, - DAOS_CHK_VERSION}, + .protocol = DAOS_VERSION_PROTOCAL - 1; + .versions = { + [DAOS_VOS_MODULE] = DAOS_VOS_VERSION, + [DAOS_MGMT_MODULE] = DAOS_MGMT_VERSION, + [DAOS_POOL_MODULE] = DAOS_POOL_VERSION, + [DAOS_CONT_MODULE] = DAOS_CONT_VERSION, + [DAOS_OBJ_MODULE] = DAOS_OBJ_VERSION, + [DAOS_REBUILD_MODULE] = DAOS_REBUILD_VERSION, + [DAOS_RSVC_MODULE] = DAOS_RSVC_VERSION, + [DAOS_RDB_MODULE] = DAOS_RDB_VERSION, + [DAOS_RDBT_MODULE] = DAOS_RDBT_VERSION, + [DAOS_SEC_MODULE] = DAOS_SEC_VERSION, + [DAOS_DTX_MODULE] = DAOS_DTX_VERSION, + [DAOS_PIPELINE_MODULE] = DAOS_PIPELINE_VERSION, + [DAOS_CHK_MODULE] = DAOS_CHK_VERSION + } }, */ }; diff --git a/src/pool/srv_pool.c b/src/pool/srv_pool.c index 8ff21acf226..e015bc388bc 100644 --- a/src/pool/srv_pool.c +++ b/src/pool/srv_pool.c @@ -4738,10 +4738,7 @@ pool_disconnect_handler(crt_rpc_t *rpc, int handler_version) void ds_pool_disconnect_handler(crt_rpc_t *rpc) { - uint8_t rpc_ver = opc_get_rpc_ver(rpc->cr_opc); - - D_ASSERT(rpc_ver == DAOS_POOL_VERSION); - pool_disconnect_handler(rpc, rpc_ver); + pool_disconnect_handler(rpc, opc_get_rpc_ver(rpc->cr_opc)); } static int From 0c713109001df05bad1fce710b37e07176f64808 Mon Sep 17 00:00:00 2001 From: Niu Yawei Date: Wed, 10 Dec 2025 10:44:16 +0800 Subject: [PATCH 073/253] DAOS-18264 pool: non-NULL check for sp_map (#17216) The ds_pool::sp_map could be NULL if the pool map hasn't been populated, a non-NULL check must be performed before accessing. 
Signed-off-by: Niu Yawei --- src/common/pool_map.c | 4 ++++ src/pool/srv_util.c | 9 ++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/common/pool_map.c b/src/common/pool_map.c index 594a0599079..13976ab23ca 100644 --- a/src/common/pool_map.c +++ b/src/common/pool_map.c @@ -2040,6 +2040,7 @@ pool_map_find_domain(struct pool_map *map, pool_comp_type_t type, uint32_t id, struct pool_domain *tmp; int i; + D_ASSERT(map != NULL); if (pool_map_empty(map)) { D_ERROR("Uninitialized pool map\n"); return 0; @@ -2092,6 +2093,7 @@ int pool_map_find_ranks(struct pool_map *map, uint32_t id, struct pool_domain **domain_pp) { + D_ASSERT(map != NULL); return pool_map_find_domain(map, PO_COMP_TP_RANK, id, domain_pp); } @@ -2150,6 +2152,7 @@ pool_map_find_dom_by_rank(struct pool_map *map, uint32_t rank) int doms_cnt; int i; + D_ASSERT(map != NULL); doms_cnt = pool_map_find_ranks(map, PO_COMP_ID_ALL, &doms); if (doms_cnt <= 0) return NULL; @@ -2232,6 +2235,7 @@ pool_map_find_target_by_rank_idx(struct pool_map *map, uint32_t rank, { struct pool_domain *dom; + D_ASSERT(map != NULL); dom = pool_map_find_dom_by_rank(map, rank); if (dom == NULL) return 0; diff --git a/src/pool/srv_util.c b/src/pool/srv_util.c index f9faa28bae9..11e29f45bee 100644 --- a/src/pool/srv_util.c +++ b/src/pool/srv_util.c @@ -1533,6 +1533,13 @@ check_pool_targets(uuid_t pool_id, int *tgt_ids, int tgt_cnt, bool reint, nr_downout = nr_down = nr_upin = nr_up = 0; ABT_rwlock_rdlock(pool->sp_lock); + + if (pool->sp_map == NULL) { + D_ERROR(DF_UUID ": Pool map not populated\n", DP_UUID(pool_id)); + rc = -DER_UNINIT; + goto done; + } + for (i = 0; i < tgt_cnt; i++) { nr = pool_map_find_target_by_rank_idx(pool->sp_map, rank, tgt_ids[i], &target); @@ -1561,7 +1568,7 @@ check_pool_targets(uuid_t pool_id, int *tgt_ids, int tgt_cnt, bool reint, break; } } - +done: if (pool->sp_iv_ns != NULL) { *pl_rank = pool->sp_iv_ns->iv_master_rank; } else { From 6e131625ecb572d590a71f5ea537a22a10b64c10 Mon Sep 17 
00:00:00 2001 From: Niu Yawei Date: Wed, 10 Dec 2025 12:45:23 +0800 Subject: [PATCH 074/253] DAOS-18012 vea: tune VEA paramters (#17011) Tune VEA parameters to accelerate space reclamation. Signed-off-by: Niu Yawei Signed-off-by: Cedric Koch-Hofer --- src/container/srv_target.c | 9 ++++++--- src/pool/srv_target.c | 4 ++-- src/vea/vea_api.c | 2 +- src/vea/vea_free.c | 7 ++++++- 4 files changed, 15 insertions(+), 7 deletions(-) diff --git a/src/container/srv_target.c b/src/container/srv_target.c index e29c3750a7e..906f852a75b 100644 --- a/src/container/srv_target.c +++ b/src/container/srv_target.c @@ -515,8 +515,11 @@ cont_aggregate_interval(struct ds_cont_child *cont, cont_aggregate_cb_t cb, DP_CONT(cont->sc_pool->spc_uuid, cont->sc_uuid), param->ap_vos_agg ? "VOS" : "EC"); } else if (sched_req_space_check(req) != SCHED_SPACE_PRESS_NONE) { - /* Don't sleep when there is space pressure */ - msecs = 0; + /* + * Introduce a small sleep interval between each round to yield CPU time + * for the flush & GC ULTs, irrespective of space pressure. DAOS-18012. + */ + msecs = 200; } if (param->ap_vos_agg) @@ -532,7 +535,7 @@ cont_aggregate_interval(struct ds_cont_child *cont, cont_aggregate_cb_t cb, * if no space pressure. */ if (ds_pool_is_rebuilding(cont->sc_pool->spc_pool) && !param->ap_vos_agg && - msecs != 0) + msecs != 200) msecs = 18000; if (msecs != 0) diff --git a/src/pool/srv_target.c b/src/pool/srv_target.c index f48ccf8228c..a3a89c56f39 100644 --- a/src/pool/srv_target.c +++ b/src/pool/srv_target.c @@ -242,9 +242,9 @@ flush_ult(void *arg) } else if (rc) { /* This pool doesn't have NVMe partition */ sleep_ms = 60000; } else if (sched_req_space_check(child->spc_flush_req) == SCHED_SPACE_PRESS_NONE) { - sleep_ms = 500; + sleep_ms = 5000; } else { - sleep_ms = (nr_flushed < nr_flush) ? 50 : 0; + sleep_ms = (nr_flushed < nr_flush) ? 
1000 : 0; } if (dss_ult_exiting(child->spc_flush_req)) diff --git a/src/vea/vea_api.c b/src/vea/vea_api.c index e26bc353eb5..cbbf6e377ad 100644 --- a/src/vea/vea_api.c +++ b/src/vea/vea_api.c @@ -380,7 +380,7 @@ vea_load(struct umem_instance *umem, struct umem_tx_stage_data *txd, return rc; } -#define FLUSH_INTVL 2 /* seconds */ +#define FLUSH_INTVL 5 /* seconds */ static inline bool need_aging_flush(struct vea_space_info *vsi, bool force) diff --git a/src/vea/vea_free.c b/src/vea/vea_free.c index fafa656d76c..14681680989 100644 --- a/src/vea/vea_free.c +++ b/src/vea/vea_free.c @@ -767,7 +767,12 @@ aggregated_free(struct vea_space_info *vsi, struct vea_free_entry *vfe) return 0; } -#define EXPIRE_INTVL 3 /* seconds */ +/* + * Tune the EXPIRE_INTVL & FLUSH_INTVL with extreme care. While decreasing these + * values could be beneficial for accelerating space reclamation in overwrite mode, + * it is generally detrimental in punch mode. DAOS-18012. + */ +#define EXPIRE_INTVL 10 /* seconds */ #define UNMAP_SIZE_THRESH (1UL << 20) /* 1MB */ static int From b4ece564f4d20b76745237c525e8a3d85ac24d1b Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Wed, 10 Dec 2025 07:25:38 -0600 Subject: [PATCH 075/253] DAOS-17582 client: return err instead of assert if dep tasks > 65535 (#17235) Signed-off-by: Mohamad Chaarawi --- src/common/tse.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/common/tse.c b/src/common/tse.c index c936459e4be..e8472eef5fa 100644 --- a/src/common/tse.c +++ b/src/common/tse.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2016-2024 Intel Corporation. 
+ * (C) Copyright 2025 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -907,7 +908,12 @@ tse_task_add_dependent(tse_task_t *task, tse_task_t *dep) D_DEBUG(DB_TRACE, "Add dependent %p ---> %p\n", dep, task); D_MUTEX_LOCK(&dtp->dtp_sched->dsp_lock); - D_ASSERT(dtp->dtp_dep_cnt < UINT16_MAX); + if (dtp->dtp_dep_cnt >= UINT16_MAX || dtp->dtp_refcnt >= UINT16_MAX) { + D_ERROR("Max dependent tasks reached: %" PRIu16 "\n", dtp->dtp_dep_cnt); + D_MUTEX_UNLOCK(&dtp->dtp_sched->dsp_lock); + D_FREE(tlink); + return -DER_NOMEM; + } tse_task_addref_locked(dtp); tlink->tl_task = task; dtp->dtp_dep_cnt++; From d970bdf7dbbd012c69cc8b04801a3a024e297484 Mon Sep 17 00:00:00 2001 From: Jan Michalski Date: Wed, 10 Dec 2025 19:09:21 +0000 Subject: [PATCH 076/253] DAOS-18328 doc: update DDB readme + fix prov_mem example (#17253) Signed-off-by: Jan Michalski --- src/utils/ddb/README.md | 168 ++++++++++++++++++++++++---------------- 1 file changed, 100 insertions(+), 68 deletions(-) diff --git a/src/utils/ddb/README.md b/src/utils/ddb/README.md index d0355115005..8fbabf37cfa 100644 --- a/src/utils/ddb/README.md +++ b/src/utils/ddb/README.md @@ -24,11 +24,11 @@ The primary layers for the application are: The golang interface which handles parsing most of the user input. The github.com/jessevdk/go-flags module handles the user input from the command -line. This includes determining if the -R and -f options are passed and if a +line. This includes determining if the -f option is passed and if a path to a vos file was supplied. The github.com/desertbit/grumble module handles the execution of the commands, -whether from interactive mode or from the values of -R or -f. It also supplies +whether from interactive mode or from the -f value. It also supplies the interactive mode, managing history, input keys, etc. The golang code also calls the c code functions to initialize daos and vos. @@ -45,109 +45,141 @@ VOS api. 
This layer will adapt the needs of the ddb commands to the current VOS API implementation, making the VOS interaction a bit nicer for ddb. -# Help and Usage +## Help and Usage ``` $ ddb -h Usage: - ddb [OPTIONS] [] + ddb [OPTIONS] [vos_file_path] [ddb_command] [ddb_command_args...] The DAOS Debug Tool (ddb) allows a user to navigate through and modify a file in the VOS format. It offers both a command line and interactive -shell mode. If the '-R' or '-f' options are not provided, then it will -run in interactive mode. In order to modify the file, the '-w' option -must be included. The optional will be opened before running -commands supplied by '-R' or '-f' or entering interactive mode. +shell mode. If neither a single command or '-f' option is provided, then +the tool will run in interactive mode. In order to modify the VOS file, +the '-w' option must be included. If supplied, the VOS file supplied in +the first positional parameter will be opened before commands are executed. + +Many of the commands take a vos tree path. The format for this path +is [cont]/[obj]/[dkey]/[akey]/[extent]. +- cont - the full container uuid. +- obj - the object id. +- keys (akey, dkey) - there are multiple types of keys +-- string keys are simply the string value. If the size of the +key is greater than strlen(key), then the size is included at +the end of the string value. Example: 'akey{5}' is the key: akey +with a null terminator at the end. +-- number keys are formatted as '{[type]: NNN}' where type is +'uint8, uint16, uint32, or uint64'. NNN can be a decimal or +hex number. Example: '{uint32: 123456}' +-- binary keys are formatted as '{bin: 0xHHH}' where HHH is the hex +representation of the binary key. Example: '{bin: 0x1a2b}' +- extent for array values - in the format {lo-hi}. + +To make it easier to navigate the tree, indexes can be +used instead of the path part. The index is in the format [i]. 
Indexes +and actual path values can be used together + +Example Paths: +/3550f5df-e6b1-4415-947e-82e15cf769af/939000573846355970.0.13.1/dkey/akey/[0-1023] +[0]/[1]/[2]/[1]/[9] +/[0]/939000573846355970.0.13.1/[2]/akey{5}/[0-1023] + Application Options: - -R, --run_cmd= Execute the single command , then exit - -f, --file_cmd= Path to a file container a list of ddb commands, one - command per line, then exit. - -w, --write_mode Open the vos file in write mode. + --debug enable debug output + -w, --write_mode Open the vos file in write mode. + -f, --cmd_file= Path to a file containing a sequence of ddb commands to execute. + -p, --db_path= Path to the sys db. + -v, --version Show version Help Options: - -h, --help Show this help message + -h, --help Show this help message ``` -Interactive mode help +### Interactive mode help + ``` $ help -The DAOS Debug Tool (ddb) allows a user to navigate through and modify -a file in the VOS format. In order to modify the file, the '-w' option must -be included when opening the vos file. - -Many of the commands take a vos tree path. The format for this path -is 'cont_uuid/obj_id/dkey/akey/recx'. The keys currently only support string -keys. The recx for array values is the format {lo-hi}. To make it easier to -navigate the tree, indexes can be used instead of the path part. The index -is in the format '[i]', for example '[0]/[0]/[0]' - Commands: - clear clear the screen - clear_cmt_dtx Clear the dtx committed table - close Close the currently opened vos pool shard - commit_ilog Process the ilog - dtx_abort Mark the active dtx entry as aborted - dtx_commit Mark the active dtx entry as committed - dump_dtx Dump the dtx tables - dump_ilog Dump the ilog - dump_superblock Dump the pool superblock information - dump_value Dump a value to a file - dump_vea Dump information from the vea about free regions - exit exit the shell - help use 'help [command]' for command help - load Load a value to a vos path. 
- ls List containers, objects, dkeys, akeys, and values - open Opens the vos file at - prov_mem Prepare memory environment for md-on-ssd mode - rm Remove a branch of the VOS tree. - rm_ilog Remove all the ilog entries - smd_sync Restore the SMD file with backup from blob - update_vea Alter the VEA tree to mark a region as free. + clear clear the screen + exit exit the shell + help use 'help [command]' for command help + quit, q exit the shell + version Print ddb version + +smd + smd_sync Restore the SMD file with backup from blob + +vos + close Close the currently opened vos pool shard + dev_list List all devices + dev_replace Replace an old device with a new unused device + dtx_act_abort Mark the active dtx entry as aborted + dtx_act_commit Mark the active dtx entry as committed + dtx_act_discard_invalid Discard the active DTX entry's records if invalid. + dtx_aggr Aggregate DTX entries + dtx_cmt_clear Clear the dtx committed table + dtx_dump Dump the dtx tables + dtx_stat Stat on DTX entries + feature Manage vos pool features + ilog_clear Remove all the ilog entries + ilog_commit Process the ilog + ilog_dump Dump the ilog + ls List containers, objects, dkeys, akeys, and values + open Opens the vos file at + prov_mem Prepare the memory environment for md-on-ssd mode + rm Remove a branch of the VOS tree. + rm_pool Remove a vos pool. + superblock_dump Dump the pool superblock information + value_dump Dump a value + value_load Load a value to a vos path. + vea_dump Dump information from the vea about free regions + vea_update Alter the VEA tree to mark a region as free. ``` -## prov_mem Command - -The `prov_mem` command prepares the memory environment for md-on-ssd mode by setting up a tmpfs mount and recreating VOS files on it. 
- -### Usage +## `prov_mem` command ``` -prov_mem [Options] -``` - -### Arguments +Prepare the memory environment for md-on-ssd mode -- `` - Path to the sys db -- `` - Path to the tmpfs mountpoint +Usage: + prov_mem [flags] db_path tmpfs_mount -### Options +Args: + db_path string Path to the sys db. + tmpfs_mount string Path to the tmpfs mountpoint. -- `-s, --tmpfs_size` - Specify tmpfs size in GiB for tmpfs_mount. By default, the total size of all VOS files will be automatically calculated and used. +Flags: + -h, --help display help + -s, --tmpfs_size uint Specify tmpfs size(GiB) for mount. By default, the total size of all VOS files will be used. +``` ### Description This command is used when working with DAOS in md-on-ssd (metadata-on-SSD) mode. It: -1. Verifies the system is running in MD-on-SSD mode -2. Creates a tmpfs mount at the specified path (if not already mounted) -3. Sets up the necessary directory structure -4. Recreates VOS pool target files on the tmpfs mount +1. Verifies the system is running in MD-on-SSD mode. +2. Creates a tmpfs mount at the specified path (if not already mounted). +3. Sets up the necessary directory structure. +4. Recreates VOS pool target files on the tmpfs mount. ### Examples +**Note**: Please do not omit the first empty argument. + +**Note**: The user you use have to have access to specified resources and be able to mount(2). 
+ ```bash # Prepare memory environment with auto-calculated tmpfs size -ddb -R "prov_mem /path/to/sys/db /mnt/tmpfs" +ddb "" prov_mem /path/to/sys/db /mnt/tmpfs # Prepare memory environment with specific tmpfs size of 16 GiB -ddb -R "prov_mem -s 16 /path/to/sys/db /mnt/tmpfs" +ddb "" prov_mem -s 16 /path/to/sys/db /mnt/tmpfs ``` ### Notes -- The tmpfs_mount path must not already be a mountpoint, otherwise the command will fail with a busy error -- If tmpfs_size is not specified, the size will be automatically calculated based on the total size of all VOS files -- This command requires the system to be configured for MD-on-SSD mode -``` \ No newline at end of file +- The `tmpfs_mount` path must not already be a mount point; otherwise, the command will fail with a "busy" error. +- If `tmpfs_size` is not specified, the size will be automatically calculated based on the total size of all VOS files. +- This command requires the system to be configured for MD-on-SSD mode. From 2ead6acd8c9bc76265e233766f1909092ddfb4ae Mon Sep 17 00:00:00 2001 From: Tom Nabarro Date: Thu, 11 Dec 2025 11:20:00 +0000 Subject: [PATCH 077/253] DAOS-18337 bio: Enable auto-faulty reaction by default (#17252) Auto-faulty reaction with default threshold values should be enabled in the engine by default and only disabled if enable is set to false in the daos_nvme.conf file. 
Signed-off-by: Tom Nabarro --- src/bio/bio_config.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/bio/bio_config.c b/src/bio/bio_config.c index 80485070c83..f4a33fd1f21 100644 --- a/src/bio/bio_config.c +++ b/src/bio/bio_config.c @@ -960,10 +960,7 @@ bio_read_auto_faulty_criteria(const char *nvme_conf, bool *enable, uint32_t *max if (rc != 0) { if (rc == JSON_NOT_FOUND) { rc = 0; - *enable = false; - *max_io_errs = UINT32_MAX; - *max_csum_errs = UINT32_MAX; - D_DEBUG(DB_MGMT, "bdev auto-faulty criteria disabled as not configured\n"); + D_DEBUG(DB_MGMT, "bdev auto-faulty criteria not set in config\n"); } return rc; } From 07566936800e538428e9ed228d03098752c06d66 Mon Sep 17 00:00:00 2001 From: Li Wei Date: Fri, 12 Dec 2025 14:49:02 +0900 Subject: [PATCH 078/253] DAOS-10139 rdb: Avoid reusing raft node IDs (#17178) Overview Module rdb uses ranks as Raft node IDs. If we destroy an RDB replica X on a rank, and later create a replica Y of the same RDB on the same rank, then X and Y share the same Raft node ID. That is potentially unsafe, as explained in the Jira ticket. The current patch extends the Raft node ID type with a 32-bit generation number, so that X and Y above would have different Raft node IDs. In module raft, the node ID type is raft_node_id_t: int --> long. In module rdb, the replica ID type is d_rank_t --> rdb_replica_id_t (rank << 32 | generation). The RDB layout and protocol versions need to change, as a result. The current patch handles old pools (i.e., those created by DAOS 2.6) in the old RDB layout. Upgrading from the old RDB layout to the new one is not possible yet. We define the generation of an old-layout RDB replicas as zero. The Generation Allocator We need to produce replica generation numbers when creating new replicas. There are only two such cases: - [ds_pool_svc_dist_create] When creating an initial set of N replicas, we assign 1, 2, ..., N as generation numbers to the N replicas. 
- [ds_rsvc_add_replicas] When creating a new replica to be added to an existing RDB, we obtain a new generation number from an allocator, initialized to N + 1 in the previous case, stored in the RDB LC (i.e., replicated via Raft along with user data). See rdb_raft_init and rdb_alloc_replica_gen. Hence, any two replicas have different generation numbers. The implementation consists of the following pieces: - [rdb_lc_replica_gen_next] stores next generation persistently in the LC. - [rdb_alloc_replica_gen] is the RDB API function. - [rdb.d_gen_lock] does concurrency control for the allocation TX. - [rdb_path_attrs] refers to the parent of the root KVS, RDB_LC_ATTRS. Because no Raft entry type allows us to access rdb_lc_replica_gen_next, we essentially extends RDB TXs through rdb_path_attrs so that they are able to access RDB_LC_ATTRS when used by module rdb itself. The Protocol Changes In module rdb, besides the implicit changes caused by raft_node_id_t, each request and reply now passes the source and destination replica IDs, so that we can check if a replica is receiving a request or reply sent to a different generation on its rank. In module ds_rsvc, we need to pass replica IDs in addition to ranks when creating replicas. And, we need to pass RDB layout versions so that when working with old pools we create new replicas with the old RDB layout. Misc Changes In ds_rsvc_start, when creating a new replica, we must ensure that any existing replica has a matching replica ID---its generation may be different. This area is still sloppy compared to the rest of the code. In ds_rsvc_dist_start, ds_rsvc_start, and rdb_create, after adding the layout version, there are too many create parameters, so we employ two new create_params types to maintain readability. 
Signed-off-by: Li Wei --- src/chk/chk_leader.c | 3 +- src/include/daos/rpc.h | 4 +- src/include/daos_srv/rdb.h | 79 +++- src/include/daos_srv/rsvc.h | 31 +- src/pool/srv_pool.c | 70 ++- src/rdb/raft | 2 +- src/rdb/rdb.c | 340 +++++++++----- src/rdb/rdb_internal.h | 108 +++-- src/rdb/rdb_kvs.c | 26 +- src/rdb/rdb_layout.c | 3 + src/rdb/rdb_layout.h | 39 +- src/rdb/rdb_path.c | 4 + src/rdb/rdb_raft.c | 893 +++++++++++++++++++++++------------- src/rdb/rdb_rpc.c | 51 +- src/rdb/rdb_tx.c | 12 +- src/rdb/tests/rdb_test.c | 90 +++- src/rdb/tests/rdbt.c | 3 + src/rsvc/rpc.h | 12 +- src/rsvc/srv.c | 292 ++++++++---- 19 files changed, 1452 insertions(+), 610 deletions(-) diff --git a/src/chk/chk_leader.c b/src/chk/chk_leader.c index 261924e2135..6a4e9fcdae6 100644 --- a/src/chk/chk_leader.c +++ b/src/chk/chk_leader.c @@ -1401,8 +1401,7 @@ chk_leader_start_pool_svc(struct chk_pool_rec *cpr) } rc = ds_rsvc_dist_start(DS_RSVC_CLASS_POOL, &psid, cpr->cpr_uuid, ranks, RDB_NIL_TERM, - cpr->cpr_healthy ? DS_RSVC_START : DS_RSVC_DICTATE, - false /* bootstrap */, 0 /* size */, 0 /* vos_df_version */); + cpr->cpr_healthy ? DS_RSVC_START : DS_RSVC_DICTATE, NULL); out: d_rank_list_free(ranks); diff --git a/src/include/daos/rpc.h b/src/include/daos/rpc.h index f0f967c388f..1db29cf0901 100644 --- a/src/include/daos/rpc.h +++ b/src/include/daos/rpc.h @@ -72,8 +72,8 @@ enum daos_module_id { #define DAOS_CONT_VERSION 8 #define DAOS_OBJ_VERSION 10 #define DAOS_REBUILD_VERSION 4 -#define DAOS_RSVC_VERSION 4 -#define DAOS_RDB_VERSION 4 +#define DAOS_RSVC_VERSION 5 +#define DAOS_RDB_VERSION 5 #define DAOS_RDBT_VERSION 3 #define DAOS_SEC_VERSION 1 #define DAOS_DTX_VERSION 4 diff --git a/src/include/daos_srv/rdb.h b/src/include/daos_srv/rdb.h index 9ee58895332..6a828caa65f 100644 --- a/src/include/daos_srv/rdb.h +++ b/src/include/daos_srv/rdb.h @@ -1,5 +1,6 @@ /* * (C) Copyright 2017-2023 Intel Corporation. 
+ * (C) Copyright 2025 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -114,6 +115,50 @@ */ struct rdb_storage; +/** + * Replica ID + * + * This 64-bit ID type is designed to be passed around by value, rather than + * address, even through it is a struct. + */ +typedef struct { + d_rank_t rri_rank; /**< rank */ + uint32_t rri_gen; /**< generation (see rdb_alloc_replica_gen) */ +} rdb_replica_id_t; + +#define RDB_F_RID "%u.%u" +#define RDB_P_RID(id) id.rri_rank, id.rri_gen + +static inline int +rdb_replica_id_compare(rdb_replica_id_t x, rdb_replica_id_t y) +{ + if (x.rri_rank < y.rri_rank) + return -1; + if (x.rri_rank > y.rri_rank) + return 1; + + if (x.rri_gen < y.rri_gen) + return -1; + if (x.rri_gen > y.rri_gen) + return 1; + + return 0; +} + +/* clang-format off */ +int crt_proc_rdb_replica_id_t(crt_proc_t proc, crt_proc_op_t proc_op, rdb_replica_id_t *p); +/* clang-format on */ + +/** Parameters for creating database storage */ +struct rdb_create_params { + size_t rcp_size; /**< VOS pool size in bytes */ + uint32_t rcp_vos_df_version; /**< VOS durable format version */ + uint32_t rcp_layout_version; /**< layout version (0 for default) */ + rdb_replica_id_t rcp_id; /**< self ID */ + rdb_replica_id_t *rcp_replicas; /**< replica IDs if bootstrapping */ + int rcp_replicas_len; /**< length of rcp_replicas[] */ +}; + struct rdb_cbs; /** @@ -138,15 +183,17 @@ struct rdb_clue { }; /** Database storage methods */ -int rdb_create(const char *path, const uuid_t uuid, uint64_t caller_term, size_t size, - uint32_t vos_df_version, const d_rank_list_t *replicas, struct rdb_cbs *cbs, - void *arg, struct rdb_storage **storagep); +/* clang-format off */ +int rdb_create(const char *path, const uuid_t uuid, uint64_t caller_term, + struct rdb_create_params *params, struct rdb_cbs *cbs, void *arg, + struct rdb_storage **storagep); int rdb_open(const char *path, const uuid_t uuid, uint64_t caller_term, struct rdb_cbs *cbs, void *arg, 
struct rdb_storage **storagep); void rdb_close(struct rdb_storage *storage); int rdb_destroy(const char *path, const uuid_t uuid); int rdb_glance(struct rdb_storage *storage, struct rdb_clue *clue); int rdb_dictate(struct rdb_storage *storage); +/* clang-format on */ /** Database (opaque) */ struct rdb; @@ -179,7 +226,14 @@ struct rdb_cbs { void (*dc_stop)(struct rdb *db, int err, void *arg); }; +/** Operation for \a rdb_modify_replicas */ +enum rdb_replica_op { + RDB_REPLICA_ADD, /**< add voting replicas */ + RDB_REPLICA_REMOVE /**< remove voting replicas */ +}; + /** Database methods */ +/* clang-format off */ int rdb_start(struct rdb_storage *storage, struct rdb **dbp); void rdb_stop(struct rdb *db, struct rdb_storage **storagep); void rdb_stop_and_close(struct rdb *db); @@ -187,12 +241,17 @@ void rdb_resign(struct rdb *db, uint64_t term); int rdb_campaign(struct rdb *db); bool rdb_is_leader(struct rdb *db, uint64_t *term); int rdb_get_leader(struct rdb *db, uint64_t *term, d_rank_t *rank); +rdb_replica_id_t rdb_get_replica_id(struct rdb *db); +int rdb_get_replicas(struct rdb *db, rdb_replica_id_t **replicas, int *replicas_len); int rdb_get_ranks(struct rdb *db, d_rank_list_t **ranksp); int rdb_get_size(struct rdb *db, size_t *sizep); -int rdb_add_replicas(struct rdb *db, d_rank_list_t *replicas); -int rdb_remove_replicas(struct rdb *db, d_rank_list_t *replicas); +uint32_t rdb_get_version(struct rdb *db); +int rdb_alloc_replica_gen(struct rdb *db, uint64_t term, uint32_t *gen_out); +int rdb_modify_replicas(struct rdb *db, enum rdb_replica_op op, rdb_replica_id_t *replicas, + int *replica_len); int rdb_ping(struct rdb *db, uint64_t caller_term); int rdb_upgrade_vos_pool(struct rdb *db, uint32_t df_version); +/* clang-format on */ /** * Path (opaque) @@ -210,16 +269,19 @@ typedef d_iov_t rdb_path_t; extern d_iov_t rdb_path_root_key; /** Path methods */ +/* clang-format off */ int rdb_path_init(rdb_path_t *path); void rdb_path_fini(rdb_path_t *path); int 
rdb_path_clone(const rdb_path_t *path, rdb_path_t *new_path); int rdb_path_push(rdb_path_t *path, const d_iov_t *key); +/* clang-format on */ /** * Define a d_iov_t object, named \a prefix + \a name, that represents a * constant string key. See rdb_layout.[ch] for an example of the usage of this * helper macro. */ +/* clang-format off */ #define RDB_STRING_KEY(prefix, name) \ static char prefix ## name ## _buf[] = #name; \ d_iov_t prefix ## name = { \ @@ -227,6 +289,7 @@ d_iov_t prefix ## name = { \ .iov_buf_len = sizeof(prefix ## name ## _buf), \ .iov_len = sizeof(prefix ## name ## _buf) \ } +/* clang-format on */ /** KVS classes */ enum rdb_kvs_class { @@ -261,13 +324,16 @@ struct rdb_tx { #define RDB_NIL_TERM UINT64_MAX /** TX methods */ +/* clang-format off */ int rdb_tx_begin(struct rdb *db, uint64_t term, struct rdb_tx *tx); int rdb_tx_begin_local(struct rdb_storage *storage, struct rdb_tx *tx); void rdb_tx_discard(struct rdb_tx *tx); int rdb_tx_commit(struct rdb_tx *tx); void rdb_tx_end(struct rdb_tx *tx); +/* clang-format on */ /** TX update methods */ +/* clang-format off */ int rdb_tx_create_root(struct rdb_tx *tx, const struct rdb_kvs_attr *attr); int rdb_tx_destroy_root(struct rdb_tx *tx); int rdb_tx_create_kvs(struct rdb_tx *tx, const rdb_path_t *parent, @@ -280,6 +346,7 @@ int rdb_tx_update_critical(struct rdb_tx *tx, const rdb_path_t *kvs, const d_iov_t *key, const d_iov_t *value); int rdb_tx_delete(struct rdb_tx *tx, const rdb_path_t *kvs, const d_iov_t *key); +/* clang-format on */ /** Probe operation codes */ enum rdb_probe_opc { @@ -306,6 +373,7 @@ typedef int (*rdb_iterate_cb_t)(daos_handle_t ih, d_iov_t *key, d_iov_t *val, void *arg); /** TX query methods */ +/* clang-format off */ int rdb_tx_lookup(struct rdb_tx *tx, const rdb_path_t *kvs, const d_iov_t *key, d_iov_t *value); int rdb_tx_fetch(struct rdb_tx *tx, const rdb_path_t *kvs, @@ -315,5 +383,6 @@ int rdb_tx_query_key_max(struct rdb_tx *tx, const rdb_path_t *kvs, d_iov_t *key) int 
rdb_tx_iterate(struct rdb_tx *tx, const rdb_path_t *kvs, bool backward, rdb_iterate_cb_t cb, void *arg); int rdb_tx_revalidate(struct rdb_tx *tx); +/* clang-format on */ #endif /* DAOS_SRV_RDB_H */ diff --git a/src/include/daos_srv/rsvc.h b/src/include/daos_srv/rsvc.h index e7f7ac4c6d2..7f66d66b329 100644 --- a/src/include/daos_srv/rsvc.h +++ b/src/include/daos_srv/rsvc.h @@ -135,29 +135,40 @@ enum ds_rsvc_start_mode { DS_RSVC_DICTATE /**< DANGEROUSLY reset and start the service (see rdb_dictate) */ }; +/* clang-format off */ int ds_rsvc_start(enum ds_rsvc_class_id class, d_iov_t *id, uuid_t db_uuid, uint64_t caller_term, - enum ds_rsvc_start_mode mode, size_t size, uint32_t vos_df_version, - d_rank_list_t *replicas, void *arg); + enum ds_rsvc_start_mode mode, struct rdb_create_params *create_params, void *arg); int ds_rsvc_stop(enum ds_rsvc_class_id class, d_iov_t *id, uint64_t caller_term, bool destroy); int ds_rsvc_stop_leader(enum ds_rsvc_class_id class, d_iov_t *id, struct rsvc_hint *hint); +/* clang-format on */ + +/** Parameters used for creating an rsvc */ +struct ds_rsvc_create_params { + bool scp_bootstrap; /**< create with an initial list of replicas */ + size_t scp_size; /**< size of each replica in bytes */ + uint32_t scp_vos_df_version; /**< version of VOS durable format */ + uint32_t scp_layout_version; /**< version of RDB layout */ + rdb_replica_id_t *scp_replicas; /**< replicas IDs */ + int scp_replicas_len; /**< length of scp_replicas[] */ +}; + +/* clang-format off */ + int ds_rsvc_dist_start(enum ds_rsvc_class_id class, d_iov_t *id, const uuid_t dbid, const d_rank_list_t *ranks, uint64_t caller_term, - enum ds_rsvc_start_mode mode, bool bootstrap, size_t size, - uint32_t vos_df_version); + enum ds_rsvc_start_mode mode, struct ds_rsvc_create_params *create_params); int ds_rsvc_dist_stop(enum ds_rsvc_class_id class, d_iov_t *id, const d_rank_list_t *ranks, d_rank_list_t *excluded, uint64_t caller_term, bool destroy); enum ds_rsvc_state 
ds_rsvc_get_state(struct ds_rsvc *svc); void ds_rsvc_set_state(struct ds_rsvc *svc, enum ds_rsvc_state state); -void -ds_rsvc_begin_stepping_up(struct ds_rsvc *svc); -int - ds_rsvc_end_stepping_up(struct ds_rsvc *svc, int rc_in, enum ds_rsvc_state state); +void ds_rsvc_begin_stepping_up(struct ds_rsvc *svc); +int ds_rsvc_end_stepping_up(struct ds_rsvc *svc, int rc_in, enum ds_rsvc_state state); int ds_rsvc_add_replicas_s(struct ds_rsvc *svc, d_rank_list_t *ranks, size_t size, uint32_t vos_df_version); int ds_rsvc_add_replicas(enum ds_rsvc_class_id class, d_iov_t *id, d_rank_list_t *ranks, size_t size, uint32_t vos_df_version, struct rsvc_hint *hint); -int ds_rsvc_remove_replicas_s(struct ds_rsvc *svc, d_rank_list_t *ranks); +int ds_rsvc_remove_replicas_s(struct ds_rsvc *svc, d_rank_list_t *ranks, bool destroy); int ds_rsvc_remove_replicas(enum ds_rsvc_class_id class, d_iov_t *id, d_rank_list_t *ranks, struct rsvc_hint *hint); int ds_rsvc_lookup(enum ds_rsvc_class_id class, d_iov_t *id, @@ -186,4 +197,6 @@ void ds_rsvc_request_map_dist(struct ds_rsvc *svc); void ds_rsvc_query_map_dist(struct ds_rsvc *svc, uint32_t *version, bool *idle); void ds_rsvc_wait_map_dist(struct ds_rsvc *svc); +/* clang-format on */ + #endif /* DAOS_SRV_RSVC_H */ diff --git a/src/pool/srv_pool.c b/src/pool/srv_pool.c index e015bc388bc..c31c3358914 100644 --- a/src/pool/srv_pool.c +++ b/src/pool/srv_pool.c @@ -1017,23 +1017,26 @@ ds_pool_svc_dist_create(const uuid_t pool_uuid, int ntargets, const char *group, d_rank_list_t *target_addrs, int ndomains, uint32_t *domains, daos_prop_t *prop, d_rank_list_t **svc_addrs) { - struct daos_prop_entry *svc_rf_entry; - struct pool_buf *map_buf; - uint32_t map_version = 1; - d_rank_list_t *ranks; - d_iov_t psid; - struct rsvc_client client; - struct dss_module_info *info = dss_get_module_info(); - crt_endpoint_t ep; - crt_rpc_t *rpc; - struct daos_prop_entry *lbl_ent; - struct daos_prop_entry *def_lbl_ent; - struct pool_create_out *out; - struct 
d_backoff_seq backoff_seq; - uuid_t pi_hdl_uuid; - uint64_t req_time = 0; - int n_attempts = 0; - int rc; + struct daos_prop_entry *svc_rf_entry; + struct pool_buf *map_buf; + uint32_t map_version = 1; + d_rank_list_t *ranks; + rdb_replica_id_t *replicas; + int i; + struct ds_rsvc_create_params create_params; + d_iov_t psid; + struct rsvc_client client; + struct dss_module_info *info = dss_get_module_info(); + crt_endpoint_t ep; + crt_rpc_t *rpc; + struct daos_prop_entry *lbl_ent; + struct daos_prop_entry *def_lbl_ent; + struct pool_create_out *out; + struct d_backoff_seq backoff_seq; + uuid_t pi_hdl_uuid; + uint64_t req_time = 0; + int n_attempts = 0; + int rc; /* Check for default label supplied via property. */ def_lbl_ent = daos_prop_entry_get(&pool_prop_default, DAOS_PROP_PO_LABEL); @@ -1063,20 +1066,37 @@ ds_pool_svc_dist_create(const uuid_t pool_uuid, int ntargets, const char *group, D_DEBUG(DB_MD, DF_UUID": creating PS: ntargets=%d ndomains=%d svc_rf="DF_U64"\n", DP_UUID(pool_uuid), ntargets, ndomains, svc_rf_entry->dpe_val); + /* Determine the ranks and IDs of the PS replicas. */ rc = select_svc_ranks(svc_rf_entry->dpe_val, map_buf, map_version, &ranks); if (rc != 0) goto out_map_buf; + D_ALLOC_ARRAY(replicas, ranks->rl_nr); + if (replicas == NULL) { + rc = -DER_NOMEM; + goto out_ranks; + } + for (i = 0; i < ranks->rl_nr; i++) { + replicas[i].rri_rank = ranks->rl_ranks[i]; + /* Allocate replica generations from 1. See rdb_raft_init. 
*/ + replicas[i].rri_gen = i + 1; + } + + create_params.scp_bootstrap = true; + create_params.scp_size = ds_rsvc_get_md_cap(); + create_params.scp_vos_df_version = ds_pool_get_vos_df_version_default(); + create_params.scp_layout_version = 0 /* default */; + create_params.scp_replicas = replicas; + create_params.scp_replicas_len = ranks->rl_nr; d_iov_set(&psid, (void *)pool_uuid, sizeof(uuid_t)); rc = ds_rsvc_dist_start(DS_RSVC_CLASS_POOL, &psid, pool_uuid, ranks, RDB_NIL_TERM, - DS_RSVC_CREATE, true /* bootstrap */, ds_rsvc_get_md_cap(), - ds_pool_get_vos_df_version_default()); + DS_RSVC_CREATE, &create_params); if (rc != 0) - D_GOTO(out_ranks, rc); + goto out_replicas; rc = rsvc_client_init(&client, ranks); if (rc != 0) - D_GOTO(out_ranks, rc); + goto out_replicas; rc = d_backoff_seq_init(&backoff_seq, 0 /* nzeros */, 16 /* factor */, 8 /* next (ms) */, 1 << 10 /* max (ms) */); @@ -1141,6 +1161,8 @@ ds_pool_svc_dist_create(const uuid_t pool_uuid, int ntargets, const char *group, * Intentionally skip cleaning up the PS replicas. See the function * documentation above. 
*/ +out_replicas: + D_FREE(replicas); out_ranks: d_rank_list_free(ranks); out_map_buf: @@ -1180,8 +1202,8 @@ ds_pool_svc_start(uuid_t uuid) } d_iov_set(&id, uuid, sizeof(uuid_t)); - rc = ds_rsvc_start(DS_RSVC_CLASS_POOL, &id, uuid, RDB_NIL_TERM, DS_RSVC_START, 0 /* size */, - 0 /* vos_df_version */, NULL /* replicas */, NULL /* arg */); + rc = ds_rsvc_start(DS_RSVC_CLASS_POOL, &id, uuid, RDB_NIL_TERM, DS_RSVC_START, + NULL /* create_params */, NULL /* arg */); if (rc == -DER_ALREADY) { D_DEBUG(DB_MD, DF_UUID": pool service already started\n", DP_UUID(uuid)); return 0; @@ -6963,7 +6985,7 @@ pool_svc_reconf_ult(void *varg) DP_UUID(svc->ps_uuid), DP_RC(rc)); goto out_to_add_remove; } - rc = rdb_remove_replicas(svc->ps_rsvc.s_db, tmp); + rc = ds_rsvc_remove_replicas_s(&svc->ps_rsvc, to_remove, false /* destroy */); if (rc != 0) D_ERROR(DF_UUID": failed to remove replicas: "DF_RC"\n", DP_UUID(svc->ps_uuid), DP_RC(rc)); diff --git a/src/rdb/raft b/src/rdb/raft index 12dbc1595fa..27d05255720 160000 --- a/src/rdb/raft +++ b/src/rdb/raft @@ -1 +1 @@ -Subproject commit 12dbc1595fad8b570de1e336205f994f2b0e22f5 +Subproject commit 27d0525572026d66177005506a5a22703a8fd8cf diff --git a/src/rdb/rdb.c b/src/rdb/rdb.c index bdb0872f7e6..c92adbe7b71 100644 --- a/src/rdb/rdb.c +++ b/src/rdb/rdb.c @@ -18,50 +18,49 @@ #include "rdb_internal.h" #include "rdb_layout.h" -static int rdb_open_internal(daos_handle_t pool, daos_handle_t mc, const uuid_t uuid, - uint64_t caller_term, struct rdb_cbs *cbs, void *arg, - struct rdb **dbp); +static int +rdb_open_internal(daos_handle_t pool, daos_handle_t mc, const uuid_t uuid, uint32_t layout_version, + uint64_t caller_term, struct rdb_cbs *cbs, void *arg, struct rdb **dbp); /** - * Create an RDB replica at \a path with \a uuid, \a caller_term, \a size, - * \a vos_df_version, and \a replicas, and open it with \a cbs and \a arg. + * Create an RDB replica at \a path with \a uuid, \a caller_term, and \a params, + * and open it with \a cbs and \a arg. 
* * \param[in] path replica path * \param[in] uuid database UUID * \param[in] caller_term caller term if not RDB_NIL_TERM (see rdb_open) - * \param[in] size replica size in bytes - * \param[in] vos_df_version version of VOS durable format - * \param[in] replicas list of replica ranks + * \param[in] params parameters for creating the replica * \param[in] cbs callbacks (not copied) * \param[in] arg argument for cbs * \param[out] storagep database storage */ int -rdb_create(const char *path, const uuid_t uuid, uint64_t caller_term, size_t size, - uint32_t vos_df_version, const d_rank_list_t *replicas, struct rdb_cbs *cbs, void *arg, +rdb_create(const char *path, const uuid_t uuid, uint64_t caller_term, + struct rdb_create_params *params, struct rdb_cbs *cbs, void *arg, struct rdb_storage **storagep) { daos_handle_t pool; daos_handle_t mc; d_iov_t value; - uint32_t version = RDB_LAYOUT_VERSION; + uint32_t version; struct rdb *db; int rc; D_DEBUG(DB_MD, - DF_UUID ": creating db %s with %u replicas: caller_term=" DF_X64 " size=" DF_U64 - " vos_df_version=%u\n", - DP_UUID(uuid), path, replicas == NULL ? 0 : replicas->rl_nr, caller_term, size, - vos_df_version); + DF_UUID ": creating db %s with %d replicas: caller_term=" DF_X64 " size=" DF_U64 + " vos_df_version=%u layout_version=%u self=" RDB_F_RID "\n", + DP_UUID(uuid), path, params->rcp_replicas_len, caller_term, params->rcp_size, + params->rcp_vos_df_version, params->rcp_layout_version, RDB_P_RID(params->rcp_id)); /* * Create and open a VOS pool. RDB pools specify VOS_POF_SMALL for * basic system memory reservation and VOS_POF_EXCL for concurrent * access protection. 
*/ - rc = vos_pool_create(path, (unsigned char *)uuid, size, 0 /* data_sz */, 0 /* meta_sz */, + rc = vos_pool_create(path, (unsigned char *)uuid, params->rcp_size, 0 /* data_sz */, + 0 /* meta_sz */, VOS_POF_SMALL | VOS_POF_EXCL | VOS_POF_RDB | VOS_POF_EXTERNAL_CHKPT, - vos_df_version, &pool); + params->rcp_vos_df_version, &pool); if (rc != 0) goto out; ABT_thread_yield(); @@ -75,15 +74,32 @@ rdb_create(const char *path, const uuid_t uuid, uint64_t caller_term, size_t siz goto out_pool_hdl; /* Initialize the layout version. */ + version = params->rcp_layout_version; + if (version == 0) + version = RDB_LAYOUT_VERSION; d_iov_set(&value, &version, sizeof(version)); rc = rdb_mc_update(mc, RDB_MC_ATTRS, 1 /* n */, &rdb_mc_version, &value, NULL /* vtx */); if (rc != 0) goto out_mc_hdl; + /* Initialize the replica ID. */ + if (version >= RDB_LAYOUT_VERSION_REPLICA_ID) { + d_iov_set(&value, ¶ms->rcp_id, sizeof(params->rcp_id)); + rc = rdb_mc_update(mc, RDB_MC_ATTRS, 1 /* n */, &rdb_mc_replica_id, &value, + NULL /* vtx */); + if (rc != 0) { + DL_ERROR(rc, DF_UUID ": failed to initialize replica ID", DP_UUID(uuid)); + goto out_mc_hdl; + } + } + /* Initialize Raft. */ - rc = rdb_raft_init(pool, mc, replicas); - if (rc != 0) + rc = rdb_raft_init((unsigned char *)uuid, pool, mc, params->rcp_replicas, + params->rcp_replicas_len, version); + if (rc != 0) { + DL_ERROR(rc, DF_UUID ": failed to initialize Raft", DP_UUID(uuid)); goto out_mc_hdl; + } /* * Mark this replica as fully initialized by storing its UUID. 
@@ -94,7 +110,7 @@ rdb_create(const char *path, const uuid_t uuid, uint64_t caller_term, size_t siz if (rc != 0) goto out_mc_hdl; - rc = rdb_open_internal(pool, mc, uuid, caller_term, cbs, arg, &db); + rc = rdb_open_internal(pool, mc, uuid, version, caller_term, cbs, arg, &db); if (rc != 0) goto out_mc_hdl; @@ -129,6 +145,7 @@ rdb_destroy(const char *path, const uuid_t uuid) { int rc; + D_INFO(DF_UUID ": destroying db %s\n", DP_UUID(uuid), path); rc = vos_pool_destroy_ex(path, (unsigned char *)uuid, VOS_POF_RDB); if (rc != 0) D_ERROR(DF_UUID": failed to destroy %s: "DF_RC"\n", @@ -237,11 +254,12 @@ static void rdb_chkptd_stop(struct rdb *db); * the caller shall not close in this case. */ static int -rdb_open_internal(daos_handle_t pool, daos_handle_t mc, const uuid_t uuid, uint64_t caller_term, - struct rdb_cbs *cbs, void *arg, struct rdb **dbp) +rdb_open_internal(daos_handle_t pool, daos_handle_t mc, const uuid_t uuid, uint32_t layout_version, + uint64_t caller_term, struct rdb_cbs *cbs, void *arg, struct rdb **dbp) { struct rdb *db; int rc; + d_iov_t value; struct vos_pool_space vps; uint64_t rdb_extra_sys[DAOS_MEDIA_MAX]; @@ -260,6 +278,7 @@ rdb_open_internal(daos_handle_t pool, daos_handle_t mc, const uuid_t uuid, uint6 db->d_cbs = cbs; db->d_arg = arg; db->d_pool = pool; + db->d_version = layout_version; db->d_mc = mc; rc = ABT_mutex_create(&db->d_mutex); @@ -284,9 +303,28 @@ rdb_open_internal(daos_handle_t pool, daos_handle_t mc, const uuid_t uuid, uint6 goto err_raft_mutex; } + rc = ABT_rwlock_create(&db->d_gen_lock); + if (rc != ABT_SUCCESS) { + D_ERROR(DF_DB ": failed to create gen rwlock: %d\n", DP_DB(db), rc); + rc = dss_abterr2der(rc); + goto err_ref_cv; + } + + if (db->d_version >= RDB_LAYOUT_VERSION_REPLICA_ID) { + d_iov_set(&value, &db->d_replica_id, sizeof(db->d_replica_id)); + rc = rdb_mc_lookup(mc, RDB_MC_ATTRS, &rdb_mc_replica_id, &value); + if (rc != 0) { + DL_ERROR(rc, DF_DB ": failed to look up replica ID", DP_DB(db)); + goto err_gen_lock; + } 
+ } else { + db->d_replica_id.rri_rank = dss_self_rank(); + db->d_replica_id.rri_gen = 0; + } + rc = rdb_chkptd_start(db); if (rc != 0) - goto err_ref_cv; + goto err_gen_lock; rc = rdb_kvs_cache_create(&db->d_kvss); if (rc != 0) @@ -339,6 +377,8 @@ rdb_open_internal(daos_handle_t pool, daos_handle_t mc, const uuid_t uuid, uint6 rdb_kvs_cache_destroy(db->d_kvss); err_chkptd: rdb_chkptd_stop(db); +err_gen_lock: + ABT_rwlock_free(&db->d_gen_lock); err_ref_cv: ABT_cond_free(&db->d_ref_cv); err_raft_mutex: @@ -453,7 +493,7 @@ rdb_open(const char *path, const uuid_t uuid, uint64_t caller_term, struct rdb_c goto err_mc; } - rc = rdb_open_internal(pool, mc, uuid, caller_term, cbs, arg, &db); + rc = rdb_open_internal(pool, mc, uuid, version, caller_term, cbs, arg, &db); if (rc != 0) goto err_mc; @@ -485,6 +525,7 @@ rdb_close(struct rdb_storage *storage) vos_cont_close(db->d_mc); vos_pool_close(db->d_pool); rdb_kvs_cache_destroy(db->d_kvss); + ABT_rwlock_free(&db->d_gen_lock); ABT_cond_free(&db->d_ref_cv); ABT_mutex_free(&db->d_raft_mutex); ABT_mutex_free(&db->d_mutex); @@ -512,15 +553,18 @@ rdb_get_use_leases(void) int rdb_glance(struct rdb_storage *storage, struct rdb_clue *clue) { - struct rdb *db = rdb_from_storage(storage); - d_iov_t value; - uint64_t term; - int vote; - uint64_t last_index = db->d_lc_record.dlr_tail - 1; - uint64_t last_term; - d_rank_list_t *replicas; - uint64_t oid_next; - int rc; + struct rdb *db = rdb_from_storage(storage); + d_iov_t value; + uint64_t term; + rdb_replica_id_t vote; + uint64_t last_index = db->d_lc_record.dlr_tail - 1; + uint64_t last_term; + struct rdb_replica_record *replicas; + int replicas_len; + d_rank_list_t *ranks; + int i; + uint64_t oid_next; + int rc; d_iov_set(&value, &term, sizeof(term)); rc = rdb_mc_lookup(db->d_mc, RDB_MC_ATTRS, &rdb_mc_term, &value); @@ -531,10 +575,11 @@ rdb_glance(struct rdb_storage *storage, struct rdb_clue *clue) goto err; } - d_iov_set(&value, &vote, sizeof(vote)); + 
rdb_set_mc_vote_lookup_buf(db, &vote, &value); rc = rdb_mc_lookup(db->d_mc, RDB_MC_ATTRS, &rdb_mc_vote, &value); if (rc == -DER_NONEXIST) { - vote = -1; + vote.rri_rank = -1; + vote.rri_gen = -1; } else if (rc != 0) { D_ERROR(DF_DB": failed to look up vote: "DF_RC"\n", DP_DB(db), DP_RC(rc)); goto err; @@ -556,12 +601,23 @@ rdb_glance(struct rdb_storage *storage, struct rdb_clue *clue) last_term = header.dre_term; } - rc = rdb_raft_load_replicas(db->d_lc, last_index, &replicas); + rc = rdb_raft_load_replicas(db->d_uuid, db->d_lc, last_index, db->d_version, &replicas, + &replicas_len); if (rc != 0) { D_ERROR(DF_DB": failed to load replicas at "DF_U64": "DF_RC"\n", DP_DB(db), last_index, DP_RC(rc)); goto err; } + ranks = d_rank_list_alloc(replicas_len); + if (ranks == NULL) { + D_ERROR(DF_DB ": failed to convert replicas to ranks\n", DP_DB(db)); + rc = -DER_NOMEM; + D_FREE(replicas); + goto err; + } + for (i = 0; i < replicas_len; i++) + ranks->rl_ranks[i] = replicas[i].drr_id.rri_rank; + D_FREE(replicas); d_iov_set(&value, &oid_next, sizeof(oid_next)); rc = rdb_lc_lookup(db->d_lc, last_index, RDB_LC_ATTRS, &rdb_lc_oid_next, &value); @@ -569,26 +625,22 @@ rdb_glance(struct rdb_storage *storage, struct rdb_clue *clue) oid_next = RDB_LC_OID_NEXT_INIT; } else if (rc != 0) { D_ERROR(DF_DB": failed to look up next object number: %d\n", DP_DB(db), rc); - goto err_replicas; + goto err_ranks; } - clue->bcl_term = term; - clue->bcl_vote = vote; - /* - * In the future, the self node ID might differ from the rank and need - * to be stored persistently. 
- */ - clue->bcl_self = dss_self_rank(); + clue->bcl_term = term; + clue->bcl_vote = vote.rri_rank; + clue->bcl_self = db->d_replica_id.rri_rank; clue->bcl_last_index = last_index; - clue->bcl_last_term = last_term; + clue->bcl_last_term = last_term; clue->bcl_base_index = db->d_lc_record.dlr_base; - clue->bcl_base_term = db->d_lc_record.dlr_base_term; - clue->bcl_replicas = replicas; - clue->bcl_oid_next = oid_next; + clue->bcl_base_term = db->d_lc_record.dlr_base_term; + clue->bcl_replicas = ranks; + clue->bcl_oid_next = oid_next; return 0; -err_replicas: - d_rank_list_free(replicas); +err_ranks: + d_rank_list_free(ranks); err: return rc; } @@ -624,7 +676,13 @@ rdb_start(struct rdb_storage *storage, struct rdb **dbp) db->d_use_leases = rdb_get_use_leases(); - D_DEBUG(DB_MD, DF_DB": started db %p: use_leases=%d\n", DP_DB(db), db, db->d_use_leases); + D_INFO(DF_DB ": started: db=%p version=%u use_leases=%d election_timeout=%d " + "request_timeout=%d lease_maintenance_grace=%d compact_thres=" DF_U64 + " ae_max_entries=%u ae_max_size=" DF_U64 "\n", + DP_DB(db), db, db->d_version, db->d_use_leases, + raft_get_election_timeout(db->d_raft), raft_get_request_timeout(db->d_raft), + raft_get_lease_maintenance_grace(db->d_raft), db->d_compact_thres, + db->d_ae_max_entries, db->d_ae_max_size); *dbp = db; return 0; } @@ -641,7 +699,7 @@ rdb_stop(struct rdb *db, struct rdb_storage **storagep) { bool deleted; - D_DEBUG(DB_MD, DF_DB": stopping db %p\n", DP_DB(db), db); + D_INFO(DF_DB ": stopping: db=%p\n", DP_DB(db), db); ABT_mutex_lock(rdb_hash_lock); deleted = d_hash_rec_delete(&rdb_hash, db->d_uuid, sizeof(uuid_t)); @@ -650,7 +708,7 @@ rdb_stop(struct rdb *db, struct rdb_storage **storagep) rdb_raft_stop(db); - D_DEBUG(DB_MD, DF_DB": stopped db %p\n", DP_DB(db), db); + D_INFO(DF_DB ": stopped: db=%p\n", DP_DB(db), db); *storagep = rdb_to_storage(db); } @@ -691,66 +749,75 @@ rdb_dictate(struct rdb_storage *storage) } /** - * Add \a replicas. + * Allocate a replica generation. 
* * \param[in] db database - * \param[in,out] - * replicas [in] list of replica ranks; - * [out] list of replica ranks that could not be added + * \param[in] term if not RDB_NIL_TERM, term to allocate in + * \param[out] gen_out replica generation */ int -rdb_add_replicas(struct rdb *db, d_rank_list_t *replicas) +rdb_alloc_replica_gen(struct rdb *db, uint64_t term, uint32_t *gen_out) { - int i; - int rc; + struct rdb_tx tx; + d_iov_t value; + uint32_t next; + int rc; + + if (db->d_version < RDB_LAYOUT_VERSION_REPLICA_ID) { + D_DEBUG(DB_MD, DF_DB ": zero for old layout\n", DP_DB(db)); + *gen_out = 0; + rc = 0; + goto out; + } - D_DEBUG(DB_MD, DF_DB": Adding %d replicas\n", - DP_DB(db), replicas->rl_nr); + rc = rdb_tx_begin(db, term, &tx); + if (rc != 0) + goto out; + ABT_rwlock_wrlock(db->d_gen_lock); - ABT_mutex_lock(db->d_raft_mutex); + d_iov_set(&value, &next, sizeof(next)); + rc = rdb_tx_lookup(&tx, &rdb_path_attrs, &rdb_lc_replica_gen_next, &value); + if (rc != 0) + goto out_lock; - rc = rdb_raft_wait_applied(db, db->d_debut, raft_get_current_term(db->d_raft)); - if (rc != 0) { - ABT_mutex_unlock(db->d_raft_mutex); - return rc; - } + next++; - rc = -DER_INVAL; - for (i = 0; i < replicas->rl_nr; ++i) { - rc = rdb_raft_add_replica(db, replicas->rl_ranks[i]); - if (rc != 0) { - D_ERROR(DF_DB": failed to add rank %u: "DF_RC"\n", DP_DB(db), - replicas->rl_ranks[i], DP_RC(rc)); - break; - } - } + rc = rdb_tx_update_critical(&tx, &rdb_path_attrs, &rdb_lc_replica_gen_next, &value); + if (rc != 0) + goto out_lock; - ABT_mutex_unlock(db->d_raft_mutex); + rc = rdb_tx_commit(&tx); - /* Update list to only contain ranks which could not be added. 
*/ - replicas->rl_nr -= i; - if (replicas->rl_nr > 0 && i > 0) - memmove(&replicas->rl_ranks[0], &replicas->rl_ranks[i], - replicas->rl_nr * sizeof(d_rank_t)); +out_lock: + ABT_rwlock_unlock(db->d_gen_lock); + rdb_tx_end(&tx); + if (rc != 0) + goto out; + + D_INFO(DF_DB ": updated next replica generation to %u\n", DP_DB(db), next); + *gen_out = next - 1; +out: return rc; } /** - * Remove \a replicas. + * Modify \a replicas. * - * \param[in] db database - * \param[in,out] - * replicas [in] list of replica ranks; - * [out] list of replica ranks that could not be removed + * \param[in] db database + * \param[in] op operation to perform + * \param[in,out] replicas [in] list of replica ranks; + * [out] list of replica ranks that could not be modified + * \param[in,out] replicas_len length of \a replicas; */ int -rdb_remove_replicas(struct rdb *db, d_rank_list_t *replicas) +rdb_modify_replicas(struct rdb *db, enum rdb_replica_op op, rdb_replica_id_t *replicas, + int *replicas_len) { - int i; - int rc; + raft_logtype_e type; + int i; + int rc; - D_DEBUG(DB_MD, DF_DB": Removing %d replicas\n", - DP_DB(db), replicas->rl_nr); + D_DEBUG(DB_MD, DF_DB ": op=%d replicas=%d\n", DP_DB(db), op, *replicas_len); ABT_mutex_lock(db->d_raft_mutex); @@ -761,22 +828,33 @@ rdb_remove_replicas(struct rdb *db, d_rank_list_t *replicas) } rc = -DER_INVAL; - for (i = 0; i < replicas->rl_nr; ++i) { - rc = rdb_raft_remove_replica(db, replicas->rl_ranks[i]); + switch (op) { + case RDB_REPLICA_ADD: + type = RAFT_LOGTYPE_ADD_NODE; + break; + case RDB_REPLICA_REMOVE: + type = RAFT_LOGTYPE_REMOVE_NODE; + break; + default: + D_ASSERTF(0, "invalid op %d\n", op); + } + for (i = 0; i < *replicas_len; ++i) { + rc = rdb_raft_append_apply_cfg(db, type, replicas[i]); if (rc != 0) { - D_ERROR(DF_DB": failed to remove rank %u: "DF_RC"\n", DP_DB(db), - replicas->rl_ranks[i], DP_RC(rc)); + DL_ERROR(rc, DF_DB ": failed to do op %d on replica " RDB_F_RID, DP_DB(db), + op, RDB_P_RID(replicas[i])); break; } } 
ABT_mutex_unlock(db->d_raft_mutex); - /* Update list to only contain ranks which could not be removed. */ - replicas->rl_nr -= i; - if (replicas->rl_nr > 0 && i > 0) - memmove(&replicas->rl_ranks[0], &replicas->rl_ranks[i], - replicas->rl_nr * sizeof(d_rank_t)); + /* Update list to only contain replicas which could not be modified. */ + if (i > 0) { + *replicas_len -= i; + if (*replicas_len > 0) + memmove(&replicas[0], &replicas[i], *replicas_len * sizeof(replicas[0])); + } return rc; } @@ -856,8 +934,7 @@ rdb_is_leader(struct rdb *db, uint64_t *term) int rdb_get_leader(struct rdb *db, uint64_t *term, d_rank_t *rank) { - raft_node_t *node; - struct rdb_raft_node *dnode; + raft_node_t *node; ABT_mutex_lock(db->d_raft_mutex); node = raft_get_current_leader_node(db->d_raft); @@ -865,15 +942,41 @@ rdb_get_leader(struct rdb *db, uint64_t *term, d_rank_t *rank) ABT_mutex_unlock(db->d_raft_mutex); return -DER_NONEXIST; } - dnode = raft_node_get_udata(node); - D_ASSERT(dnode != NULL); *term = raft_get_current_term(db->d_raft); - *rank = dnode->dn_rank; + *rank = rdb_replica_id_decode(raft_node_get_id(node)).rri_rank; ABT_mutex_unlock(db->d_raft_mutex); return 0; } +rdb_replica_id_t +rdb_get_replica_id(struct rdb *db) +{ + return db->d_replica_id; +} + +int +rdb_get_replicas(struct rdb *db, rdb_replica_id_t **replicas, int *replicas_len) +{ + return rdb_raft_get_replicas(db, replicas, replicas_len); +} + +static d_rank_list_t * +rdb_replica_id_to_rank_list(rdb_replica_id_t *replicas, int replicas_len) +{ + d_rank_list_t *ranks; + int i; + + ranks = d_rank_list_alloc(replicas_len); + if (ranks == NULL) + return NULL; + + for (i = 0; i < replicas_len; i++) + ranks->rl_ranks[i] = replicas[i].rri_rank; + + return ranks; +} + /** * Get the list of replica ranks. Callers are responsible for * d_rank_list_free(*ranksp). 
@@ -884,7 +987,22 @@ rdb_get_leader(struct rdb *db, uint64_t *term, d_rank_t *rank) int rdb_get_ranks(struct rdb *db, d_rank_list_t **ranksp) { - return rdb_raft_get_ranks(db, ranksp); + rdb_replica_id_t *replicas; + int replicas_len; + d_rank_list_t *ranks; + int rc; + + rc = rdb_get_replicas(db, &replicas, &replicas_len); + if (rc != 0) + return rc; + + ranks = rdb_replica_id_to_rank_list(replicas, replicas_len); + D_FREE(replicas); + if (ranks == NULL) + return -DER_NOMEM; + + *ranksp = ranks; + return 0; } int @@ -905,6 +1023,12 @@ rdb_get_size(struct rdb *db, uint64_t *sizep) return rc; } +uint32_t +rdb_get_version(struct rdb *db) +{ + return db->d_version; +} + /** Implementation of the RDB pool checkpoint ULT. The ULT * is only active if DAOS is using MD on SSD. */ diff --git a/src/rdb/rdb_internal.h b/src/rdb/rdb_internal.h index 57f9b4ee3a0..cb33675128b 100644 --- a/src/rdb/rdb_internal.h +++ b/src/rdb/rdb_internal.h @@ -78,6 +78,7 @@ struct rdb { /* General fields */ d_list_t d_entry; /* in rdb_hash */ uuid_t d_uuid; /* of database */ + rdb_replica_id_t d_replica_id; /* of this replica */ ABT_mutex d_mutex; /* d_replies, d_replies_cv */ int d_ref; /* of callers and RPCs */ ABT_cond d_ref_cv; /* for d_ref decrements */ @@ -85,6 +86,7 @@ struct rdb { void *d_arg; /* for d_cbs callbacks */ struct daos_lru_cache *d_kvss; /* rdb_kvs cache */ daos_handle_t d_pool; /* VOS pool */ + uint32_t d_version; /* of DB layout */ struct rdb_chkpt_record d_chkpt_record; /* pool checkpoint information */ ABT_thread d_chkptd; /* thread handle for pool checkpoint daemon */ ABT_mutex d_chkpt_mutex; /* mutex for checkpoint synchronization */ @@ -94,6 +96,7 @@ struct rdb { uint64_t d_nospc_ts; /* last time commit observed low/no space (usec) */ bool d_new; /* for skipping lease recovery */ bool d_use_leases; /* when verifying leadership */ + ABT_rwlock d_gen_lock; /* for rdb_lc_replica_gen_next */ /* rdb_raft fields */ raft_server_t *d_raft; @@ -131,21 +134,8 @@ struct rdb { 
#define RDB_NOAPPEND_FREE_SPACE (1ULL << 22) #define RDB_CRITICAL_FREE_SPACE (1ULL << 14) -/* Current rank */ -#define DF_RANK "%u" -static inline d_rank_t -DP_RANK(void) -{ - d_rank_t rank; - int rc; - - rc = crt_group_rank(NULL, &rank); - D_ASSERTF(rc == 0, "%d\n", rc); - return rank; -} - -#define DF_DB DF_UUID"["DF_RANK"]" -#define DP_DB(db) DP_UUID((db)->d_uuid), DP_RANK() +#define DF_DB DF_UUID "[" RDB_F_RID "]" +#define DP_DB(db) DP_UUID((db)->d_uuid), RDB_P_RID((db)->d_replica_id) /* Number of "base" references that the rdb_stop() path expects to remain */ #define RDB_BASE_REFS 1 @@ -158,6 +148,24 @@ struct rdb *rdb_lookup(const uuid_t uuid); /* rdb_raft.c *****************************************************************/ +D_CASSERT(sizeof(raft_node_id_t) == sizeof(uint64_t)); + +static inline rdb_replica_id_t +rdb_replica_id_decode(raft_node_id_t raft_id) +{ + rdb_replica_id_t id; + + id.rri_rank = (uint64_t)raft_id >> 32; + id.rri_gen = raft_id & 0xffffffff; + return id; +} + +static inline raft_node_id_t +rdb_replica_id_encode(rdb_replica_id_t id) +{ + return (uint64_t)id.rri_rank << 32 | id.rri_gen; +} + /* * Per-raft_node_t INSTALLSNAPSHOT state * @@ -179,9 +187,11 @@ struct rdb_raft_node { struct rdb_raft_is dn_is; }; +/* clang-format off */ void rdb_raft_module_init(void); void rdb_raft_module_fini(void); -int rdb_raft_init(daos_handle_t pool, daos_handle_t mc, const d_rank_list_t *replicas); +int rdb_raft_init(uuid_t db_uuid, daos_handle_t pool, daos_handle_t mc, rdb_replica_id_t *replicas, + int replicas_len, uint32_t layout_version); int rdb_raft_open(struct rdb *db, uint64_t caller_term); int rdb_raft_start(struct rdb *db); void rdb_raft_stop(struct rdb *db); @@ -191,19 +201,21 @@ void rdb_raft_resign(struct rdb *db, uint64_t term); int rdb_raft_campaign(struct rdb *db); int rdb_raft_ping(struct rdb *db, uint64_t caller_term); int rdb_raft_verify_leadership(struct rdb *db); -int rdb_raft_load_replicas(daos_handle_t lc, uint64_t index, 
d_rank_list_t **replicas); -int rdb_raft_add_replica(struct rdb *db, d_rank_t rank); -int rdb_raft_remove_replica(struct rdb *db, d_rank_t rank); +int rdb_raft_load_replicas(uuid_t db_uuid, daos_handle_t lc, uint64_t index, + uint32_t layout_version, struct rdb_replica_record **replicas_out, + int *replicas_len_out); +int rdb_raft_append_apply_cfg(struct rdb *db, raft_logtype_e type, rdb_replica_id_t id); int rdb_raft_append_apply(struct rdb *db, void *entry, size_t size, void *result); int rdb_raft_wait_applied(struct rdb *db, uint64_t index, uint64_t term); -int rdb_raft_get_ranks(struct rdb *db, d_rank_list_t **ranksp); +int rdb_raft_get_replicas(struct rdb *db, rdb_replica_id_t **replicas_out, int *replicas_len_out); void rdb_requestvote_handler(crt_rpc_t *rpc); void rdb_appendentries_handler(crt_rpc_t *rpc); void rdb_installsnapshot_handler(crt_rpc_t *rpc); void rdb_raft_process_reply(struct rdb *db, crt_rpc_t *rpc); void rdb_raft_free_request(struct rdb *db, crt_rpc_t *rpc); int rdb_raft_trigger_compaction(struct rdb *db, bool compact_all, uint64_t *idx); +/* clang-format on */ /* rdb_rpc.c ******************************************************************/ @@ -231,15 +243,22 @@ enum rdb_operation { RDB_PROTO_SRV_RPC_LIST }; extern struct crt_proto_format rdb_proto_fmt; +/* clang-format off */ #define DAOS_ISEQ_RDB_OP /* input fields */ \ - ((uuid_t) (ri_uuid) CRT_VAR) + ((uuid_t) (ri_uuid) CRT_VAR) \ + ((rdb_replica_id_t) (ri_from) CRT_VAR) \ + ((rdb_replica_id_t) (ri_to) CRT_VAR) #define DAOS_OSEQ_RDB_OP /* output fields */ \ ((int32_t) (ro_rc) CRT_VAR) \ - ((uint32_t) (ro_padding) CRT_VAR) + ((uint32_t) (ro_padding) CRT_VAR) \ + ((rdb_replica_id_t) (ro_from) CRT_VAR) \ + ((rdb_replica_id_t) (ro_to) CRT_VAR) +/* clang-format on */ CRT_RPC_DECLARE(rdb_op, DAOS_ISEQ_RDB_OP, DAOS_OSEQ_RDB_OP) +/* clang-format off */ #define DAOS_ISEQ_RDB_REQUESTVOTE /* input fields */ \ ((struct rdb_op_in) (rvi_op) CRT_VAR) \ ((msg_requestvote_t) (rvi_msg) CRT_RAW) @@ -247,10 
+266,12 @@ CRT_RPC_DECLARE(rdb_op, DAOS_ISEQ_RDB_OP, DAOS_OSEQ_RDB_OP) #define DAOS_OSEQ_RDB_REQUESTVOTE /* output fields */ \ ((struct rdb_op_out) (rvo_op) CRT_VAR) \ ((msg_requestvote_response_t) (rvo_msg) CRT_VAR) +/* clang-format on */ CRT_RPC_DECLARE(rdb_requestvote, DAOS_ISEQ_RDB_REQUESTVOTE, DAOS_OSEQ_RDB_REQUESTVOTE) +/* clang-format off */ #define DAOS_ISEQ_RDB_APPENDENTRIES /* input fields */ \ ((struct rdb_op_in) (aei_op) CRT_VAR) \ ((msg_appendentries_t) (aei_msg) CRT_VAR) @@ -258,6 +279,7 @@ CRT_RPC_DECLARE(rdb_requestvote, DAOS_ISEQ_RDB_REQUESTVOTE, #define DAOS_OSEQ_RDB_APPENDENTRIES /* output fields */ \ ((struct rdb_op_out) (aeo_op) CRT_VAR) \ ((msg_appendentries_response_t) (aeo_msg) CRT_RAW) +/* clang-format on */ CRT_RPC_DECLARE(rdb_appendentries, DAOS_ISEQ_RDB_APPENDENTRIES, DAOS_OSEQ_RDB_APPENDENTRIES) @@ -267,6 +289,7 @@ struct rdb_local { d_iov_t rl_data_iov; /* isi_data buffer */ }; +/* clang-format off */ #define DAOS_ISEQ_RDB_INSTALLSNAPSHOT /* input fields */ \ ((struct rdb_op_in) (isi_op) CRT_VAR) \ ((msg_installsnapshot_t) (isi_msg) CRT_VAR) \ @@ -290,23 +313,26 @@ struct rdb_local { ((uint64_t) (iso_seq) CRT_VAR) \ /* last anchor */ \ ((struct rdb_anchor) (iso_anchor) CRT_RAW) +/* clang-format on */ CRT_RPC_DECLARE(rdb_installsnapshot, DAOS_ISEQ_RDB_INSTALLSNAPSHOT, DAOS_OSEQ_RDB_INSTALLSNAPSHOT) -int rdb_create_raft_rpc(crt_opcode_t opc, raft_node_t *node, crt_rpc_t **rpc); +/* clang-format off */ +int rdb_create_raft_rpc(struct rdb *db, crt_opcode_t opc, raft_node_t *node, crt_rpc_t **rpc); int rdb_send_raft_rpc(crt_rpc_t *rpc, struct rdb *db); int rdb_abort_raft_rpcs(struct rdb *db); void rdb_recvd(void *arg); +/* clang-format on */ /* rdb_kvs.c ******************************************************************/ /* KVS cache entry */ struct rdb_kvs { - struct daos_llink de_entry; /* in LRU */ - rdb_path_t de_path; - rdb_oid_t de_object; - uint8_t de_buf[]; /* for de_path */ + struct daos_llink de_entry; /* in LRU (private) */ + 
rdb_path_t de_path; + rdb_oid_t de_object; + uint8_t de_buf[]; /* for de_path */ }; int rdb_kvs_cache_create(struct daos_lru_cache **cache); @@ -319,6 +345,14 @@ void rdb_kvs_evict(struct rdb *db, struct rdb_kvs *kvs); /* rdb_path.c *****************************************************************/ +extern rdb_path_t rdb_path_attrs; + +static inline bool +rdb_path_is_attrs(const rdb_path_t *path) +{ + return path->iov_len == 0; +} + int rdb_path_clone(const rdb_path_t *path, rdb_path_t *new_path); typedef int (*rdb_path_iterate_cb_t)(d_iov_t *key, void *arg); int rdb_path_iterate(const rdb_path_t *path, rdb_path_iterate_cb_t cb, @@ -489,6 +523,26 @@ rdb_lc_iterate(daos_handle_t lc, uint64_t index, rdb_oid_t oid, bool backward, return rdb_vos_iterate(lc, index, oid, backward, cb, arg); } +static inline void +rdb_set_mc_vote_lookup_buf(struct rdb *db, rdb_replica_id_t *vote, d_iov_t *value) +{ + if (db->d_version < RDB_LAYOUT_VERSION_REPLICA_ID) { + d_iov_set(value, &vote->rri_rank, sizeof(vote->rri_rank)); + vote->rri_gen = 0; + } else { + d_iov_set(value, vote, sizeof(*vote)); + } +} + +static inline void +rdb_set_mc_vote_update_buf(struct rdb *db, rdb_replica_id_t *vote, d_iov_t *value) +{ + if (db->d_version < RDB_LAYOUT_VERSION_REPLICA_ID) + d_iov_set(value, &vote->rri_rank, sizeof(vote->rri_rank)); + else + d_iov_set(value, vote, sizeof(*vote)); +} + int rdb_scm_left(struct rdb *db, daos_size_t *scm_left_outp); int diff --git a/src/rdb/rdb_kvs.c b/src/rdb/rdb_kvs.c index f07cd4de877..c4619ab1179 100644 --- a/src/rdb/rdb_kvs.c +++ b/src/rdb/rdb_kvs.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2017-2022 Intel Corporation. + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -18,6 +19,14 @@ #include "rdb_internal.h" #include "rdb_layout.h" +/* + * Special static entry for RDB_LC_ATTRS + * + * Because rdb_path_attrs is a special, empty path, we can't store it in the + * LRU cache. 
Thankfully, it always maps to RDB_LC_ATTRS, which always exists. + */ +static struct rdb_kvs rdb_kvs_attrs = {.de_object = RDB_LC_ATTRS}; + struct rdb_kvs_open_arg { struct rdb *deo_db; rdb_oid_t deo_parent; @@ -28,9 +37,9 @@ struct rdb_kvs_open_arg { static int rdb_kvs_open_path_cb(d_iov_t *key, void *varg) { - struct rdb_kvs_open_arg *arg = varg; - rdb_oid_t parent = arg->deo_parent; - d_iov_t value; + struct rdb_kvs_open_arg *arg = varg; + rdb_oid_t parent = arg->deo_parent; + d_iov_t value; if (key->iov_len == 0) { D_ASSERTF(parent == RDB_LC_ATTRS, DF_X64"\n", parent); @@ -202,6 +211,11 @@ rdb_kvs_lookup(struct rdb *db, const rdb_path_t *path, uint64_t index, D_DEBUG(DB_TRACE, DF_DB": looking up "DF_IOV": alloc=%d\n", DP_DB(db), DP_IOV(path), alloc); + if (rdb_path_is_attrs(path)) { + *kvs = &rdb_kvs_attrs; + return 0; + } + arg.dea_db = db; arg.dea_index = index; arg.dea_alloc = alloc; @@ -217,11 +231,13 @@ rdb_kvs_lookup(struct rdb *db, const rdb_path_t *path, uint64_t index, void rdb_kvs_put(struct rdb *db, struct rdb_kvs *kvs) { - daos_lru_ref_release(db->d_kvss, &kvs->de_entry); + if (kvs != &rdb_kvs_attrs) + daos_lru_ref_release(db->d_kvss, &kvs->de_entry); } void rdb_kvs_evict(struct rdb *db, struct rdb_kvs *kvs) { - daos_lru_ref_evict(db->d_kvss, &kvs->de_entry); + if (kvs != &rdb_kvs_attrs) + daos_lru_ref_evict(db->d_kvss, &kvs->de_entry); } diff --git a/src/rdb/rdb_layout.c b/src/rdb/rdb_layout.c index 22092735609..dcabd64cd35 100644 --- a/src/rdb/rdb_layout.c +++ b/src/rdb/rdb_layout.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2017-2021 Intel Corporation. 
+ * (C) Copyright 2025 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -17,6 +18,7 @@ RDB_STRING_KEY(rdb_, dkey); RDB_STRING_KEY(rdb_mc_, uuid); RDB_STRING_KEY(rdb_mc_, version); +RDB_STRING_KEY(rdb_mc_, replica_id); RDB_STRING_KEY(rdb_mc_, term); RDB_STRING_KEY(rdb_mc_, vote); RDB_STRING_KEY(rdb_mc_, lc); @@ -27,4 +29,5 @@ RDB_STRING_KEY(rdb_lc_, entry_header); RDB_STRING_KEY(rdb_lc_, entry_data); RDB_STRING_KEY(rdb_lc_, nreplicas); RDB_STRING_KEY(rdb_lc_, replicas); +RDB_STRING_KEY(rdb_lc_, replica_gen_next); RDB_STRING_KEY(rdb_lc_, root); diff --git a/src/rdb/rdb_layout.h b/src/rdb/rdb_layout.h index 66fb9b5788c..f84d2e805a3 100644 --- a/src/rdb/rdb_layout.h +++ b/src/rdb/rdb_layout.h @@ -1,5 +1,6 @@ /* * (C) Copyright 2017-2021 Intel Corporation. + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -17,6 +18,7 @@ * D-key rdb_dkey * A-key rdb_mc_uuid // (see rdb_create()) * A-key rdb_mc_version // layout version + * A-key rdb_mc_replica_id // replica ID * A-key rdb_mc_term // term * A-key rdb_mc_vote // vote for term * A-key rdb_mc_lc // log container record @@ -26,7 +28,9 @@ * A-key rdb_lc_entry_header // log entry header * A-key rdb_lc_entry_data // log entry data * A-key rdb_lc_nreplicas // number of replicas - * A-key rdb_lc_replicas // replica ranks + * A-key rdb_lc_replicas // replicas + * A-key rdb_lc_replica_gen_next + * // result for next replica generation allocation * A-key rdb_lc_oid_next // result for next object ID allocation * A-key rdb_lc_root // * Object // root KVS @@ -72,11 +76,16 @@ #define RDB_LAYOUT_H /* Default layout version */ -#define RDB_LAYOUT_VERSION 1 +#define RDB_LAYOUT_VERSION 2 /* Lowest compatible layout version */ #define RDB_LAYOUT_VERSION_LOW 1 +/* Layout version that introduces replica IDs with generations */ +#define RDB_LAYOUT_VERSION_REPLICA_ID 2 + +D_CASSERT(sizeof(rdb_replica_id_t) == sizeof(uint64_t)); + 
/* * Object ID * @@ -122,8 +131,9 @@ struct rdb_anchor { */ extern d_iov_t rdb_mc_uuid; /* uuid_t */ extern d_iov_t rdb_mc_version; /* uint32_t */ +extern d_iov_t rdb_mc_replica_id; /* rdb_replica_id_t or absent (< v2) */ extern d_iov_t rdb_mc_term; /* uint64_t */ -extern d_iov_t rdb_mc_vote; /* int */ +extern d_iov_t rdb_mc_vote; /* rdb_replica_id_t or int (< v2) */ extern d_iov_t rdb_mc_lc; /* rdb_lc_record */ extern d_iov_t rdb_mc_slc; /* rdb_lc_record */ @@ -154,15 +164,34 @@ struct rdb_lc_record { extern d_iov_t rdb_lc_entry_header; /* rdb_entry */ extern d_iov_t rdb_lc_entry_data; /* uint8_t[] */ extern d_iov_t rdb_lc_nreplicas; /* uint8_t */ -extern d_iov_t rdb_lc_replicas; /* uint32_t[] */ +extern d_iov_t rdb_lc_replicas; /* rdb_replica_record[] or uint32_t[] (< v2) */ +extern d_iov_t rdb_lc_replica_gen_next; /* uint32_t or absent (< v2)*/ extern d_iov_t rdb_lc_oid_next; /* rdb_oid_t (classless) */ extern d_iov_t rdb_lc_root; /* rdb_oid_t */ -/* Log entry */ +/* Log entry header */ struct rdb_entry { uint64_t dre_term; uint32_t dre_type; uint32_t dre_size; /* of entry data */ }; +/* + * Log normal entry data + * + * See rdb_tx_append. + */ + +/* + * Log cfg entry data + * + * rdb_replica_id or d_rank_t (< v2). + */ + +/* Replica record in rdb_lc_replicas */ +struct rdb_replica_record { + rdb_replica_id_t drr_id; + uint64_t drr_reserved; /* for future non-voting support, etc. */ +}; + #endif /* RDB_LAYOUT_H */ diff --git a/src/rdb/rdb_path.c b/src/rdb/rdb_path.c index f2afe62b501..8d8b00189cf 100644 --- a/src/rdb/rdb_path.c +++ b/src/rdb/rdb_path.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2017-2021 Intel Corporation. 
+ * (C) Copyright 2025 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -13,6 +14,9 @@ #include "rdb_internal.h" +/* Special path for RDB_LC_ATTRS (available only for internal use within rdb) */ +rdb_path_t rdb_path_attrs; + /* Key for the root KVS */ d_iov_t rdb_path_root_key; diff --git a/src/rdb/rdb_raft.c b/src/rdb/rdb_raft.c index cd778108c58..811b3a2207c 100644 --- a/src/rdb/rdb_raft.c +++ b/src/rdb/rdb_raft.c @@ -87,6 +87,7 @@ rdb_raft_cb_send_requestvote(raft_server_t *raft, void *arg, raft_node_t *node, msg_requestvote_t *msg) { struct rdb *db = arg; + rdb_replica_id_t rdb_node_id; struct rdb_raft_node *rdb_node = raft_node_get_udata(node); char *s = msg->prevote ? " (prevote)" : ""; crt_rpc_t *rpc; @@ -94,24 +95,25 @@ rdb_raft_cb_send_requestvote(raft_server_t *raft, void *arg, raft_node_t *node, int rc; D_ASSERT(db->d_raft == raft); - D_DEBUG(DB_TRACE, DF_DB": sending rv%s to node %d rank %u: term=%ld\n", - DP_DB(db), s, raft_node_get_id(node), rdb_node->dn_rank, - msg->term); + D_ASSERT(node != NULL); + D_ASSERT(rdb_node != NULL); + rdb_node_id = rdb_replica_id_decode(raft_node_get_id(node)); + D_DEBUG(DB_TRACE, DF_DB ": sending rv%s to node " RDB_F_RID ": term=%ld\n", DP_DB(db), s, + RDB_P_RID(rdb_node_id), msg->term); - rc = rdb_create_raft_rpc(RDB_REQUESTVOTE, node, &rpc); + rc = rdb_create_raft_rpc(db, RDB_REQUESTVOTE, node, &rpc); if (rc != 0) { - D_ERROR(DF_DB": failed to create RV%s RPC to node %d: %d\n", - DP_DB(db), s, raft_node_get_id(node), rc); + DL_ERROR(rc, DF_DB ": failed to create RV%s RPC to node " RDB_F_RID, DP_DB(db), s, + RDB_P_RID(rdb_node_id)); return rc; } - in = crt_req_get(rpc); - uuid_copy(in->rvi_op.ri_uuid, db->d_uuid); + in = crt_req_get(rpc); in->rvi_msg = *msg; rc = rdb_send_raft_rpc(rpc, db); if (rc != 0) { - D_ERROR(DF_DB": failed to send RV%s RPC to node %d: %d\n", - DP_DB(db), s, raft_node_get_id(node), rc); + DL_ERROR(rc, DF_DB ": failed to send RV%s RPC to node " 
RDB_F_RID, DP_DB(db), s, + RDB_P_RID(rdb_node_id)); crt_req_decref(rpc); } return rc; @@ -184,27 +186,29 @@ rdb_raft_cb_send_appendentries(raft_server_t *raft, void *arg, raft_node_t *node, msg_appendentries_t *msg) { struct rdb *db = arg; + rdb_replica_id_t rdb_node_id; struct rdb_raft_node *rdb_node = raft_node_get_udata(node); crt_rpc_t *rpc; struct rdb_appendentries_in *in; int rc; D_ASSERT(db->d_raft == raft); - D_DEBUG(DB_TRACE, DF_DB": sending ae to node %u rank %u: term=%ld\n", - DP_DB(db), raft_node_get_id(node), rdb_node->dn_rank, - msg->term); + D_ASSERT(node != NULL); + D_ASSERT(rdb_node != NULL); + rdb_node_id = rdb_replica_id_decode(raft_node_get_id(node)); + D_DEBUG(DB_TRACE, DF_DB ": sending ae to node " RDB_F_RID ": term=%ld\n", DP_DB(db), + RDB_P_RID(rdb_node_id), msg->term); if (DAOS_FAIL_CHECK(DAOS_RDB_SKIP_APPENDENTRIES_FAIL)) D_GOTO(err, rc = 0); - rc = rdb_create_raft_rpc(RDB_APPENDENTRIES, node, &rpc); + rc = rdb_create_raft_rpc(db, RDB_APPENDENTRIES, node, &rpc); if (rc != 0) { - D_ERROR(DF_DB": failed to create AE RPC to node %d: %d\n", - DP_DB(db), raft_node_get_id(node), rc); + DL_ERROR(rc, DF_DB ": failed to create AE RPC to node " RDB_F_RID, DP_DB(db), + RDB_P_RID(rdb_node_id)); D_GOTO(err, rc); } in = crt_req_get(rpc); - uuid_copy(in->aei_op.ri_uuid, db->d_uuid); rc = rdb_raft_clone_ae(db, msg, &in->aei_msg); if (rc != 0) { D_ERROR(DF_DB": failed to allocate entry array\n", DP_DB(db)); @@ -213,8 +217,8 @@ rdb_raft_cb_send_appendentries(raft_server_t *raft, void *arg, rc = rdb_send_raft_rpc(rpc, db); if (rc != 0) { - D_ERROR(DF_DB": failed to send AE RPC to node %d: %d\n", - DP_DB(db), raft_node_get_id(node), rc); + DL_ERROR(rc, DF_DB ": failed to send AE RPC to node " RDB_F_RID, DP_DB(db), + RDB_P_RID(rdb_node_id)); D_GOTO(err_in, rc); } return 0; @@ -228,60 +232,186 @@ rdb_raft_cb_send_appendentries(raft_server_t *raft, void *arg, } static int -rdb_raft_store_replicas(daos_handle_t lc, uint64_t index, const d_rank_list_t *replicas, - 
rdb_vos_tx_t vtx) +rdb_replica_record_compare_void(const void *vx, const void *vy) +{ + const struct rdb_replica_record *x = vx; + const struct rdb_replica_record *y = vy; + + return rdb_replica_id_compare(x->drr_id, y->drr_id); +} + +/* Just some defensive sanity checks. */ +static int +rdb_raft_check_replicas(uuid_t db_uuid, uint32_t layout_version, + struct rdb_replica_record *replicas, int replicas_len) +{ + struct rdb_replica_record *rs; + int rs_len; + int i; + int rc; + + if (replicas_len <= 0 || replicas_len > UINT8_MAX) { + D_ERROR(DF_UUID ": invalid replicas_len: %d\n", DP_UUID(db_uuid), replicas_len); + rc = -DER_INVAL; + goto out; + } + + rs_len = replicas_len; + D_ALLOC_ARRAY(rs, rs_len); + if (rs == NULL) { + rc = -DER_NOMEM; + goto out; + } + memcpy(rs, replicas, sizeof(*rs) * replicas_len); + qsort(rs, rs_len, sizeof(*rs), rdb_replica_record_compare_void); + + for (i = 0; i < replicas_len; i++) { + if (i > 0 && rs[i].drr_id.rri_rank == rs[i - 1].drr_id.rri_rank) { + D_ERROR(DF_UUID ": duplicate replica rank: %u\n", DP_UUID(db_uuid), + rs[i].drr_id.rri_rank); + rc = -DER_INVAL; + goto out_rs; + } + if (layout_version < RDB_LAYOUT_VERSION_REPLICA_ID && + replicas[i].drr_id.rri_gen != 0) { + D_ERROR(DF_UUID ": unexpected replica gen: " RDB_F_RID "\n", + DP_UUID(db_uuid), RDB_P_RID(replicas[i].drr_id)); + rc = -DER_INVAL; + goto out_rs; + } + } + + rc = 0; +out_rs: + D_FREE(rs); +out: + return rc; +} + +static int +rdb_raft_store_replicas(uuid_t db_uuid, daos_handle_t lc, uint64_t index, uint32_t layout_version, + struct rdb_replica_record *replicas, int replicas_len, rdb_vos_tx_t vtx) { - d_iov_t keys[2]; - d_iov_t vals[2]; - uint8_t nreplicas; + d_iov_t keys[2]; + d_iov_t vals[2]; + uint8_t nreplicas; + d_rank_t *ranks = NULL; + int i; + int rc; + + rc = rdb_raft_check_replicas(db_uuid, layout_version, replicas, replicas_len); + if (rc != 0) + return rc; - D_ASSERTF(replicas->rl_nr <= UINT8_MAX, "nreplicas = %u", - replicas->rl_nr); - nreplicas = 
replicas->rl_nr; + D_ASSERTF(0 < replicas_len && replicas_len <= UINT8_MAX, "replicas_len = %u", replicas_len); + nreplicas = replicas_len; keys[0] = rdb_lc_nreplicas; d_iov_set(&vals[0], &nreplicas, sizeof(nreplicas)); + keys[1] = rdb_lc_replicas; - d_iov_set(&vals[1], replicas->rl_ranks, sizeof(*replicas->rl_ranks) * nreplicas); - return rdb_lc_update(lc, index, RDB_LC_ATTRS, true /* crit */, 2 /* n */, keys, vals, vtx); + if (layout_version < RDB_LAYOUT_VERSION_REPLICA_ID) { + D_ALLOC_ARRAY(ranks, replicas_len); + if (ranks == NULL) + return -DER_NOMEM; + for (i = 0; i < replicas_len; i++) + ranks[i] = replicas[i].drr_id.rri_rank; + d_iov_set(&vals[1], ranks, sizeof(*ranks) * replicas_len); + } else { + d_iov_set(&vals[1], replicas, sizeof(*replicas) * replicas_len); + } + + rc = rdb_lc_update(lc, index, RDB_LC_ATTRS, true /* crit */, 2 /* n */, keys, vals, vtx); + if (rc == 0) { + D_DEBUG(DB_MD, DF_UUID ": stored nreplicas and replicas at " DF_U64 ":\n", + DP_UUID(db_uuid), index); + for (i = 0; i < replicas_len; i++) + D_DEBUG(DB_MD, DF_UUID ": [%d]: id=" RDB_F_RID " reserved=" DF_X64 "\n", + DP_UUID(db_uuid), i, RDB_P_RID(replicas[i].drr_id), + replicas[i].drr_reserved); + } else { + DL_ERROR(rc, DF_UUID ": failed to update nreplicas and replicas", DP_UUID(db_uuid)); + } + + D_FREE(ranks); + return rc; } +/* The caller must free *replicas_out with D_FREE. 
*/ int -rdb_raft_load_replicas(daos_handle_t lc, uint64_t index, d_rank_list_t **replicas) +rdb_raft_load_replicas(uuid_t db_uuid, daos_handle_t lc, uint64_t index, uint32_t layout_version, + struct rdb_replica_record **replicas_out, int *replicas_len_out) { - d_iov_t value; - uint8_t nreplicas; - d_rank_list_t *r; - int rc; + d_iov_t value; + uint8_t nreplicas; + struct rdb_replica_record *replicas = NULL; + d_rank_t *ranks = NULL; + int i; + int rc; d_iov_set(&value, &nreplicas, sizeof(nreplicas)); rc = rdb_lc_lookup(lc, index, RDB_LC_ATTRS, &rdb_lc_nreplicas, &value); if (rc == -DER_NONEXIST) { - D_DEBUG(DB_MD, "no replicas in "DF_U64"\n", index); + D_DEBUG(DB_MD, DF_UUID ": no replicas at " DF_U64 "\n", DP_UUID(db_uuid), index); nreplicas = 0; + rc = 0; } else if (rc != 0) { - return rc; + DL_ERROR(rc, DF_UUID ": failed to look up nreplicas", DP_UUID(db_uuid)); + goto out; } - r = daos_rank_list_alloc(nreplicas); - if (r == NULL) - return -DER_NOMEM; - if (nreplicas > 0) { - d_iov_set(&value, r->rl_ranks, sizeof(*r->rl_ranks) * nreplicas); + D_ALLOC_ARRAY(replicas, nreplicas); + if (replicas == NULL) { + rc = -DER_NOMEM; + goto out; + } + + if (layout_version < RDB_LAYOUT_VERSION_REPLICA_ID) { + D_ALLOC_ARRAY(ranks, nreplicas); + if (ranks == NULL) { + rc = -DER_NOMEM; + goto out; + } + d_iov_set(&value, ranks, sizeof(*ranks) * nreplicas); + } else { + d_iov_set(&value, replicas, sizeof(*replicas) * nreplicas); + } + rc = rdb_lc_lookup(lc, index, RDB_LC_ATTRS, &rdb_lc_replicas, &value); if (rc != 0) { - d_rank_list_free(r); - return rc; + DL_ERROR(rc, DF_UUID ": failed to look up replicas", DP_UUID(db_uuid)); + goto out; } + + if (layout_version < RDB_LAYOUT_VERSION_REPLICA_ID) + for (i = 0; i < nreplicas; i++) + replicas[i].drr_id.rri_rank = ranks[i]; + + rc = rdb_raft_check_replicas(db_uuid, layout_version, replicas, nreplicas); + if (rc != 0) + goto out; } - *replicas = r; - return 0; +out: + D_FREE(ranks); + if (rc == 0) { + D_DEBUG(DB_MD, DF_UUID ": loaded 
nreplicas and replicas at " DF_U64 ":\n", + DP_UUID(db_uuid), index); + for (i = 0; i < nreplicas; i++) + D_DEBUG(DB_MD, DF_UUID ": [%d]: id=" RDB_F_RID " reserved=" DF_X64 "\n", + DP_UUID(db_uuid), i, RDB_P_RID(replicas[i].drr_id), + replicas[i].drr_reserved); + *replicas_out = replicas; + *replicas_len_out = nreplicas; + } else { + D_FREE(replicas); + } + return rc; } /* Caller must hold d_raft_mutex. */ static int -rdb_raft_add_node(struct rdb *db, d_rank_t rank) +rdb_raft_add_node(struct rdb *db, struct rdb_replica_record record) { struct rdb_raft_node *dnode; raft_node_t *node; @@ -295,13 +425,17 @@ rdb_raft_add_node(struct rdb *db, d_rank_t rank) dnode = calloc(1, sizeof(*dnode)); if (dnode == NULL) D_GOTO(out, rc = -DER_NOMEM); - dnode->dn_rank = rank; - node = raft_add_node(db->d_raft, dnode, rank, rank == dss_self_rank()); + dnode->dn_rank = record.drr_id.rri_rank; + + node = raft_add_node(db->d_raft, dnode, rdb_replica_id_encode(record.drr_id), + rdb_replica_id_compare(record.drr_id, db->d_replica_id) == 0); if (node == NULL) { - D_ERROR(DF_DB": failed to add node %u\n", DP_DB(db), rank); + D_ERROR(DF_DB ": failed to add node " RDB_F_RID "\n", DP_DB(db), + RDB_P_RID(record.drr_id)); free(dnode); D_GOTO(out, rc = -DER_NOMEM); } + out: return rc; } @@ -310,9 +444,10 @@ rdb_raft_add_node(struct rdb *db, d_rank_t rank) static int rdb_raft_load_snapshot(struct rdb *db) { - d_rank_list_t *replicas; - int i; - int rc; + struct rdb_replica_record *replicas; + int replicas_len; + int i; + int rc; D_DEBUG(DB_MD, DF_DB": loading snapshot: base="DF_U64" term="DF_U64"\n", DP_DB(db), db->d_lc_record.dlr_base, @@ -323,7 +458,8 @@ rdb_raft_load_snapshot(struct rdb *db) * after the raft_begin_load_snapshot call, which removes all nodes in * raft. 
*/ - rc = rdb_raft_load_replicas(db->d_lc, db->d_lc_record.dlr_base, &replicas); + rc = rdb_raft_load_replicas(db->d_uuid, db->d_lc, db->d_lc_record.dlr_base, db->d_version, + &replicas, &replicas_len); if (rc != 0) { D_ERROR(DF_DB": failed to load replicas in snapshot "DF_U64" (term="DF_U64"): " DF_RC"\n", DP_DB(db), db->d_lc_record.dlr_base, @@ -354,8 +490,8 @@ rdb_raft_load_snapshot(struct rdb *db) } /* Add the corresponding nodes to raft. */ - for (i = 0; i < replicas->rl_nr; i++) { - rc = rdb_raft_add_node(db, replicas->rl_ranks[i]); + for (i = 0; i < replicas_len; i++) { + rc = rdb_raft_add_node(db, replicas[i]); /* TODO: Freeze and shut down db. */ D_ASSERTF(rc == 0, "failed to add node: "DF_RC"\n", DP_RC(rc)); } @@ -364,7 +500,7 @@ rdb_raft_load_snapshot(struct rdb *db) D_ASSERTF(rc == 0, ""DF_RC"\n", DP_RC(rc)); out_replicas: - d_rank_list_free(replicas); + D_FREE(replicas); out: return rc; } @@ -439,6 +575,7 @@ rdb_raft_cb_send_installsnapshot(raft_server_t *raft, void *arg, raft_node_t *node, msg_installsnapshot_t *msg) { struct rdb *db = arg; + rdb_replica_id_t rdb_node_id; struct rdb_raft_node *rdb_node = raft_node_get_udata(node); struct rdb_raft_is *is = &rdb_node->dn_is; crt_rpc_t *rpc; @@ -449,16 +586,20 @@ rdb_raft_cb_send_installsnapshot(raft_server_t *raft, void *arg, struct dss_module_info *info = dss_get_module_info(); int rc; - rc = rdb_create_raft_rpc(RDB_INSTALLSNAPSHOT, node, &rpc); + D_ASSERT(db->d_raft == raft); + D_ASSERT(node != NULL); + D_ASSERT(rdb_node != NULL); + rdb_node_id = rdb_replica_id_decode(raft_node_get_id(node)); + + rc = rdb_create_raft_rpc(db, RDB_INSTALLSNAPSHOT, node, &rpc); if (rc != 0) { - D_ERROR(DF_DB": failed to create IS RPC to rank %u: %d\n", - DP_DB(db), rdb_node->dn_rank, rc); + DL_ERROR(rc, DF_DB ": failed to create IS RPC to node " RDB_F_RID, DP_DB(db), + RDB_P_RID(rdb_node_id)); goto err; } /* Start filling the request. 
*/ - in = crt_req_get(rpc); - uuid_copy(in->isi_op.ri_uuid, db->d_uuid); + in = crt_req_get(rpc); in->isi_msg = *msg; /* @@ -505,8 +646,8 @@ rdb_raft_cb_send_installsnapshot(raft_server_t *raft, void *arg, rc = crt_bulk_create(info->dmi_ctx, &sgl, CRT_BULK_RO, &in->isi_kds); if (rc != 0) { - D_ERROR(DF_DB": failed to create key descriptor bulk for rank " - "%u: %d\n", DP_DB(db), rdb_node->dn_rank, rc); + DL_ERROR(rc, DF_DB ": failed to create key descriptor bulk for node " RDB_F_RID, + DP_DB(db), RDB_P_RID(rdb_node_id)); goto err_data; } data.iov_buf_len = data.iov_len; @@ -515,24 +656,23 @@ rdb_raft_cb_send_installsnapshot(raft_server_t *raft, void *arg, sgl.sg_iovs = &data; rc = crt_bulk_create(info->dmi_ctx, &sgl, CRT_BULK_RO, &in->isi_data); if (rc != 0) { - D_ERROR(DF_DB": failed to create key bulk for rank %u: %d\n", - DP_DB(db), rdb_node->dn_rank, rc); + DL_ERROR(rc, DF_DB ": failed to create key bulk for node " RDB_F_RID, DP_DB(db), + RDB_P_RID(rdb_node_id)); goto err_kds_bulk; } rc = rdb_send_raft_rpc(rpc, db); if (rc != 0) { - D_ERROR(DF_DB": failed to send IS RPC to rank %u: %d\n", - DP_DB(db), rdb_node->dn_rank, rc); + DL_ERROR(rc, DF_DB ": failed to send IS RPC to node " RDB_F_RID, DP_DB(db), + RDB_P_RID(rdb_node_id)); goto err_data_bulk; } D_DEBUG(DB_TRACE, - DF_DB": sent is to node %u rank %u: term=%ld last_idx=%ld seq=" - DF_U64" kds.len="DF_U64" data.len="DF_U64"\n", - DP_DB(db), raft_node_get_id(node), rdb_node->dn_rank, - in->isi_msg.term, in->isi_msg.last_idx, in->isi_seq, - kds.iov_len, data.iov_len); + DF_DB ": sent is to node " RDB_F_RID ": term=%ld last_idx=%ld seq=" DF_U64 + " kds.len=" DF_U64 " data.len=" DF_U64 "\n", + DP_DB(db), RDB_P_RID(rdb_node_id), in->isi_msg.term, in->isi_msg.last_idx, + in->isi_seq, kds.iov_len, data.iov_len); return 0; err_data_bulk: @@ -950,19 +1090,24 @@ rdb_raft_cb_recv_installsnapshot_resp(raft_server_t *raft, void *arg, { struct rdb *db = arg; struct rdb_raft_node *rdb_node = raft_node_get_udata(node); + 
rdb_replica_id_t rdb_node_id; struct rdb_raft_is *is = &rdb_node->dn_is; struct rdb_installsnapshot_out *out; + D_ASSERT(db->d_raft == raft); + D_ASSERT(node != NULL); + D_ASSERT(rdb_node != NULL); + rdb_node_id = rdb_replica_id_decode(raft_node_get_id(node)); out = container_of(resp, struct rdb_installsnapshot_out, iso_msg); /* If no longer transferring this snapshot, ignore this response. */ if (rdb_node->dn_term != raft_get_current_term(raft) || is->dis_index != resp->last_idx) { D_DEBUG(DB_TRACE, - DF_DB": rank %u: stale term "DF_U64" != %ld or index " - DF_U64" != %ld\n", DP_DB(db), rdb_node->dn_rank, - rdb_node->dn_term, raft_get_current_term(raft), - is->dis_index, resp->last_idx); + DF_DB ": node " RDB_F_RID ": stale term " DF_U64 " != %ld or index " DF_U64 + " != %ld\n", + DP_DB(db), RDB_P_RID(rdb_node_id), rdb_node->dn_term, + raft_get_current_term(raft), is->dis_index, resp->last_idx); return 0; } @@ -974,8 +1119,8 @@ rdb_raft_cb_recv_installsnapshot_resp(raft_server_t *raft, void *arg, * snapshot. */ if (resp->complete) { - D_DEBUG(DB_TRACE, DF_DB": rank %u: completed snapshot %ld\n", DP_DB(db), - rdb_node->dn_rank, resp->last_idx); + D_DEBUG(DB_TRACE, DF_DB ": node " RDB_F_RID ": completed snapshot %ld\n", + DP_DB(db), RDB_P_RID(rdb_node_id), resp->last_idx); return 0; } @@ -983,26 +1128,25 @@ rdb_raft_cb_recv_installsnapshot_resp(raft_server_t *raft, void *arg, * ... and the snapshot is not complete, return a generic error so * that raft will not retry too eagerly. */ - D_DEBUG(DB_TRACE, - DF_DB": rank %u: unsuccessful chunk %ld/"DF_U64"(" - DF_U64")\n", DP_DB(db), rdb_node->dn_rank, - resp->last_idx, out->iso_seq, is->dis_seq); + D_DEBUG( + DB_TRACE, + DF_DB ": node " RDB_F_RID ": unsuccessful chunk %ld/" DF_U64 "(" DF_U64 ")\n", + DP_DB(db), RDB_P_RID(rdb_node_id), resp->last_idx, out->iso_seq, is->dis_seq); return -DER_MISC; } /* Ignore this stale response. 
*/ if (out->iso_seq <= is->dis_seq) { D_DEBUG(DB_TRACE, - DF_DB": rank %u: stale chunk %ld/"DF_U64"("DF_U64")\n", - DP_DB(db), rdb_node->dn_rank, resp->last_idx, - out->iso_seq, is->dis_seq); + DF_DB ": node " RDB_F_RID ": stale chunk %ld/" DF_U64 "(" DF_U64 ")\n", + DP_DB(db), RDB_P_RID(rdb_node_id), resp->last_idx, out->iso_seq, + is->dis_seq); return 0; } D_DEBUG(DB_TRACE, - DF_DB": rank %u: completed chunk %ld/"DF_U64"("DF_U64")\n", - DP_DB(db), rdb_node->dn_rank, resp->last_idx, out->iso_seq, - is->dis_seq); + DF_DB ": node " RDB_F_RID ": completed chunk %ld/" DF_U64 "(" DF_U64 ")\n", + DP_DB(db), RDB_P_RID(rdb_node_id), resp->last_idx, out->iso_seq, is->dis_seq); /* Update the last sequence number and anchor. */ is->dis_seq = out->iso_seq; @@ -1014,18 +1158,19 @@ rdb_raft_cb_recv_installsnapshot_resp(raft_server_t *raft, void *arg, static int rdb_raft_cb_persist_vote(raft_server_t *raft, void *arg, raft_node_id_t vote) { - struct rdb *db = arg; - d_iov_t value; - int rc; + struct rdb *db = arg; + rdb_replica_id_t rdb_vote = rdb_replica_id_decode(vote); + d_iov_t value; + int rc; if (!db->d_raft_loaded) return 0; - d_iov_set(&value, &vote, sizeof(vote)); + rdb_set_mc_vote_update_buf(db, &rdb_vote, &value); rc = rdb_mc_update(db->d_mc, RDB_MC_ATTRS, 1 /* n */, &rdb_mc_vote, &value, NULL /* vtx */); if (rc != 0) - D_ERROR(DF_DB": failed to persist vote %d: %d\n", DP_DB(db), - vote, rc); + DL_ERROR(rc, DF_DB ": failed to persist vote " RDB_F_RID, DP_DB(db), + RDB_P_RID(rdb_vote)); return rc; } @@ -1034,10 +1179,11 @@ static int rdb_raft_cb_persist_term(raft_server_t *raft, void *arg, raft_term_t term, raft_node_id_t vote) { - struct rdb *db = arg; - d_iov_t keys[2]; - d_iov_t values[2]; - int rc; + struct rdb *db = arg; + rdb_replica_id_t rdb_vote = rdb_replica_id_decode(vote); + d_iov_t keys[2]; + d_iov_t values[2]; + int rc; if (!db->d_raft_loaded) return 0; @@ -1046,21 +1192,30 @@ rdb_raft_cb_persist_term(raft_server_t *raft, void *arg, raft_term_t term, 
keys[0] = rdb_mc_term; d_iov_set(&values[0], &term, sizeof(term)); keys[1] = rdb_mc_vote; - d_iov_set(&values[1], &vote, sizeof(vote)); + rdb_set_mc_vote_update_buf(db, &rdb_vote, &values[1]); rc = rdb_mc_update(db->d_mc, RDB_MC_ATTRS, 2 /* n */, keys, values, NULL /* vtx */); if (rc != 0) - D_ERROR(DF_DB ": failed to update term %ld and vote %d: " DF_RC "\n", DP_DB(db), - term, vote, DP_RC(rc)); + DL_ERROR(rc, DF_DB ": failed to update term %ld and vote " RDB_F_RID, DP_DB(db), + term, RDB_P_RID(rdb_vote)); return rc; } -static d_rank_t -rdb_raft_cfg_entry_rank(raft_entry_t *entry) +static rdb_replica_id_t +rdb_raft_cfg_entry_node_id(raft_entry_t *entry, uint32_t layout_version) { + rdb_replica_id_t id; + D_ASSERT(entry->data.buf != NULL); - D_ASSERTF(entry->data.len == sizeof(d_rank_t), "%u\n", entry->data.len); - return *((d_rank_t *)entry->data.buf); + if (layout_version < RDB_LAYOUT_VERSION_REPLICA_ID) { + D_ASSERTF(entry->data.len == sizeof(id.rri_rank), "%u\n", entry->data.len); + id.rri_rank = *(d_rank_t *)entry->data.buf; + id.rri_gen = 0; + } else { + D_ASSERTF(entry->data.len == sizeof(id), "%u\n", entry->data.len); + id = *(rdb_replica_id_t *)entry->data.buf; + } + return id; } /* See rdb_raft_update_node. 
*/ @@ -1070,50 +1225,92 @@ rdb_raft_cfg_entry_rank(raft_entry_t *entry) static int rdb_raft_update_node(struct rdb *db, uint64_t index, raft_entry_t *entry, rdb_vos_tx_t vtx) { - d_rank_list_t *replicas; - d_rank_t rank = rdb_raft_cfg_entry_rank(entry); - bool found; - void *result; - int rc; + struct rdb_replica_record *replicas; + int replicas_len; + rdb_replica_id_t id = rdb_raft_cfg_entry_node_id(entry, db->d_version); + int i; + struct rdb_replica_record *tmp; + int tmp_len; + void *result; + int rc; - D_DEBUG(DB_MD, DF_DB": cfg entry "DF_U64": term=%ld type=%s rank=%u\n", DP_DB(db), index, - entry->term, rdb_raft_entry_type_str(entry->type), rank); + D_DEBUG(DB_MD, DF_DB ": cfg entry " DF_U64 ": term=%ld type=%s node=" RDB_F_RID "\n", + DP_DB(db), index, entry->term, rdb_raft_entry_type_str(entry->type), RDB_P_RID(id)); - rc = rdb_raft_load_replicas(db->d_lc, index, &replicas); + rc = rdb_raft_load_replicas(db->d_uuid, db->d_lc, index, db->d_version, &replicas, + &replicas_len); if (rc != 0) goto out; - found = d_rank_list_find(replicas, rank, NULL); - if (found && entry->type == RAFT_LOGTYPE_ADD_NODE) { - D_ERROR(DF_DB ": %s: rank %u already exists\n", DP_DB(db), - rdb_raft_entry_type_str(entry->type), rank); - rc = -DER_INVAL; - goto out_replicas; - } else if (!found && entry->type == RAFT_LOGTYPE_REMOVE_NODE) { - D_ERROR(DF_DB ": %s: rank %u does not exist\n", DP_DB(db), - rdb_raft_entry_type_str(entry->type), rank); - rc = -DER_INVAL; - goto out_replicas; - } + switch (entry->type) { + case RAFT_LOGTYPE_ADD_NODE: + /* + * Ensure that no existing replica ID uses id.rri_rank or + * id.rri_gen (if nonzero). Note that nonzero generations + * are unique even for different ranks, because of how we + * produce them. 
+ */ + for (i = 0; i < replicas_len; i++) { + if (replicas[i].drr_id.rri_rank == id.rri_rank || + (id.rri_gen != 0 && replicas[i].drr_id.rri_gen == id.rri_gen)) { + D_ERROR(DF_DB ": %s: replica " RDB_F_RID + " already exists: " RDB_F_RID "\n", + DP_DB(db), rdb_raft_entry_type_str(entry->type), + RDB_P_RID(id), RDB_P_RID(replicas[i].drr_id)); + rc = -DER_INVAL; + goto out_replicas; + } + } - if (entry->type == RAFT_LOGTYPE_ADD_NODE) - rc = d_rank_list_append(replicas, rank); - else if (entry->type == RAFT_LOGTYPE_REMOVE_NODE) - rc = d_rank_list_del(replicas, rank); - if (rc != 0) + /* Append id to replicas. */ + tmp_len = replicas_len + 1; + D_REALLOC_ARRAY(tmp, replicas, replicas_len, tmp_len); + if (tmp == NULL) { + rc = -DER_NOMEM; + goto out_replicas; + } + replicas = tmp; + replicas_len = tmp_len; + replicas[replicas_len - 1].drr_id = id; + replicas[replicas_len - 1].drr_reserved = 0; + break; + case RAFT_LOGTYPE_REMOVE_NODE: + /* Find id in replicas. */ + for (i = 0; i < replicas_len; i++) + if (rdb_replica_id_compare(replicas[i].drr_id, id) == 0) + break; + if (i == replicas_len) { + D_ERROR(DF_DB ": %s: replica " RDB_F_RID " does not exist\n", DP_DB(db), + rdb_raft_entry_type_str(entry->type), RDB_P_RID(id)); + rc = -DER_INVAL; + goto out_replicas; + } + + /* Remove it. 
*/ + if (replicas_len - i - 1 > 0) + memmove(&replicas[i], &replicas[i + 1], + (replicas_len - i - 1) * sizeof(*replicas)); + replicas_len--; + break; + default: + D_ERROR(DF_DB ": entry type %s (%d) not supported: " RDB_F_RID "\n", DP_DB(db), + rdb_raft_entry_type_str(entry->type), entry->type, RDB_P_RID(id)); + rc = -DER_NOTSUPPORTED; goto out_replicas; + } - rc = rdb_raft_store_replicas(db->d_lc, index, replicas, vtx); + rc = rdb_raft_store_replicas(db->d_uuid, db->d_lc, index, db->d_version, replicas, + replicas_len, vtx); out_replicas: - d_rank_list_free(replicas); + D_FREE(replicas); out: result = rdb_raft_lookup_result(db, index); if (result != NULL) *(int *)result = rc; if (rc != 0) - D_ERROR(DF_DB": failed to perform %s on rank %u at index "DF_U64": "DF_RC"\n", - DP_DB(db), rdb_raft_entry_type_str(entry->type), rank, index, DP_RC(rc)); + DL_ERROR(rc, DF_DB ": failed to do %s " RDB_F_RID " at index " DF_U64, DP_DB(db), + rdb_raft_entry_type_str(entry->type), RDB_P_RID(id), index); return rc; } @@ -1421,18 +1618,20 @@ rdb_raft_cb_log_pop(raft_server_t *raft, void *arg, raft_entry_t *entry, } static raft_node_id_t -rdb_raft_cb_log_get_node_id(raft_server_t *raft, void *arg, raft_entry_t *entry, - raft_index_t index) +rdb_raft_cb_log_get_node_id(raft_server_t *raft, void *arg, raft_entry_t *entry, raft_index_t index) { - D_ASSERTF(raft_entry_is_cfg_change(entry), "index=%ld type=%s\n", index, + struct rdb *db = arg; + + D_ASSERTF(raft_entry_is_cfg_change(entry), DF_DB ": index=%ld type=%s\n", DP_DB(db), index, rdb_raft_entry_type_str(entry->type)); - return rdb_raft_cfg_entry_rank(entry); + return rdb_replica_id_encode(rdb_raft_cfg_entry_node_id(entry, db->d_version)); } static void rdb_raft_cb_notify_membership_event(raft_server_t *raft, void *udata, raft_node_t *node, raft_entry_t *entry, raft_membership_e type) { + struct rdb *db = udata; struct rdb_raft_node *rdb_node = raft_node_get_udata(node); switch (type) { @@ -1454,7 +1653,7 @@ 
rdb_raft_cb_notify_membership_event(raft_server_t *raft, void *udata, raft_node_ * calloc instead of D_ALLOC_PTR to avoid being fault-injected. */ D_ASSERT(rdb_node != NULL); - rdb_node->dn_rank = rdb_raft_cfg_entry_rank(entry); + rdb_node->dn_rank = rdb_raft_cfg_entry_node_id(entry, db->d_version).rri_rank; raft_node_set_udata(node, rdb_node); break; case RAFT_MEMBERSHIP_REMOVE: @@ -1462,7 +1661,7 @@ rdb_raft_cb_notify_membership_event(raft_server_t *raft, void *udata, raft_node_ free(rdb_node); break; default: - D_ASSERTF(false, "invalid raft membership event type %s\n", + D_ASSERTF(false, DF_DB ": invalid raft membership event type %s\n", DP_DB(db), rdb_raft_entry_type_str(type)); } } @@ -1475,8 +1674,8 @@ rdb_raft_cb_log(raft_server_t *raft, raft_node_t *node, void *arg, raft_loglevel if (node == NULL) \ D_DEBUG(flag, DF_DB ": %s\n", DP_DB(db), buf); \ else \ - D_DEBUG(flag, DF_DB ": %s: rank=%u\n", DP_DB(db), buf, \ - ((struct rdb_raft_node *)raft_node_get_udata(node))->dn_rank); + D_DEBUG(flag, DF_DB ": %s: node=" RDB_F_RID "\n", DP_DB(db), buf, \ + RDB_P_RID(rdb_replica_id_decode(raft_node_get_id(node)))); struct rdb *db = raft_get_udata(raft); @@ -2106,31 +2305,24 @@ rdb_raft_append_apply_internal(struct rdb *db, msg_entry_t *mentry, } int -rdb_raft_add_replica(struct rdb *db, d_rank_t rank) +rdb_raft_append_apply_cfg(struct rdb *db, raft_logtype_e type, rdb_replica_id_t id) { - msg_entry_t entry = {}; - int result; - int rc; + msg_entry_t entry = {.type = type}; + int result; + int rc; - D_DEBUG(DB_MD, DF_DB": Replica Rank: %d\n", DP_DB(db), rank); - entry.type = RAFT_LOGTYPE_ADD_NODE; - entry.data.buf = &rank; - entry.data.len = sizeof(d_rank_t); - rc = rdb_raft_append_apply_internal(db, &entry, &result); - return (rc != 0) ? 
rc : result; -} + D_ASSERTF(raft_entry_is_cfg_change(&entry), "invalid type: %d\n", type); + D_DEBUG(DB_MD, DF_DB ": %s " RDB_F_RID "\n", DP_DB(db), rdb_raft_entry_type_str(type), + RDB_P_RID(id)); -int -rdb_raft_remove_replica(struct rdb *db, d_rank_t rank) -{ - msg_entry_t entry = {}; - int result; - int rc; + if (db->d_version >= RDB_LAYOUT_VERSION_REPLICA_ID) { + entry.data.buf = &id; + entry.data.len = sizeof(id); + } else { + entry.data.buf = &id.rri_rank; + entry.data.len = sizeof(id.rri_rank); + } - D_DEBUG(DB_MD, DF_DB": Replica Rank: %d\n", DP_DB(db), rank); - entry.type = RAFT_LOGTYPE_REMOVE_NODE; - entry.data.buf = &rank; - entry.data.len = sizeof(d_rank_t); rc = rdb_raft_append_apply_internal(db, &entry, &result); return (rc != 0) ? rc : result; } @@ -2326,33 +2518,71 @@ rdb_raft_destroy_lc(daos_handle_t pool, daos_handle_t mc, d_iov_t *key, * error. */ int -rdb_raft_init(daos_handle_t pool, daos_handle_t mc, const d_rank_list_t *replicas) -{ - daos_handle_t lc; - struct rdb_lc_record record; - uint64_t base; - int rc; - int rc_close; +rdb_raft_init(uuid_t db_uuid, daos_handle_t pool, daos_handle_t mc, rdb_replica_id_t *replicas, + int replicas_len, uint32_t layout_version) +{ + d_iov_t value; + daos_handle_t lc; + struct rdb_lc_record record; + uint64_t base; + struct rdb_replica_record *replica_records; + int i; + int rc; + int rc_close; - base = (replicas == NULL || replicas->rl_nr == 0) ? 0 : 1; + /* + * If replicas are specified, we are bootstrapping and shall initialize + * the LC at index 1 with replicas. Otherwise, we are not bootstrapping + * and shall initialize the LC to be empty. + */ + base = (replicas == NULL || replicas_len == 0) ? 
0 : 1; - /* Create log container; base is 1 since we store replicas at idx 1 */ rc = rdb_raft_create_lc(pool, mc, &rdb_mc_lc, base, 0 /* base_term */, 0 /* term */, &record /* lc_record */); - /* Return on failure or if there are no replicas to be stored */ - if (base == 0 || rc != 0) + if (rc != 0) return rc; - /* Record the configuration in the LC at index 1. */ + if (base == 0) + return 0; + rc = vos_cont_open(pool, record.dlr_uuid, &lc); - /* This really should not be happening.. */ - D_ASSERTF(rc == 0, "Open VOS container: "DF_RC"\n", DP_RC(rc)); + /* We are opening a container that we've just created. */ + D_ASSERTF(rc == 0, "open LC: " DF_RC "\n", DP_RC(rc)); - /* No initial configuration if rank list empty */ - rc = rdb_raft_store_replicas(lc, 1 /* base */, replicas, NULL /* vtx */); - if (rc != 0) - D_ERROR("failed to create list of replicas: "DF_RC"\n", - DP_RC(rc)); + D_ALLOC_ARRAY(replica_records, replicas_len); + if (replica_records == NULL) { + rc = -DER_NOMEM; + goto out_lc; + } + for (i = 0; i < replicas_len; i++) + replica_records[i].drr_id = replicas[i]; + rc = rdb_raft_store_replicas(db_uuid, lc, base, layout_version, replica_records, + replicas_len, NULL /* vtx */); + D_FREE(replica_records); + if (rc != 0) { + DL_ERROR(rc, DF_UUID ": failed to initialize replicas", DP_UUID(db_uuid)); + goto out_lc; + } + + /* Initialize rdb_lc_replica_gen_next to max{replicas[].rri_gen} + 1. 
*/ + if (layout_version >= RDB_LAYOUT_VERSION_REPLICA_ID) { + uint32_t replica_gen_next = 0; + + for (i = 0; i < replicas_len; i++) + if (replicas[i].rri_gen > replica_gen_next) + replica_gen_next = replicas[i].rri_gen; + replica_gen_next++; + D_DEBUG(DB_MD, DF_UUID ": replica_gen_next=%u\n", DP_UUID(db_uuid), + replica_gen_next); + d_iov_set(&value, &replica_gen_next, sizeof(replica_gen_next)); + rc = rdb_lc_update(lc, base, RDB_LC_ATTRS, false /* crit */, 1, + &rdb_lc_replica_gen_next, &value, NULL /* vtx */); + if (rc != 0) + DL_ERROR(rc, DF_UUID ": failed to initialize next replica generation", + DP_UUID(db_uuid)); + } + +out_lc: rc_close = vos_cont_close(lc); return (rc != 0) ? rc : rc_close; } @@ -2449,9 +2679,18 @@ rdb_raft_load_entry(struct rdb *db, uint64_t index) return rdb_raft_rc(rc); } - D_DEBUG(DB_TRACE, DF_DB ": loaded entry " DF_U64 ": term=%ld type=%s buf=%p len=%u\n", - DP_DB(db), index, entry.term, rdb_raft_entry_type_str(entry.type), entry.data.buf, - entry.data.len); + if (raft_entry_is_cfg_change(&entry)) { + D_DEBUG(DB_MD, + DF_DB ": loaded cfg entry " DF_U64 ": term=%ld type=%s node=" RDB_F_RID + "\n", + DP_DB(db), index, entry.term, rdb_raft_entry_type_str(entry.type), + RDB_P_RID(rdb_raft_cfg_entry_node_id(&entry, db->d_version))); + } else { + D_DEBUG(DB_TRACE, + DF_DB ": loaded entry " DF_U64 ": term=%ld type=%s buf=%p len=%u\n", + DP_DB(db), index, entry.term, rdb_raft_entry_type_str(entry.type), + entry.data.buf, entry.data.len); + } return 0; } @@ -2650,14 +2889,13 @@ rdb_raft_discard_slc(struct rdb *db) int rdb_raft_dictate(struct rdb *db) { - struct rdb_lc_record lc_record = db->d_lc_record; - uint64_t term; - d_rank_list_t replicas; - d_rank_t self = dss_self_rank(); - d_iov_t keys[2]; - d_iov_t value; - uint64_t index = lc_record.dlr_tail; - int rc; + struct rdb_lc_record lc_record = db->d_lc_record; + uint64_t term; + struct rdb_replica_record replicas = {.drr_id = db->d_replica_id}; + d_iov_t keys[2]; + d_iov_t value; + uint64_t 
index = lc_record.dlr_tail; + int rc; /* * If an SLC exists, discard it, since it must be either stale or @@ -2701,11 +2939,10 @@ rdb_raft_dictate(struct rdb *db) * membership change entry that, for instance, adds a node other than * ourself, which contradicts with the new membership of only ourself. */ - replicas.rl_ranks = &self; - replicas.rl_nr = 1; - rc = rdb_raft_store_replicas(db->d_lc, index, &replicas, NULL /* vtx */); + rc = rdb_raft_store_replicas(db->d_uuid, db->d_lc, index, db->d_version, &replicas, + 1 /* replicas_len */, NULL /* vtx */); if (rc != 0) { - D_ERROR(DF_DB": failed to reset membership: "DF_RC"\n", DP_DB(db), DP_RC(rc)); + DL_ERROR(rc, DF_DB ": failed to reset membership", DP_DB(db)); return rc; } keys[0] = rdb_lc_entry_header; @@ -2868,10 +3105,10 @@ rdb_raft_close(struct rdb *db) static int rdb_raft_load(struct rdb *db) { - d_iov_t value; - uint64_t term; - int vote; - int rc; + d_iov_t value; + uint64_t term; + rdb_replica_id_t vote; + int rc; D_DEBUG(DB_MD, DF_DB": load persistent state: begin\n", DP_DB(db)); D_ASSERT(!db->d_raft_loaded); @@ -2881,16 +3118,21 @@ rdb_raft_load(struct rdb *db) if (rc == 0) { rc = raft_set_current_term(db->d_raft, term); D_ASSERTF(rc == 0, DF_RC"\n", DP_RC(rc)); - } else if (rc != -DER_NONEXIST) { + } else if (rc == -DER_NONEXIST) { + term = 0; + } else { goto out; } - d_iov_set(&value, &vote, sizeof(vote)); + rdb_set_mc_vote_lookup_buf(db, &vote, &value); rc = rdb_mc_lookup(db->d_mc, RDB_MC_ATTRS, &rdb_mc_vote, &value); if (rc == 0) { - rc = raft_vote_for_nodeid(db->d_raft, vote); + rc = raft_vote_for_nodeid(db->d_raft, rdb_replica_id_encode(vote)); D_ASSERTF(rc == 0, DF_RC"\n", DP_RC(rc)); - } else if (rc != -DER_NONEXIST) { + } else if (rc == -DER_NONEXIST) { + vote.rri_rank = -1; + vote.rri_gen = -1; + } else { goto out; } @@ -2899,11 +3141,11 @@ rdb_raft_load(struct rdb *db) goto out; D_DEBUG(DB_MD, - DF_DB ": term=" DF_U64 " vote=%d lc.uuid=" DF_UUID " lc.base=" DF_U64 + DF_DB ": term=" DF_U64 " 
vote=" RDB_F_RID " lc.uuid=" DF_UUID " lc.base=" DF_U64 " lc.base_term=" DF_U64 " lc.tail=" DF_U64 " lc.aggregated=" DF_U64 " lc.term=" DF_U64 " lc.seq=" DF_U64 "\n", - DP_DB(db), term, vote, DP_UUID(db->d_lc_record.dlr_uuid), db->d_lc_record.dlr_base, - db->d_lc_record.dlr_base_term, db->d_lc_record.dlr_tail, + DP_DB(db), term, RDB_P_RID(vote), DP_UUID(db->d_lc_record.dlr_uuid), + db->d_lc_record.dlr_base, db->d_lc_record.dlr_base_term, db->d_lc_record.dlr_tail, db->d_lc_record.dlr_aggregated, db->d_lc_record.dlr_term, db->d_lc_record.dlr_seq); db->d_raft_loaded = true; @@ -2938,7 +3180,7 @@ rdb_raft_start(struct rdb *db) goto err; } - raft_set_nodeid(db->d_raft, dss_self_rank()); + raft_set_nodeid(db->d_raft, rdb_replica_id_encode(db->d_replica_id)); if (db->d_new) raft_set_first_start(db->d_raft); raft_set_callbacks(db->d_raft, &rdb_raft_cbs, db); @@ -2971,12 +3213,6 @@ rdb_raft_start(struct rdb *db) if (rc != 0) goto err_callbackd; - D_DEBUG(DB_MD, - DF_DB": raft started: election_timeout=%dms request_timeout=%dms " - "lease_maintenance_grace=%dms compact_thres="DF_U64" ae_max_entries=%u " - "ae_max_size="DF_U64"\n", DP_DB(db), election_timeout, request_timeout, - lease_maintenance_grace, db->d_compact_thres, db->d_ae_max_entries, - db->d_ae_max_size); return 0; err_callbackd: @@ -3164,41 +3400,86 @@ rdb_raft_wait_applied(struct rdb *db, uint64_t index, uint64_t term) return rc; } +static int +rdb_replica_id_compare_void(const void *vx, const void *vy) +{ + const rdb_replica_id_t *x = vx; + const rdb_replica_id_t *y = vy; + + return rdb_replica_id_compare(*x, *y); +} + int -rdb_raft_get_ranks(struct rdb *db, d_rank_list_t **ranksp) +rdb_raft_get_replicas(struct rdb *db, rdb_replica_id_t **replicas_out, int *replicas_len_out) { - d_rank_list_t *ranks; - int n; - int i; - int rc; + rdb_replica_id_t *replicas; + int n; + int i; + int rc; ABT_mutex_lock(db->d_raft_mutex); n = raft_get_num_nodes(db->d_raft); - ranks = d_rank_list_alloc(n); - if (ranks == NULL) { + 
D_ALLOC_ARRAY(replicas, n); + if (replicas == NULL) { rc = -DER_NOMEM; goto mutex; } for (i = 0; i < n; i++) { - raft_node_t *node = raft_get_node_from_idx(db->d_raft, i); - struct rdb_raft_node *rdb_node = raft_node_get_udata(node); + raft_node_t *node = raft_get_node_from_idx(db->d_raft, i); + raft_node_id_t node_id = raft_node_get_id(node); - ranks->rl_ranks[i] = rdb_node->dn_rank; + replicas[i] = rdb_replica_id_decode(node_id); } - ranks->rl_nr = i; - d_rank_list_sort(ranks); + qsort(replicas, n, sizeof(*replicas), rdb_replica_id_compare_void); - *ranksp = ranks; + *replicas_out = replicas; + *replicas_len_out = n; rc = 0; mutex: ABT_mutex_unlock(db->d_raft_mutex); return rc; } +static int +rdb_lookup_for_request(crt_rpc_t *rpc, struct rdb **db_out) +{ + struct rdb_op_in *in = crt_req_get(rpc); + d_rank_t src_rank; + struct rdb *db; + int rc; + + rc = crt_req_src_rank_get(rpc, &src_rank); + D_ASSERTF(rc == 0, "crt_req_src_rank_get: " DF_RC "\n", DP_RC(rc)); + if (src_rank != in->ri_from.rri_rank) { + D_ERROR(DF_UUID ": inconsistent request: src_rank=%u from=" RDB_F_RID "\n", + DP_UUID(in->ri_uuid), src_rank, RDB_P_RID(in->ri_from)); + return -DER_PROTO; + } + + db = rdb_lookup(in->ri_uuid); + if (db == NULL) + return -DER_NONEXIST; + + if (db->d_stop) { + rdb_put(db); + return -DER_CANCELED; + } + + if (rdb_replica_id_compare(db->d_replica_id, in->ri_to) != 0) { + D_DEBUG(DB_MD, DF_DB ": replica ID mismatch: self=" RDB_F_RID " to=" RDB_F_RID "\n", + DP_DB(db), RDB_P_RID(db->d_replica_id), RDB_P_RID(in->ri_to)); + rdb_put(db); + return -DER_BAD_TARGET; + } + + *db_out = db; + return 0; +} + void rdb_requestvote_handler(crt_rpc_t *rpc) { @@ -3206,46 +3487,40 @@ rdb_requestvote_handler(crt_rpc_t *rpc) struct rdb_requestvote_out *out = crt_reply_get(rpc); struct rdb *db; char *s; - struct rdb_raft_state state; - d_rank_t srcrank; + struct rdb_raft_state state; + raft_node_id_t node_id = rdb_replica_id_encode(in->rvi_op.ri_from); int rc; s = in->rvi_msg.prevote ? 
" (prevote)" : ""; - rc = crt_req_src_rank_get(rpc, &srcrank); - D_ASSERTF(rc == 0, ""DF_RC"\n", DP_RC(rc)); - db = rdb_lookup(in->rvi_op.ri_uuid); - if (db == NULL) - D_GOTO(out, rc = -DER_NONEXIST); - if (db->d_stop) - D_GOTO(out_db, rc = -DER_CANCELED); + rc = rdb_lookup_for_request(rpc, &db); + if (rc != 0) + goto out; - D_DEBUG(DB_TRACE, DF_DB": handling raft rv%s from rank %u\n", - DP_DB(db), s, srcrank); + D_DEBUG(DB_TRACE, DF_DB ": handling raft rv%s from " RDB_F_RID "\n", DP_DB(db), s, + RDB_P_RID(in->rvi_op.ri_from)); ABT_mutex_lock(db->d_raft_mutex); rdb_raft_save_state(db, &state); - rc = raft_recv_requestvote(db->d_raft, - raft_get_node(db->d_raft, - srcrank), - &in->rvi_msg, &out->rvo_msg); + rc = raft_recv_requestvote(db->d_raft, raft_get_node(db->d_raft, node_id), &in->rvi_msg, + &out->rvo_msg); rc = rdb_raft_check_state(db, &state, rc); ABT_mutex_unlock(db->d_raft_mutex); if (rc != 0) { - D_ERROR(DF_DB": failed to process REQUESTVOTE%s from rank %u: " - "%d\n", DP_DB(db), s, srcrank, rc); + DL_ERROR(rc, DF_DB ": failed to process REQUESTVOTE%s from " RDB_F_RID, DP_DB(db), + s, RDB_P_RID(in->rvi_op.ri_from)); /* raft_recv_requestvote() always generates a valid reply. 
*/ rc = 0; } -out_db: rdb_put(db); out: out->rvo_op.ro_rc = rc; + out->rvo_op.ro_from = in->rvi_op.ri_to; + out->rvo_op.ro_to = in->rvi_op.ri_from; rc = crt_reply_send(rpc); if (rc != 0) - D_ERROR(DF_UUID": failed to send REQUESTVOTE%s reply to " - "rank %u: %d\n", DP_UUID(in->rvi_op.ri_uuid), s, - srcrank, rc); + DL_ERROR(rc, DF_UUID ": failed to send REQUESTVOTE%s reply to " RDB_F_RID, + DP_UUID(in->rvi_op.ri_uuid), s, RDB_P_RID(in->rvi_op.ri_from)); } void @@ -3254,44 +3529,38 @@ rdb_appendentries_handler(crt_rpc_t *rpc) struct rdb_appendentries_in *in = crt_req_get(rpc); struct rdb_appendentries_out *out = crt_reply_get(rpc); struct rdb *db; - struct rdb_raft_state state; - d_rank_t srcrank; + struct rdb_raft_state state; + raft_node_id_t node_id = rdb_replica_id_encode(in->aei_op.ri_from); int rc; - rc = crt_req_src_rank_get(rpc, &srcrank); - D_ASSERTF(rc == 0, ""DF_RC"\n", DP_RC(rc)); - - db = rdb_lookup(in->aei_op.ri_uuid); - if (db == NULL) - D_GOTO(out, rc = -DER_NONEXIST); - if (db->d_stop) - D_GOTO(out_db, rc = -DER_CANCELED); + rc = rdb_lookup_for_request(rpc, &db); + if (rc != 0) + goto out; - D_DEBUG(DB_TRACE, DF_DB": handling raft ae from rank %u\n", DP_DB(db), - srcrank); + D_DEBUG(DB_TRACE, DF_DB ": handling raft ae from " RDB_F_RID "\n", DP_DB(db), + RDB_P_RID(in->aei_op.ri_from)); ABT_mutex_lock(db->d_raft_mutex); rdb_raft_save_state(db, &state); - rc = raft_recv_appendentries(db->d_raft, - raft_get_node(db->d_raft, srcrank), - &in->aei_msg, &out->aeo_msg); + rc = raft_recv_appendentries(db->d_raft, raft_get_node(db->d_raft, node_id), &in->aei_msg, + &out->aeo_msg); rc = rdb_raft_check_state(db, &state, rc); ABT_mutex_unlock(db->d_raft_mutex); if (rc != 0) { - D_ERROR(DF_DB": failed to process APPENDENTRIES from rank %u: " - "%d\n", DP_DB(db), srcrank, rc); + DL_ERROR(rc, DF_DB ": failed to process APPENDENTRIES from " RDB_F_RID, DP_DB(db), + RDB_P_RID(in->aei_op.ri_from)); /* raft_recv_appendentries() always generates a valid reply. 
*/ rc = 0; } -out_db: rdb_put(db); out: out->aeo_op.ro_rc = rc; + out->aeo_op.ro_from = in->aei_op.ri_to; + out->aeo_op.ro_to = in->aei_op.ri_from; rc = crt_reply_send(rpc); if (rc != 0) - D_ERROR(DF_UUID": failed to send APPENDENTRIES reply to rank " - "%u: %d\n", DP_UUID(in->aei_op.ri_uuid), - srcrank, rc); + DL_ERROR(rc, DF_UUID ": failed to send APPENDENTRIES reply to " RDB_F_RID, + DP_UUID(in->aei_op.ri_uuid), RDB_P_RID(in->aei_op.ri_from)); } void @@ -3300,46 +3569,38 @@ rdb_installsnapshot_handler(crt_rpc_t *rpc) struct rdb_installsnapshot_in *in = crt_req_get(rpc); struct rdb_installsnapshot_out *out = crt_reply_get(rpc); struct rdb *db; - struct rdb_raft_state state; - d_rank_t srcrank; + struct rdb_raft_state state; + raft_node_id_t node_id = rdb_replica_id_encode(in->isi_op.ri_from); int rc; - rc = crt_req_src_rank_get(rpc, &srcrank); - D_ASSERTF(rc == 0, ""DF_RC"\n", DP_RC(rc)); - - db = rdb_lookup(in->isi_op.ri_uuid); - if (db == NULL) { - rc = -DER_NONEXIST; + rc = rdb_lookup_for_request(rpc, &db); + if (rc != 0) goto out; - } - if (db->d_stop) { - rc = -DER_CANCELED; - goto out_db; - } - D_DEBUG(DB_TRACE, DF_DB": handling raft is from rank %u\n", DP_DB(db), - srcrank); + D_DEBUG(DB_TRACE, DF_DB ": handling raft is from " RDB_F_RID "\n", DP_DB(db), + RDB_P_RID(in->isi_op.ri_from)); /* Receive the bulk data buffers before entering raft. 
*/ rc = rdb_raft_recv_is(db, rpc, &in->isi_local.rl_kds_iov, &in->isi_local.rl_data_iov); if (rc != 0) { - D_ERROR(DF_DB": failed to receive INSTALLSNAPSHOT chunk %ld" - "/"DF_U64": %d\n", DP_DB(db), in->isi_msg.last_idx, - in->isi_seq, rc); + DL_ERROR(rc, + DF_DB ": failed to receive INSTALLSNAPSHOT chunk %ld" + "/" DF_U64 " from " RDB_F_RID, + DP_DB(db), in->isi_msg.last_idx, in->isi_seq, + RDB_P_RID(in->isi_op.ri_from)); goto out_db; } ABT_mutex_lock(db->d_raft_mutex); rdb_raft_save_state(db, &state); - rc = raft_recv_installsnapshot(db->d_raft, - raft_get_node(db->d_raft, srcrank), - &in->isi_msg, &out->iso_msg); + rc = raft_recv_installsnapshot(db->d_raft, raft_get_node(db->d_raft, node_id), &in->isi_msg, + &out->iso_msg); rc = rdb_raft_check_state(db, &state, rc); ABT_mutex_unlock(db->d_raft_mutex); if (rc != 0) { - D_ERROR(DF_DB": failed to process INSTALLSNAPSHOT from rank " - "%u: %d\n", DP_DB(db), srcrank, rc); + DL_ERROR(rc, DF_DB ": failed to process INSTALLSNAPSHOT from " RDB_F_RID, DP_DB(db), + RDB_P_RID(in->isi_op.ri_from)); /* * raft_recv_installsnapshot() always generates a valid reply. 
*/ @@ -3352,11 +3613,12 @@ rdb_installsnapshot_handler(crt_rpc_t *rpc) rdb_put(db); out: out->iso_op.ro_rc = rc; + out->iso_op.ro_from = in->isi_op.ri_to; + out->iso_op.ro_to = in->isi_op.ri_from; rc = crt_reply_send(rpc); if (rc != 0) - D_ERROR(DF_UUID": failed to send INSTALLSNAPSHOT reply to rank " - "%u: %d\n", DP_UUID(in->isi_op.ri_uuid), - srcrank, rc); + DL_ERROR(rc, DF_UUID ": failed to send INSTALLSNAPSHOT reply to " RDB_F_RID, + DP_UUID(in->isi_op.ri_uuid), RDB_P_RID(in->isi_op.ri_from)); } void @@ -3368,18 +3630,28 @@ rdb_raft_process_reply(struct rdb *db, crt_rpc_t *rpc) struct rdb_requestvote_out *out_rv; struct rdb_appendentries_out *out_ae; struct rdb_installsnapshot_out *out_is; - d_rank_t rank; + struct rdb_op_out *out_op = out; + d_rank_t dst_rank; raft_node_t *node; raft_time_t *lease = NULL; int rc; - /* Get the destination of the request - that is the source - * rank of this reply. This CaRT API is based on request hdr. - */ - rc = crt_req_dst_rank_get(rpc, &rank); - D_ASSERTF(rc == 0, ""DF_RC"\n", DP_RC(rc)); + rc = crt_req_dst_rank_get(rpc, &dst_rank); + D_ASSERTF(rc == 0, "crt_req_dst_rank_get: " DF_RC "\n", DP_RC(rc)); + if (dst_rank != out_op->ro_from.rri_rank) { + D_ERROR(DF_DB ": inconsistent reply: dst_rank=%u from=" RDB_F_RID "\n", DP_DB(db), + dst_rank, RDB_P_RID(out_op->ro_from)); + return; + } + + if (rdb_replica_id_compare(db->d_replica_id, out_op->ro_to) != 0) { + D_DEBUG(DB_MD, + DF_DB ": replica ID mismatch: self=" RDB_F_RID " to=" RDB_F_RID " opc=%u\n", + DP_DB(db), RDB_P_RID(db->d_replica_id), RDB_P_RID(out_op->ro_to), opc); + return; + } - rc = ((struct rdb_op_out *)out)->ro_rc; + rc = out_op->ro_rc; if (rc != 0) { D_DEBUG(DB_MD, DF_DB": opc %u failed: %d\n", DP_DB(db), opc, rc); @@ -3404,8 +3676,10 @@ rdb_raft_process_reply(struct rdb *db, crt_rpc_t *rpc) int adjustment = d_hlc2msec(d_hlc_epsilon_get()) + 1 /* ms margin */; if (*lease < adjustment) { - D_ERROR(DF_DB": dropping %s response from rank %u: invalid lease: %ld\n", 
- DP_DB(db), opc == RDB_APPENDENTRIES ? "AE" : "IS", rank, *lease); + D_ERROR(DF_DB ": dropping %s response from " RDB_F_RID + ": invalid lease: %ld\n", + DP_DB(db), opc == RDB_APPENDENTRIES ? "AE" : "IS", + RDB_P_RID(out_op->ro_from), *lease); return; } *lease -= adjustment; @@ -3413,9 +3687,10 @@ rdb_raft_process_reply(struct rdb *db, crt_rpc_t *rpc) ABT_mutex_lock(db->d_raft_mutex); - node = raft_get_node(db->d_raft, rank); + node = raft_get_node(db->d_raft, rdb_replica_id_encode(out_op->ro_from)); if (node == NULL) { - D_DEBUG(DB_MD, DF_DB": rank %u not in current membership\n", DP_DB(db), rank); + D_DEBUG(DB_MD, DF_DB ": " RDB_F_RID " not in current membership\n", DP_DB(db), + RDB_P_RID(out_op->ro_from)); goto out_mutex; } @@ -3438,8 +3713,8 @@ rdb_raft_process_reply(struct rdb *db, crt_rpc_t *rpc) } rc = rdb_raft_check_state(db, &state, rc); if (rc != 0 && rc != -DER_NOTLEADER) - DL_ERROR(rc, DF_DB ": failed to process opc %u response from rank %u", DP_DB(db), - opc, rank); + DL_ERROR(rc, DF_DB ": failed to process opc %u response from " RDB_F_RID, DP_DB(db), + opc, RDB_P_RID(out_op->ro_from)); out_mutex: ABT_mutex_unlock(db->d_raft_mutex); diff --git a/src/rdb/rdb_rpc.c b/src/rdb/rdb_rpc.c index e4479e258d3..c987c9e8308 100644 --- a/src/rdb/rdb_rpc.c +++ b/src/rdb/rdb_rpc.c @@ -172,6 +172,21 @@ crt_proc_struct_rdb_local(crt_proc_t proc, crt_proc_op_t proc_op, return 0; } +int +crt_proc_rdb_replica_id_t(crt_proc_t proc, crt_proc_op_t proc_op, rdb_replica_id_t *p) +{ + int rc; + + rc = crt_proc_uint32_t(proc, proc_op, &p->rri_rank); + if (unlikely(rc)) + return rc; + rc = crt_proc_uint32_t(proc, proc_op, &p->rri_gen); + if (unlikely(rc)) + return rc; + + return 0; +} + CRT_RPC_DEFINE(rdb_op, DAOS_ISEQ_RDB_OP, DAOS_OSEQ_RDB_OP) static int @@ -218,24 +233,42 @@ struct crt_proto_format rdb_proto_fmt = { .cpf_base = DAOS_RPC_OPCODE(0, DAOS_RDB_MODULE, 0) }; +/* Create an RDB RPC and fill the rdb_op_in fields. 
*/ int -rdb_create_raft_rpc(crt_opcode_t opc, raft_node_t *node, crt_rpc_t **rpc) +rdb_create_raft_rpc(struct rdb *db, crt_opcode_t opc, raft_node_t *node, crt_rpc_t **rpc) { - crt_opcode_t opc_full; - crt_endpoint_t ep; + rdb_replica_id_t id = rdb_replica_id_decode(raft_node_get_id(node)); + crt_opcode_t opc_full; + crt_endpoint_t ep; struct dss_module_info *info = dss_get_module_info(); int rc; uint8_t rdb_ver; + struct rdb_op_in *in; rc = rdb_rpc_protocol(&rdb_ver); - if (rc) + if (rc != 0) { + DL_ERROR(rc, DF_DB ": failed to get RDB RPC protocol", DP_DB(db)); return rc; + } + opc_full = DAOS_RPC_OPCODE(opc, DAOS_RDB_MODULE, rdb_ver); - opc_full = DAOS_RPC_OPCODE(opc, DAOS_RDB_MODULE, rdb_ver); - ep.ep_grp = NULL; - ep.ep_rank = raft_node_get_id(node); - ep.ep_tag = daos_rpc_tag(DAOS_REQ_RDB, 0); - return crt_req_create(info->dmi_ctx, &ep, opc_full, rpc); + ep.ep_grp = NULL; + ep.ep_rank = id.rri_rank; + ep.ep_tag = daos_rpc_tag(DAOS_REQ_RDB, 0); + + rc = crt_req_create(info->dmi_ctx, &ep, opc_full, rpc); + if (rc != 0) { + DL_ERROR(rc, DF_DB ": failed to create RPC %u to " RDB_F_RID, DP_DB(db), opc, + RDB_P_RID(id)); + return rc; + } + + in = crt_req_get(*rpc); + uuid_copy(in->ri_uuid, db->d_uuid); + in->ri_from = db->d_replica_id; + in->ri_to = id; + + return 0; } struct rdb_raft_rpc { diff --git a/src/rdb/rdb_tx.c b/src/rdb/rdb_tx.c index 3e0f0617224..d924ee6b27e 100644 --- a/src/rdb/rdb_tx.c +++ b/src/rdb/rdb_tx.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2017-2023 Intel Corporation. 
+ * (C) Copyright 2025 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -565,6 +566,7 @@ rdb_tx_create_kvs(struct rdb_tx *tx, const rdb_path_t *parent, .dto_attr = (struct rdb_kvs_attr *)attr }; + D_ASSERT(parent == &rdb_path_attrs || !rdb_path_is_attrs(parent)); return rdb_tx_append(tx, &op, false /* is_critical */); } @@ -590,6 +592,7 @@ rdb_tx_destroy_kvs(struct rdb_tx *tx, const rdb_path_t *parent, .dto_attr = NULL }; + D_ASSERT(parent == &rdb_path_attrs || !rdb_path_is_attrs(parent)); return rdb_tx_append(tx, &op, true /* is_critical */); } @@ -615,6 +618,7 @@ rdb_tx_update(struct rdb_tx *tx, const rdb_path_t *kvs, const d_iov_t *key, .dto_attr = NULL }; + D_ASSERT(kvs == &rdb_path_attrs || !rdb_path_is_attrs(kvs)); return rdb_tx_append(tx, &op, false /* is_critical */); } @@ -641,6 +645,7 @@ rdb_tx_update_critical(struct rdb_tx *tx, const rdb_path_t *kvs, const d_iov_t * .dto_attr = NULL }; + D_ASSERT(kvs == &rdb_path_attrs || !rdb_path_is_attrs(kvs)); return rdb_tx_append(tx, &op, true /* is_critical */); } @@ -664,6 +669,7 @@ rdb_tx_delete(struct rdb_tx *tx, const rdb_path_t *kvs, const d_iov_t *key) .dto_attr = NULL }; + D_ASSERT(kvs == &rdb_path_attrs || !rdb_path_is_attrs(kvs)); return rdb_tx_append(tx, &op, true /* is_critical */); } @@ -1116,8 +1122,12 @@ rdb_tx_query_pre(struct rdb_tx *tx, const rdb_path_t *path, } ABT_mutex_unlock(tx->dt_db->d_raft_mutex); - if (path == NULL) + if (path == NULL) { + D_ASSERT(kvs == NULL && index == NULL); return 0; + } + + D_ASSERT(path == &rdb_path_attrs || !rdb_path_is_attrs(path)); rc = rdb_kvs_lookup(tx->dt_db, path, i, true /* alloc */, kvs); if (rc != 0) diff --git a/src/rdb/tests/rdb_test.c b/src/rdb/tests/rdb_test.c index dcae690fc75..bd97be80d1b 100644 --- a/src/rdb/tests/rdb_test.c +++ b/src/rdb/tests/rdb_test.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2017-2022 Intel Corporation. 
+ * (C) Copyright 2025 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -265,10 +266,12 @@ rdbt_test_path(void) static void rdbt_test_rsvc(void) { - char *svc_name = "tmp"; - d_iov_t svc_id; - uuid_t uuid; - int rc; + char *svc_name = "tmp"; + d_iov_t svc_id; + uuid_t uuid; + struct rdb_create_params create_params; + rdb_replica_id_t dummy_replicas[1] = {0}; + int rc; d_iov_set(&svc_id, svc_name, strlen(svc_name) + 1); uuid_generate(uuid); @@ -277,8 +280,15 @@ rdbt_test_rsvc(void) * A leader of an older term can't destroy a replica created by a * leader with a newer term. */ - MUST(ds_rsvc_start(DS_RSVC_CLASS_TEST, &svc_id, uuid, 2 /* term */, true /* create */, - DB_CAP, 0 /* vos_df_version */, NULL /* replicas */, NULL /* arg */)); + create_params.rcp_size = DB_CAP; + create_params.rcp_vos_df_version = 0; + create_params.rcp_layout_version = 0; + create_params.rcp_id.rri_rank = dss_self_rank(); + create_params.rcp_id.rri_gen = 1; + create_params.rcp_replicas = NULL; + create_params.rcp_replicas_len = 0; + MUST(ds_rsvc_start(DS_RSVC_CLASS_TEST, &svc_id, uuid, 2 /* term */, DS_RSVC_CREATE, + &create_params, NULL /* arg */)); rc = ds_rsvc_stop(DS_RSVC_CLASS_TEST, &svc_id, 1 /* term */, true /* destroy */); D_ASSERTF(rc == -DER_STALE, DF_RC"\n", DP_RC(rc)); @@ -286,13 +296,43 @@ rdbt_test_rsvc(void) * A leader of an older term can't destroy a replica touched by a * leader with a newer term. 
*/ - rc = ds_rsvc_start(DS_RSVC_CLASS_TEST, &svc_id, uuid, 3 /* term */, true /* create */, - DB_CAP, 0 /* vos_df_version */, NULL /* replicas */, NULL /* arg */); + rc = ds_rsvc_start(DS_RSVC_CLASS_TEST, &svc_id, uuid, 3 /* term */, DS_RSVC_CREATE, + &create_params, NULL /* arg */); D_ASSERTF(rc == -DER_ALREADY, DF_RC"\n", DP_RC(rc)); rc = ds_rsvc_stop(DS_RSVC_CLASS_TEST, &svc_id, 2 /* term */, true /* destroy */); D_ASSERTF(rc == -DER_STALE, DF_RC"\n", DP_RC(rc)); - MUST(ds_rsvc_stop(DS_RSVC_CLASS_TEST, &svc_id, 3 /* term */, true /* destroy */)); + /* + * When creating and bootstrapping a replica, abort if there's an + * existing replica. + */ + create_params.rcp_replicas = dummy_replicas; + create_params.rcp_replicas_len = 1; + rc = ds_rsvc_start(DS_RSVC_CLASS_TEST, &svc_id, uuid, 4 /* term */, DS_RSVC_CREATE, + &create_params, NULL /* arg */); + D_ASSERTF(rc == -DER_EXIST, DF_RC "\n", DP_RC(rc)); + create_params.rcp_replicas = NULL; + create_params.rcp_replicas_len = 0; + + /* + * When creating a replica, destroy any existing replica with a lower + * generation. + */ + create_params.rcp_id.rri_gen = 2; + rc = ds_rsvc_start(DS_RSVC_CLASS_TEST, &svc_id, uuid, 5 /* term */, DS_RSVC_CREATE, + &create_params, NULL /* arg */); + D_ASSERTF(rc == 0, DF_RC "\n", DP_RC(rc)); + + /* + * When creating a replica, abort if there's an existing replica with a + * higher generation. 
+ */ + create_params.rcp_id.rri_gen = 0; + rc = ds_rsvc_start(DS_RSVC_CLASS_TEST, &svc_id, uuid, 6 /* term */, DS_RSVC_CREATE, + &create_params, NULL /* arg */); + D_ASSERTF(rc == -DER_EXIST, DF_RC "\n", DP_RC(rc)); + + MUST(ds_rsvc_stop(DS_RSVC_CLASS_TEST, &svc_id, 7 /* term */, true /* destroy */)); } struct iterate_cb_arg { @@ -646,10 +686,12 @@ get_all_ranks(d_rank_list_t **list) static void rdbt_init_handler(crt_rpc_t *rpc) { - struct rdbt_init_in *in = crt_req_get(rpc); - d_rank_t rank; - d_rank_t ri; - d_rank_list_t *ranks; + struct rdbt_init_in *in = crt_req_get(rpc); + d_rank_t rank; + int i; + d_rank_list_t *ranks; + rdb_replica_id_t *replicas; + struct ds_rsvc_create_params create_params; MUST(crt_group_rank(NULL /* grp */, &rank)); get_all_ranks(&ranks); @@ -657,13 +699,24 @@ rdbt_init_handler(crt_rpc_t *rpc) if (in->tii_nreplicas < ranks->rl_nr) ranks->rl_nr = in->tii_nreplicas; + D_ALLOC_ARRAY(replicas, ranks->rl_nr); + D_ASSERT(replicas != NULL); + D_WARN("initializing rank %u: nreplicas=%u\n", rank, ranks->rl_nr); - for (ri = 0; ri < ranks->rl_nr; ri++) - D_WARN("ranks[%u]=%u\n", ri, ranks->rl_ranks[ri]); + for (i = 0; i < ranks->rl_nr; i++) { + replicas[i].rri_rank = ranks->rl_ranks[i]; + replicas[i].rri_gen = i + 1; + D_WARN(" replicas[%u]=" RDB_F_RID "\n", i, RDB_P_RID(replicas[i])); + } + create_params.scp_bootstrap = true; + create_params.scp_size = DB_CAP; + create_params.scp_vos_df_version = 0; + create_params.scp_layout_version = 0; + create_params.scp_replicas = replicas; + create_params.scp_replicas_len = ranks->rl_nr; MUST(ds_rsvc_dist_start(DS_RSVC_CLASS_TEST, &test_svc_id, in->tii_uuid, ranks, RDB_NIL_TERM, - DS_RSVC_CREATE, true /* bootstrap */, DB_CAP, - 0 /* vos_df_version*/)); + DS_RSVC_CREATE, &create_params)); crt_reply_send(rpc); } @@ -872,8 +925,7 @@ rdbt_dictate_handler(crt_rpc_t *rpc) ranks->rl_ranks[0] = in->rti_rank; ranks->rl_nr = 1; MUST(ds_rsvc_dist_start(DS_RSVC_CLASS_TEST, &test_svc_id, db_uuid, ranks, RDB_NIL_TERM, - 
DS_RSVC_DICTATE, false /* bootstrap */, 0 /* size */, - 0 /* vos_df_version */)); + DS_RSVC_DICTATE, NULL)); d_rank_list_free(ranks); out->rto_rc = 0; diff --git a/src/rdb/tests/rdbt.c b/src/rdb/tests/rdbt.c index 0d76aa376ca..4ec816c409e 100644 --- a/src/rdb/tests/rdbt.c +++ b/src/rdb/tests/rdbt.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2017-2022 Intel Corporation. + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -405,6 +406,7 @@ rdbt_add_replica_rank(crt_group_t *grp, d_rank_t ldr_rank, d_rank_t new_rank, if (out->rtmo_failed != NULL) fprintf(stderr, "ERR: adding replica %u (reply rank %u)\n", new_rank, out->rtmo_failed->rl_ranks[0]); + d_rank_list_free(replicas_to_add); destroy_rpc(rpc); return rc; } @@ -433,6 +435,7 @@ rdbt_remove_replica_rank(crt_group_t *group, d_rank_t ldr_rank, if (out->rtmo_failed != NULL) fprintf(stderr, "ERR: removing replica %u (reply rank %u)\n", rem_rank, out->rtmo_failed->rl_ranks[0]); + d_rank_list_free(replicas_to_remove); destroy_rpc(rpc); return rc; } diff --git a/src/rsvc/rpc.h b/src/rsvc/rpc.h index 60fb5f94862..bd7e1d2cbf7 100644 --- a/src/rsvc/rpc.h +++ b/src/rsvc/rpc.h @@ -17,6 +17,7 @@ #include #include #include +#include /* * RPC operation codes @@ -49,6 +50,8 @@ enum rsvc_operation { extern struct crt_proto_format rsvc_proto_fmt; +/* clang-format off */ + #define DAOS_ISEQ_RSVC_START /* input fields */ \ ((d_iov_t) (sai_svc_id) CRT_VAR) \ ((uuid_t) (sai_db_uuid) CRT_VAR) \ @@ -56,17 +59,22 @@ extern struct crt_proto_format rsvc_proto_fmt; ((uint32_t) (sai_mode) CRT_VAR) \ ((uint32_t) (sai_flags) CRT_VAR) \ ((uint32_t) (sai_vos_df_version) CRT_VAR) \ + ((uint32_t) (sai_layout_version) CRT_VAR) \ + ((uint32_t) (sai_padding) CRT_VAR) \ ((uint64_t) (sai_size) CRT_VAR) \ ((uint64_t) (sai_term) CRT_VAR) \ - ((d_rank_list_t) (sai_ranks) CRT_PTR) + ((rdb_replica_id_t) (sai_replicas) CRT_ARRAY) #define DAOS_OSEQ_RSVC_START /* output fields (rc: err count) */ \ 
((int32_t) (sao_rc) CRT_VAR) \ ((int32_t) (sao_rc_errval) CRT_VAR) +/* clang-format on */ CRT_RPC_DECLARE(rsvc_start, DAOS_ISEQ_RSVC_START, DAOS_OSEQ_RSVC_START) +/* clang-format off */ + #define DAOS_ISEQ_RSVC_STOP /* input fields */ \ ((d_iov_t) (soi_svc_id) CRT_VAR) \ ((uint32_t) (soi_class) CRT_VAR) \ @@ -76,6 +84,8 @@ CRT_RPC_DECLARE(rsvc_start, DAOS_ISEQ_RSVC_START, DAOS_OSEQ_RSVC_START) #define DAOS_OSEQ_RSVC_STOP /* output fields */ \ ((int32_t) (soo_rc) CRT_VAR) +/* clang-format on */ + CRT_RPC_DECLARE(rsvc_stop, DAOS_ISEQ_RSVC_STOP, DAOS_OSEQ_RSVC_STOP) int diff --git a/src/rsvc/srv.c b/src/rsvc/srv.c index 14a5321424e..3f2b599eb2b 100644 --- a/src/rsvc/srv.c +++ b/src/rsvc/srv.c @@ -802,16 +802,16 @@ start_mode_str(enum ds_rsvc_start_mode mode) } static bool -self_only(d_rank_list_t *replicas) +self_only(struct rdb_create_params *p) { - return (replicas != NULL && replicas->rl_nr == 1 && - replicas->rl_ranks[0] == dss_self_rank()); + return p->rcp_replicas != NULL && p->rcp_replicas_len == 1 && + rdb_replica_id_compare(p->rcp_replicas[0], p->rcp_id) == 0; } static int start(enum ds_rsvc_class_id class, d_iov_t *id, uuid_t db_uuid, uint64_t term, - enum ds_rsvc_start_mode mode, size_t size, uint32_t vos_df_version, d_rank_list_t *replicas, - void *arg, struct ds_rsvc **svcp) + enum ds_rsvc_start_mode mode, struct rdb_create_params *create_params, void *arg, + struct ds_rsvc **svcp) { struct rdb_storage *storage; struct ds_rsvc *svc = NULL; @@ -823,8 +823,8 @@ start(enum ds_rsvc_class_id class, d_iov_t *id, uuid_t db_uuid, uint64_t term, svc->s_ref++; if (mode == DS_RSVC_CREATE) - rc = rdb_create(svc->s_db_path, svc->s_db_uuid, term, size, vos_df_version, - replicas, &rsvc_rdb_cbs, svc, &storage); + rc = rdb_create(svc->s_db_path, svc->s_db_uuid, term, create_params, &rsvc_rdb_cbs, + svc, &storage); else rc = rdb_open(svc->s_db_path, svc->s_db_uuid, term, &rsvc_rdb_cbs, svc, &storage); if (rc != 0) @@ -840,7 +840,7 @@ start(enum ds_rsvc_class_id class, 
d_iov_t *id, uuid_t db_uuid, uint64_t term, if (rc != 0) goto err_storage; - if (mode == DS_RSVC_CREATE && self_only(replicas) && + if (mode == DS_RSVC_CREATE && self_only(create_params) && rsvc_class(class)->sc_bootstrap != NULL) { rc = bootstrap_self(svc, arg); if (rc != 0) @@ -944,19 +944,15 @@ ds_rsvc_stop_nodb(enum ds_rsvc_class_id class, d_iov_t *id) } /** - * Start a replicated service. If \a mode is not DS_RSVC_CREATE, all remaining - * input parameters are ignored; otherwise, create the replica first. If \a - * replicas is NULL, all remaining input parameters are ignored; otherwise, - * bootstrap the replicated service. + * Start a replicated service. If \a mode is DS_RSVC_CREATE, create the replica + * first; otherwise, \a create_params is ignored. * * \param[in] class replicated service class * \param[in] id replicated service ID * \param[in] db_uuid DB UUID * \param[in] caller_term caller term if not RDB_NIL_TERM (see rdb_open) * \param[in] mode mode of starting the replicated service - * \param[in] size replica size in bytes - * \param[in] vos_df_version version of VOS durable format - * \param[in] replicas optional initial membership + * \param[in] create_params parameters used when \a mode is DS_RSVC_CREATE * \param[in] arg argument for cbs.sc_bootstrap * * \retval -DER_ALREADY replicated service already started @@ -965,8 +961,7 @@ ds_rsvc_stop_nodb(enum ds_rsvc_class_id class, d_iov_t *id) */ int ds_rsvc_start(enum ds_rsvc_class_id class, d_iov_t *id, uuid_t db_uuid, uint64_t caller_term, - enum ds_rsvc_start_mode mode, size_t size, uint32_t vos_df_version, - d_rank_list_t *replicas, void *arg) + enum ds_rsvc_start_mode mode, struct rdb_create_params *create_params, void *arg) { struct ds_rsvc *svc = NULL; d_list_t *entry; @@ -976,15 +971,47 @@ ds_rsvc_start(enum ds_rsvc_class_id class, d_iov_t *id, uuid_t db_uuid, uint64_t entry = d_hash_rec_find(&rsvc_hash, id->iov_buf, id->iov_len); if (entry != NULL) { + rdb_replica_id_t rid; + svc = 
rsvc_obj(entry); - D_DEBUG(DB_MD, "%s: found: stop=%d mode=%s replicas=%p\n", svc->s_name, svc->s_stop, - start_mode_str(mode), replicas); - if (mode == DS_RSVC_CREATE && replicas != NULL) { + rid = rdb_get_replica_id(svc->s_db); + D_DEBUG(DB_MD, "%s: found " RDB_F_RID ": stop=%d mode=%s replicas=%p\n", + svc->s_name, RDB_P_RID(rid), svc->s_stop, start_mode_str(mode), + mode == DS_RSVC_CREATE ? create_params->rcp_replicas : NULL); + if (mode == DS_RSVC_CREATE && create_params->rcp_replicas != NULL) { D_ERROR("%s: creating and bootstrapping existing replica not allowed\n", svc->s_name); rc = -DER_EXIST; - ds_rsvc_put(svc); - goto out; + goto out_svc; + } else if (mode == DS_RSVC_CREATE && rid.rri_gen < create_params->rcp_id.rri_gen) { + int n = 10; + + /* + * Destroy the older replica and continue. Note that the destroy only + * happens when the last svc reference is released. + */ + D_INFO("%s: destroying older replica " RDB_F_RID " for " RDB_F_RID "\n", + svc->s_name, RDB_P_RID(rid), RDB_P_RID(create_params->rcp_id)); + rc = ds_rsvc_stop(class, id, caller_term, true /* destroy */); + if (rc != 0) { + DL_ERROR(rc, "%s: failed to destroy existing replica", svc->s_name); + goto out_svc; + } + while (svc->s_ref > 1 && n > 0) { + dss_sleep(1000); + n--; + } + if (svc->s_ref > 1) { + D_ERROR("%s: gave up waiting for other service references\n", + svc->s_name); + rc = -DER_CANCELED; + goto out_svc; + } + } else if (mode == DS_RSVC_CREATE && rid.rri_gen > create_params->rcp_id.rri_gen) { + D_ERROR("%s: found newer replica: " RDB_F_RID " > " RDB_F_RID "\n", + svc->s_name, RDB_P_RID(rid), RDB_P_RID(create_params->rcp_id)); + rc = -DER_EXIST; + goto out_svc; } else if (mode == DS_RSVC_DICTATE && !svc->s_stop) { /* * If we need to dictate, and the service is not @@ -992,29 +1019,26 @@ ds_rsvc_start(enum ds_rsvc_class_id class, d_iov_t *id, uuid_t db_uuid, uint64_t * this case, and continue. 
*/ rc = ds_rsvc_stop(class, id, caller_term, false /* destroy */); - D_ASSERTF(rc == 0, DF_RC"\n", DP_RC(rc)); - ds_rsvc_put(svc); + D_ASSERTF(rc == 0, DF_RC "\n", DP_RC(rc)); } else { if (caller_term != RDB_NIL_TERM) { rc = rdb_ping(svc->s_db, caller_term); if (rc != 0) { D_CDEBUG(rc == -DER_STALE, DB_MD, DLOG_ERR, "%s: failed to ping local replica\n", svc->s_name); - ds_rsvc_put(svc); - goto out; + goto out_svc; } } if (svc->s_stop) rc = -DER_CANCELED; else rc = -DER_ALREADY; - ds_rsvc_put(svc); - goto out; + goto out_svc; } + ds_rsvc_put(svc); } - rc = start(class, id, db_uuid, caller_term, mode, size, vos_df_version, replicas, arg, - &svc); + rc = start(class, id, db_uuid, caller_term, mode, create_params, arg, &svc); if (rc != 0) goto out; @@ -1027,6 +1051,7 @@ ds_rsvc_start(enum ds_rsvc_class_id class, d_iov_t *id, uuid_t db_uuid, uint64_t } D_DEBUG(DB_MD, "%s: started replicated service\n", svc->s_name); +out_svc: ds_rsvc_put(svc); out: if (rc != 0 && rc != -DER_ALREADY && !(mode == DS_RSVC_CREATE && rc == -DER_EXIST)) @@ -1168,21 +1193,53 @@ int ds_rsvc_add_replicas_s(struct ds_rsvc *svc, d_rank_list_t *ranks, size_t size, uint32_t vos_df_version) { - int rc; + int i; + int rc = 0; - rc = ds_rsvc_dist_start(svc->s_class, &svc->s_id, svc->s_db_uuid, ranks, svc->s_term, - DS_RSVC_CREATE, false /* bootstrap */, size, vos_df_version); + /* Add one by one to reduce waste of replica generations. 
*/ + for (i = 0; i < ranks->rl_nr; i++) { + d_rank_t r = ranks->rl_ranks[i]; + d_rank_list_t rl; + rdb_replica_id_t id; + int ids_len = 1; + struct ds_rsvc_create_params create_params; - /* TODO: Attempt to only add replicas that were successfully started */ - if (rc != 0) - goto out_stop; - rc = rdb_add_replicas(svc->s_db, ranks); -out_stop: - /* Clean up ranks that were not added */ - if (ranks->rl_nr > 0) { - D_ASSERT(rc != 0); - ds_rsvc_dist_stop(svc->s_class, &svc->s_id, ranks, NULL, svc->s_term, - true /* destroy */); + rl.rl_ranks = &r; + rl.rl_nr = 1; + + id.rri_rank = r; + + /* This allocation cannot be rolled back. */ + rc = rdb_alloc_replica_gen(svc->s_db, svc->s_term, &id.rri_gen); + if (rc != 0) + break; + + create_params.scp_bootstrap = false; + create_params.scp_size = size; + create_params.scp_vos_df_version = vos_df_version; + create_params.scp_layout_version = rdb_get_version(svc->s_db); + create_params.scp_replicas = &id; + create_params.scp_replicas_len = 1; + + rc = ds_rsvc_dist_start(svc->s_class, &svc->s_id, svc->s_db_uuid, &rl, svc->s_term, + DS_RSVC_CREATE, &create_params); + if (rc != 0) + break; + + rc = rdb_modify_replicas(svc->s_db, RDB_REPLICA_ADD, &id, &ids_len); + if (rc != 0) { + ds_rsvc_dist_stop(svc->s_class, &svc->s_id, &rl, NULL, svc->s_term, + true /* destroy */); + break; + } + } + + /* Remove all i successfully-added ranks from ranks. 
*/ + if (i > 0) { + ranks->rl_nr -= i; + if (ranks->rl_nr > 0) + memmove(&ranks->rl_ranks[0], &ranks->rl_ranks[i], + ranks->rl_nr * sizeof(ranks->rl_ranks[0])); } return rc; } @@ -1216,22 +1273,70 @@ ds_rsvc_add_replicas(enum ds_rsvc_class_id class, d_iov_t *id, d_rank_list_t *ra } int -ds_rsvc_remove_replicas_s(struct ds_rsvc *svc, d_rank_list_t *ranks) +ds_rsvc_remove_replicas_s(struct ds_rsvc *svc, d_rank_list_t *ranks, bool destroy) { - d_rank_list_t *stop_ranks; - int rc; + d_rank_list_t *stop_ranks; + rdb_replica_id_t *all; + int all_len; + rdb_replica_id_t *to_remove; + int to_remove_len = 0; + int i; + int rc; - rc = daos_rank_list_dup(&stop_ranks, ranks); + rc = d_rank_list_dup(&stop_ranks, ranks); if (rc != 0) - return rc; - rc = rdb_remove_replicas(svc->s_db, ranks); + goto out; + + /* Fill to_remove with replica IDs of ranks. */ + rc = rdb_get_replicas(svc->s_db, &all, &all_len); + if (rc != 0) + goto out_stop_ranks; + D_ALLOC_ARRAY(to_remove, ranks->rl_nr); + if (to_remove == NULL) { + rc = -DER_NOMEM; + goto out_all; + } + for (i = 0; i < ranks->rl_nr; i++) { + d_rank_t rank = ranks->rl_ranks[i]; + int j; + + for (j = 0; j < all_len; j++) { + if (all[j].rri_rank == rank) { + to_remove[to_remove_len] = all[j]; + to_remove_len++; + break; + } + } + if (j == all_len) { + D_ERROR("%s: rank %u not found in replica list\n", svc->s_name, rank); + rc = -DER_NONEXIST; + goto out_to_remove; + } + } + + rc = rdb_modify_replicas(svc->s_db, RDB_REPLICA_REMOVE, to_remove, &to_remove_len); + + /* Update ranks with to_remove (those that couldn't be removed). 
*/ + D_ASSERTF(ranks->rl_nr >= to_remove_len, "%d >= %d\n", ranks->rl_nr, to_remove_len); + ranks->rl_nr = to_remove_len; + for (i = 0; i < to_remove_len; i++) + ranks->rl_ranks[i] = to_remove[i].rri_rank; - /* filter out failed ranks */ - daos_rank_list_filter(ranks, stop_ranks, true /* exclude */); - if (stop_ranks->rl_nr > 0) - ds_rsvc_dist_stop(svc->s_class, &svc->s_id, stop_ranks, NULL, svc->s_term, - true /* destroy */); + if (destroy) { + /* filter out failed ranks */ + d_rank_list_filter(ranks, stop_ranks, true /* exclude */); + if (stop_ranks->rl_nr > 0) + ds_rsvc_dist_stop(svc->s_class, &svc->s_id, stop_ranks, NULL, svc->s_term, + true /* destroy */); + } + +out_to_remove: + D_FREE(to_remove); +out_all: + D_FREE(all); +out_stop_ranks: d_rank_list_free(stop_ranks); +out: return rc; } @@ -1245,7 +1350,7 @@ ds_rsvc_remove_replicas(enum ds_rsvc_class_id class, d_iov_t *id, rc = ds_rsvc_lookup_leader(class, id, &svc, hint); if (rc != 0) return rc; - rc = ds_rsvc_remove_replicas_s(svc, ranks); + rc = ds_rsvc_remove_replicas_s(svc, ranks, true /* destroy */); ds_rsvc_set_hint(svc, hint); ds_rsvc_put_leader(svc); return rc; @@ -1301,22 +1406,22 @@ bcast_create(crt_opcode_t opc, bool filter_invert, d_rank_list_t *filter_ranks, * \param[in] ranks list of replica ranks * \param[in] caller_term caller term if not RDB_NIL_TERM (see rdb_open) * \param[in] mode mode of starting the replicated service - * \param[in] bootstrap create with an initial list of replicas if \a mode is DS_RSVC_CREATE - * \param[in] size size of each replica in bytes if \a mode is DS_RSVC_CREATE - * \param[in] vos_df_version version of VOS durable format if \a mode is DS_RSVC_CREATE + * \param[in] create_params parameters used when \a mode is DS_RSVC_CREATE */ int ds_rsvc_dist_start(enum ds_rsvc_class_id class, d_iov_t *id, const uuid_t dbid, const d_rank_list_t *ranks, uint64_t caller_term, enum ds_rsvc_start_mode mode, - bool bootstrap, size_t size, uint32_t vos_df_version) + struct 
ds_rsvc_create_params *create_params) { crt_rpc_t *rpc; struct rsvc_start_in *in; struct rsvc_start_out *out; int rc; - D_ASSERT(!bootstrap || ranks != NULL); - D_ASSERT(mode != DS_RSVC_DICTATE || ranks->rl_nr == 1); + D_ASSERT(mode != DS_RSVC_CREATE || + (create_params != NULL && create_params->scp_replicas != NULL && + create_params->scp_replicas_len > 0)); + D_ASSERT(mode != DS_RSVC_DICTATE || (ranks != NULL && ranks->rl_nr == 1)); D_DEBUG(DB_MD, DF_UUID": %s DB\n", DP_UUID(dbid), start_mode_str(mode)); rc = bcast_create(RSVC_START, ranks != NULL /* filter_invert */, @@ -1325,21 +1430,23 @@ ds_rsvc_dist_start(enum ds_rsvc_class_id class, d_iov_t *id, const uuid_t dbid, goto out; in = crt_req_get(rpc); in->sai_class = class; - rc = daos_iov_copy(&in->sai_svc_id, id); - if (rc != 0) - goto out_rpc; + in->sai_svc_id = *id; uuid_copy(in->sai_db_uuid, dbid); in->sai_mode = mode; - if (mode == DS_RSVC_CREATE && bootstrap) - in->sai_flags |= RDB_AF_BOOTSTRAP; - in->sai_size = size; - in->sai_vos_df_version = vos_df_version; in->sai_term = caller_term; - in->sai_ranks = (d_rank_list_t *)ranks; + if (mode == DS_RSVC_CREATE) { + if (create_params->scp_bootstrap) + in->sai_flags |= RDB_AF_BOOTSTRAP; + in->sai_size = create_params->scp_size; + in->sai_vos_df_version = create_params->scp_vos_df_version; + in->sai_layout_version = create_params->scp_layout_version; + in->sai_replicas.ca_arrays = create_params->scp_replicas; + in->sai_replicas.ca_count = create_params->scp_replicas_len; + } rc = dss_rpc_send(rpc); if (rc != 0) - goto out_mem; + goto out_rpc; out = crt_reply_get(rpc); rc = out->sao_rc; @@ -1352,8 +1459,6 @@ ds_rsvc_dist_start(enum ds_rsvc_class_id class, d_iov_t *id, const uuid_t dbid, rc = out->sao_rc_errval; } -out_mem: - daos_iov_free(&in->sai_svc_id); out_rpc: crt_req_decref(rpc); out: @@ -1365,23 +1470,44 @@ ds_rsvc_start_handler(crt_rpc_t *rpc) { struct rsvc_start_in *in = crt_req_get(rpc); struct rsvc_start_out *out = crt_reply_get(rpc); - bool bootstrap 
= in->sai_flags & RDB_AF_BOOTSTRAP; + struct rdb_create_params create_params; + bool create = in->sai_mode == DS_RSVC_CREATE; int rc; - if (bootstrap && in->sai_ranks == NULL) { - rc = -DER_PROTO; - goto out; - } + if (create) { + d_rank_t self_rank = dss_self_rank(); + rdb_replica_id_t self; + bool bootstrap = in->sai_flags & RDB_AF_BOOTSTRAP; + int i; - if (in->sai_mode == DS_RSVC_DICTATE && - (in->sai_ranks == NULL || in->sai_ranks->rl_nr != 1)) { - rc = -DER_PROTO; - goto out; + if (in->sai_replicas.ca_arrays == NULL || in->sai_replicas.ca_count == 0) { + D_ERROR(DF_UUID ": no replica IDs\n", DP_UUID(in->sai_db_uuid)); + rc = -DER_PROTO; + goto out; + } + + /* Find self replica ID in in->sai_replicas. */ + for (i = 0; i < in->sai_replicas.ca_count; i++) + if (in->sai_replicas.ca_arrays[i].rri_rank == self_rank) + break; + if (i == in->sai_replicas.ca_count) { + D_ERROR(DF_UUID ": self not in replica IDs: self=%u replicas=" DF_U64 "\n", + DP_UUID(in->sai_db_uuid), self_rank, in->sai_replicas.ca_count); + rc = -DER_PROTO; + goto out; + } + self = in->sai_replicas.ca_arrays[i]; + + create_params.rcp_size = in->sai_size; + create_params.rcp_vos_df_version = in->sai_vos_df_version; + create_params.rcp_layout_version = in->sai_layout_version; + create_params.rcp_id = self; + create_params.rcp_replicas = bootstrap ? in->sai_replicas.ca_arrays : NULL; + create_params.rcp_replicas_len = bootstrap ? in->sai_replicas.ca_count : 0; } rc = ds_rsvc_start(in->sai_class, &in->sai_svc_id, in->sai_db_uuid, in->sai_term, - in->sai_mode, in->sai_size, in->sai_vos_df_version, - bootstrap ? in->sai_ranks : NULL, NULL /* arg */); + in->sai_mode, create ? 
&create_params : NULL, NULL /* arg */); if (rc == -DER_ALREADY) rc = 0; From 92a0cef072684734443f61c18537ca6c9a88cf59 Mon Sep 17 00:00:00 2001 From: Tom Nabarro Date: Fri, 12 Dec 2025 17:11:59 +0000 Subject: [PATCH 079/253] DAOS-18298 control: Increase system_ram_reserved 26 to 64 (#17234) Increase default to handle high-performance configurations. Adjust system_ram_reserved for pool/rf.py and rebuild/basic.py. Signed-off-by: Tom Nabarro --- src/control/cmd/dmg/auto_test.go | 2 +- src/control/server/storage/scm.go | 2 +- src/control/server/storage/scm_test.go | 9 +++++---- src/tests/ftest/pool/rf.yaml | 2 +- src/tests/ftest/rebuild/basic.yaml | 2 +- utils/config/daos_server.yml | 2 +- 6 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/control/cmd/dmg/auto_test.go b/src/control/cmd/dmg/auto_test.go index 578de0d75a6..92bcc9fde86 100644 --- a/src/control/cmd/dmg/auto_test.go +++ b/src/control/cmd/dmg/auto_test.go @@ -592,7 +592,7 @@ disable_vfio: false disable_vmd: false disable_hotplug: false nr_hugepages: 0 -system_ram_reserved: 26 +system_ram_reserved: 64 disable_hugepages: false allow_numa_imbalance: false control_log_mask: INFO diff --git a/src/control/server/storage/scm.go b/src/control/server/storage/scm.go index 593f93b55d4..48c25e7e6da 100644 --- a/src/control/server/storage/scm.go +++ b/src/control/server/storage/scm.go @@ -52,7 +52,7 @@ const ( // Memory reservation constant defaults to be used when calculating RAM-disk size for DAOS I/O engine. 
const ( - DefaultSysMemRsvd = humanize.GiByte * 26 // per-system + DefaultSysMemRsvd = humanize.GiByte * 64 // per-system DefaultTgtMemRsvd = humanize.MiByte * 128 // per-engine-target DefaultEngineMemRsvd = humanize.GiByte * 1 // per-engine ) diff --git a/src/control/server/storage/scm_test.go b/src/control/server/storage/scm_test.go index b5061148e88..67651214432 100644 --- a/src/control/server/storage/scm_test.go +++ b/src/control/server/storage/scm_test.go @@ -1,5 +1,6 @@ // // (C) Copyright 2023-2024 Intel Corporation. +// (C) Copyright 2025 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -47,20 +48,20 @@ func Test_CalcRamdiskSize(t *testing.T) { expErr: errors.New("insufficient ram"), // 30 - (14+26+1) = -1 }, "default values; high mem": { - memTotal: humanize.GiByte * 70, + memTotal: humanize.GiByte * 108, memHuge: humanize.GiByte * 30, memSys: DefaultSysMemRsvd, tgtCount: 16, engCount: 2, - expSize: humanize.GiByte * 5, // (70 - (30+26+4)) / 2 + expSize: humanize.GiByte * 5, // (108 - (30+64+4)) / 2 }, "default values; low nr targets": { - memTotal: humanize.GiByte * 70, + memTotal: humanize.GiByte * 108, memHuge: humanize.GiByte * 30, memSys: DefaultSysMemRsvd, tgtCount: 1, engCount: 2, - expSize: humanize.GiByte * 6, // (70 - (30+26+2)) / 2 + expSize: humanize.GiByte * 6, // (108 - (30+64+2)) / 2 }, "custom values; low sys reservation": { memTotal: humanize.GiByte * 60, diff --git a/src/tests/ftest/pool/rf.yaml b/src/tests/ftest/pool/rf.yaml index d3b9761d349..0cb7c5bf389 100644 --- a/src/tests/ftest/pool/rf.yaml +++ b/src/tests/ftest/pool/rf.yaml @@ -6,7 +6,7 @@ timeout: 300 server_config: engines_per_host: 1 - system_ram_reserved: 16 + system_ram_reserved: 21 engines: 0: storage: diff --git a/src/tests/ftest/rebuild/basic.yaml b/src/tests/ftest/rebuild/basic.yaml index 80fbb15d9ce..e1dc513a44e 100644 --- a/src/tests/ftest/rebuild/basic.yaml +++ b/src/tests/ftest/rebuild/basic.yaml @@ -19,7 +19,7 @@ 
server_config: 0: class: ram scm_mount: /mnt/daos - system_ram_reserved: 1 + system_ram_reserved: 6 pool: size: 1G diff --git a/utils/config/daos_server.yml b/utils/config/daos_server.yml index 16c2760bfc1..9777dfb0905 100644 --- a/utils/config/daos_server.yml +++ b/utils/config/daos_server.yml @@ -240,7 +240,7 @@ ## minimum of 4gib. Increasing the value may help avoid the potential of OOM killer terminating ## engine processes but could also result in stopping DAOS from using available memory resources. # -## default: 26 +## default: 64 #system_ram_reserved: 5 # # From 7f3407cda77c4a8025bfeb9104309dd24fc39fe6 Mon Sep 17 00:00:00 2001 From: Tom Nabarro Date: Fri, 12 Dec 2025 17:14:31 +0000 Subject: [PATCH 080/253] DAOS-18247 control: Supply hostlist in system req for per-pool ops (#17177) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit System DMG commands invoke the control API from server-side gRPC handlers to operate across pools. However, the hostlist with MS-replica addresses isn’t being supplied when calling the control API which prevents port number propagation and causes per-pool requests to fail on custom server port number assignments. The fix is to retrieve MS-replica addresses via leader query and supply them in pool-request hostlist. 
Signed-off-by: Tom Nabarro --- src/control/cmd/dmg/pool_test.go | 2 +- src/control/cmd/dmg/system_test.go | 56 ++- src/control/common/test/mocks.go | 15 +- src/control/server/mgmt_system.go | 29 +- src/control/server/mgmt_system_test.go | 633 +++++++++++++++++++++++-- 5 files changed, 701 insertions(+), 34 deletions(-) diff --git a/src/control/cmd/dmg/pool_test.go b/src/control/cmd/dmg/pool_test.go index 61711bbe700..5eeae387584 100644 --- a/src/control/cmd/dmg/pool_test.go +++ b/src/control/cmd/dmg/pool_test.go @@ -138,7 +138,7 @@ func createACLFile(t *testing.T, dir string, acl *control.AccessControlList) str return test.CreateTestFile(t, dir, control.FormatACLDefault(acl)) } -func TestPoolCommands(t *testing.T) { +func TestDmg_PoolCommands(t *testing.T) { testSizeStr := "512GiB" testSize := 549755813888 eUsr, err := user.Current() diff --git a/src/control/cmd/dmg/system_test.go b/src/control/cmd/dmg/system_test.go index 9332ec38432..dc61b6c7b91 100644 --- a/src/control/cmd/dmg/system_test.go +++ b/src/control/cmd/dmg/system_test.go @@ -635,7 +635,7 @@ func TestDmg_systemRebuildOpCmd_execute(t *testing.T) { resp: &mgmtpb.SystemRebuildManageResp{}, expInfo: "System-rebuild start request succeeded on 0 pools []", }, - "pool stop failed": { + "rebuild stop failed": { ctlCfg: &control.Config{}, opCode: control.PoolRebuildOpCodeStop, resp: &mgmtpb.SystemRebuildManageResp{ @@ -661,7 +661,7 @@ func TestDmg_systemRebuildOpCmd_execute(t *testing.T) { expErr: errors.New("failed on pool foo: failed, pool-rebuild stop failed on pool bar"), expInfo: "System-rebuild stop request succeeded on 1 pool", }, - "pool start succeeded; verbose": { + "rebuild start succeeded; verbose": { ctlCfg: &control.Config{}, opCode: control.PoolRebuildOpCodeStart, verbose: true, @@ -702,6 +702,9 @@ func TestDmg_systemRebuildOpCmd_execute(t *testing.T) { gotErr := rbldCmd.execute(tc.opCode, tc.force) test.CmpErr(t, tc.expErr, gotErr) + // Note this doesn't verify that the text is on an INFO or 
DEBUG line + // specifically, just that it appears in log output. + if !strings.Contains(buf.String(), tc.expInfo) { t.Fatalf("expected info log output to contain %s, got %s\n", tc.expInfo, buf.String()) @@ -713,3 +716,52 @@ func TestDmg_systemRebuildOpCmd_execute(t *testing.T) { }) } } + +func TestDmg_systemSelfHealEvalCmd_execute(t *testing.T) { + for name, tc := range map[string]struct { + ctlCfg *control.Config + resp *mgmtpb.DaosResp + msErr error + expErr error + expInfo string + }{ + "no config": { + expErr: errors.New("system self-heal eval failed: no configuration loaded"), + }, + "ms failures": { + ctlCfg: &control.Config{}, + msErr: errors.New("failed"), + expErr: errors.New("failed"), + }, + "success": { + ctlCfg: &control.Config{}, + resp: &mgmtpb.DaosResp{}, + expInfo: "System self-heal eval request succeeded", + }, + "daos error": { + ctlCfg: &control.Config{}, + resp: &mgmtpb.DaosResp{ + Status: -1, + }, + expErr: errors.New("DER_UNKNOWN"), + }, + } { + t.Run(name, func(t *testing.T) { + log, buf := logging.NewTestLogger(t.Name()) + defer test.ShowBufferOnFailure(t, buf) + + mi := control.NewMockInvoker(log, &control.MockInvokerConfig{ + UnaryResponse: control.MockMSResponse("10.0.0.1:10001", + tc.msErr, tc.resp), + }) + + cmd := new(systemSelfHealEvalCmd) + cmd.setInvoker(mi) + cmd.SetLog(log) + cmd.setConfig(tc.ctlCfg) + + gotErr := cmd.Execute(nil) + test.CmpErr(t, tc.expErr, gotErr) + }) + } +} diff --git a/src/control/common/test/mocks.go b/src/control/common/test/mocks.go index 10a733dfaa4..ab3bf3e6094 100644 --- a/src/control/common/test/mocks.go +++ b/src/control/common/test/mocks.go @@ -1,5 +1,6 @@ // // (C) Copyright 2020-2022 Intel Corporation. +// (C) Copyright 2025 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -61,7 +62,7 @@ func MockHostAddr(varIdx ...int32) *net.TCPAddr { return hostAddrs[idx] } -// MockPCIAddr returns mock PCIAddr values for use in tests. 
+// MockPCIAddr returns mock PCIAddr value for use in tests. func MockPCIAddr(varIdx ...int32) string { idx := GetIndex(varIdx...) @@ -94,6 +95,17 @@ func MockVMDPCIAddrs(dom int, idxs ...int) (addrs []string) { return } +// MockTCPAddr returns mock TCPAddr value for use in tests. Create a mock IPv4 address +// (e.g., 127.0.0.1 on port 8080) +func MockTCPAddr(port int, varIdx ...int32) *net.TCPAddr { + idx := GetIndex(varIdx...) + + return &net.TCPAddr{ + IP: net.ParseIP(fmt.Sprintf("127.0.0.%d", idx)), + Port: port, + } +} + // MockWriter is a mock io.Writer that can be used to inject errors and check // values written. type MockWriter struct { @@ -105,6 +117,7 @@ func (w *MockWriter) Write(p []byte) (int, error) { if w.WriteErr != nil { return 0, w.WriteErr } + return w.builder.Write(p) } diff --git a/src/control/server/mgmt_system.go b/src/control/server/mgmt_system.go index c0243810c50..27110136c38 100644 --- a/src/control/server/mgmt_system.go +++ b/src/control/server/mgmt_system.go @@ -1316,6 +1316,11 @@ type poolRanksOpSig func(context.Context, control.UnaryInvoker, *control.PoolRan func (svc *mgmtSvc) getPoolRanksResps(ctx context.Context, sys string, poolIDs []string, poolRanks poolRanksMap, ctlApiCall poolRanksOpSig) ([]*control.PoolRanksResp, error) { resps := []*control.PoolRanksResp{} + _, replicas, err := svc.sysdb.LeaderQuery() + if err != nil { + return nil, err + } + for _, id := range poolIDs { rs := poolRanks[id] if rs.Count() == 0 { @@ -1327,6 +1332,9 @@ func (svc *mgmtSvc) getPoolRanksResps(ctx context.Context, sys string, poolIDs [ Ranks: rs.Ranks(), } req.Sys = sys + // Set request hostlist from leader query as we don't have + // access to the server config from here. + req.SetHostList(replicas) svc.log.Tracef("%T: %+v", req, req) @@ -1432,6 +1440,11 @@ func (svc *mgmtSvc) SystemRebuildManage(ctx context.Context, pbReq *mgmtpb.Syste return &mgmtpb.SystemRebuildManageResp{}, nil // Successful no-op. 
} + _, replicas, err := svc.sysdb.LeaderQuery() + if err != nil { + return nil, err + } + var results []*control.PoolRebuildManageResult for _, id := range poolIDs { opCode := control.PoolRebuildOpCode(pbReq.OpCode) @@ -1441,6 +1454,10 @@ func (svc *mgmtSvc) SystemRebuildManage(ctx context.Context, pbReq *mgmtpb.Syste OpCode: opCode, Force: pbReq.Force, } + // Set request hostlist from leader query as we don't have + // access to the server config from here. + req.SetHostList(replicas) + svc.log.Tracef("%T: %+v", req, req) result := &control.PoolRebuildManageResult{ @@ -1468,7 +1485,7 @@ func (svc *mgmtSvc) SystemRebuildManage(ctx context.Context, pbReq *mgmtpb.Syste // selfHealExcludeRanks fetches a list of detected dead ranks from the leader's engine and updates // states within the control-plane membership appropriately. func (svc *mgmtSvc) selfHealExcludeRanks(ctx context.Context) error { - // TODO: Pass a real, nonzero map version. + // DAOS-18163 TODO: Pass a real, nonzero map version. req := &mgmtpb.GetGroupStatusReq{} // Fetch dead rank list from leader's engine with group status dRPC call. @@ -1528,12 +1545,21 @@ func (svc *mgmtSvc) selfHealNotifyPSes(ctx context.Context, propVal string) erro return nil // Successful no-op. } + _, replicas, err := svc.sysdb.LeaderQuery() + if err != nil { + return err + } + var successes, failures []string for _, id := range poolIDs { req := &control.PoolSelfHealEvalReq{ ID: id, SysPropVal: propVal, } + // Set request hostlist from leader query as we don't have + // access to the server config from here. 
+ req.SetHostList(replicas) + svc.log.Tracef("%T: %+v", req, req) if err := control.PoolSelfHealEval(ctx, svc.rpcClient, req); err != nil { @@ -1591,7 +1617,6 @@ func (svc *mgmtSvc) SystemSelfHealEval(ctx context.Context, pbReq *mgmtpb.System !daos.SystemPropertySelfHealHasFlag(selfHeal, daos.SysSelfHealFlagPoolExclude) { return new(mgmtpb.DaosResp), nil } - if err := svc.selfHealNotifyPSes(ctx, selfHeal); err != nil { return nil, errors.Wrapf(err, "notify pool services of self_heal=%q", selfHeal) } diff --git a/src/control/server/mgmt_system_test.go b/src/control/server/mgmt_system_test.go index a7d214183f6..46e7266af6d 100644 --- a/src/control/server/mgmt_system_test.go +++ b/src/control/server/mgmt_system_test.go @@ -252,6 +252,26 @@ func stateString(s system.MemberState) string { return strings.ToLower(s.String()) } +func startSysDB(t *testing.T, ctx context.Context, log logging.Logger, replicas []*net.TCPAddr, svc *mgmtSvc) func() { + db, cleanup := raft.TestDatabase(t, log, replicas...) 
+ svc.sysdb = db + + if err := db.Start(ctx); err != nil { + cleanup() + t.Fatal(err) + } + + // wait for the bootstrap to finish + for { + if leader, _, _ := db.LeaderQuery(); leader != "" { + break + } + time.Sleep(250 * time.Millisecond) + } + + return cleanup +} + func TestServer_MgmtSvc_LeaderQuery(t *testing.T) { localhost := common.LocalhostCtrlAddr() @@ -282,22 +302,11 @@ func TestServer_MgmtSvc_LeaderQuery(t *testing.T) { defer test.ShowBufferOnFailure(t, buf) svc := newTestMgmtSvc(t, log) - db, cleanup := raft.TestDatabase(t, log) - defer cleanup() - svc.sysdb = db - ctx := test.Context(t) - if err := db.Start(ctx); err != nil { - t.Fatal(err) - } + replicas := []*net.TCPAddr{common.LocalhostCtrlAddr()} - // wait for the bootstrap to finish - for { - if leader, _, _ := db.LeaderQuery(); leader != "" { - break - } - time.Sleep(250 * time.Millisecond) - } + cleanup := startSysDB(t, ctx, log, replicas, svc) + defer cleanup() gotResp, gotErr := svc.LeaderQuery(test.Context(t), tc.req) test.CmpErr(t, tc.expErr, gotErr) @@ -2325,6 +2334,7 @@ func TestServer_MgmtSvc_SystemDrain(t *testing.T) { useLabels bool pools []string members system.Members + replica *net.TCPAddr drpcResps []*mockDrpcResponse // For dRPC PoolQuery expDrpcCount int mic *control.MockInvokerConfig // For control-API PoolDrain/Reint @@ -2512,14 +2522,17 @@ func TestServer_MgmtSvc_SystemDrain(t *testing.T) { expDrpcCount: 2, expCtlApiCount: 1, }, - "drain multiple ranks on multiple pools": { - req: &mgmtpb.SystemDrainReq{Ranks: "0-3"}, + "drain rank on multiple pools; pool requests contain replica address": { + req: &mgmtpb.SystemDrainReq{ + Ranks: "0-3", + }, members: system.Members{ system.MockMember(t, 1, system.MemberStateJoined), system.MockMember(t, 2, system.MemberStateJoined), system.MockMember(t, 3, system.MemberStateJoined), }, - pools: []string{test.MockUUID(1), test.MockUUID(2)}, + pools: []string{test.MockUUID(1), test.MockUUID(2)}, + replica: test.MockTCPAddr(10003, 5), drpcResps: 
[]*mockDrpcResponse{ &mockDrpcResponse{ Message: &mgmtpb.PoolQueryResp{ @@ -2553,14 +2566,15 @@ func TestServer_MgmtSvc_SystemDrain(t *testing.T) { expDrpcCount: 2, expCtlApiCount: 5, }, - "reintegrate multiple ranks on multiple pools": { + "reintegrate rank on multiple pools; pool requests contain replica address": { req: &mgmtpb.SystemDrainReq{Ranks: "0-3", Reint: true}, members: system.Members{ system.MockMember(t, 1, system.MemberStateJoined), system.MockMember(t, 2, system.MemberStateJoined), system.MockMember(t, 3, system.MemberStateJoined), }, - pools: []string{test.MockUUID(1), test.MockUUID(2)}, + pools: []string{test.MockUUID(1), test.MockUUID(2)}, + replica: test.MockTCPAddr(10003, 5), drpcResps: []*mockDrpcResponse{ &mockDrpcResponse{ Message: &mgmtpb.PoolQueryResp{ @@ -2656,8 +2670,8 @@ func TestServer_MgmtSvc_SystemDrain(t *testing.T) { req: &mgmtpb.SystemDrainReq{ Reint: true, // Resolves to ranks 1-2. - Hosts: fmt.Sprintf("%s,%s", test.MockHostAddr(1), - test.MockHostAddr(2)), + Hosts: fmt.Sprintf("%s,%s", system.MockControlAddr(t, 1), + system.MockControlAddr(t, 2)), }, members: system.Members{ system.MockMember(t, 1, system.MemberStateJoined), @@ -2692,21 +2706,36 @@ func TestServer_MgmtSvc_SystemDrain(t *testing.T) { Id: "00000002", Results: []*sharedpb.RankResult{ {Rank: 1}, + {Rank: 2}, }, }, }, }, expDrpcCount: 2, - expCtlApiCount: 2, + expCtlApiCount: 3, // One per pool-rank }, } { t.Run(name, func(t *testing.T) { log, buf := logging.NewTestLogger(t.Name()) defer test.ShowBufferOnFailure(t, buf) + harness := NewEngineHarness(log) + sp := storage.NewProvider(log, 0, nil, nil, nil, nil, nil) + e := newTestEngine(log, true, sp) + if err := harness.AddInstance(e); err != nil { + t.Fatal(err) + } + harness.started.SetTrue() + ctx := test.MustLogContext(t) - svc := newTestMgmtSvc(t, log) + if tc.replica == nil { + tc.replica = common.LocalhostCtrlAddr() + } + + db := raft.MockDatabaseWithAddr(t, log, tc.replica) + ms := system.NewMembership(log, 
db) + svc := newMgmtSvc(harness, ms, db, nil, nil) for _, m := range tc.members { if _, err := svc.membership.Add(m); err != nil { t.Fatal(err) @@ -2761,6 +2790,19 @@ func TestServer_MgmtSvc_SystemDrain(t *testing.T) { "dRPC invoke count") test.AssertEqual(t, tc.expCtlApiCount, mi.GetInvokeCount(), "rpc client invoke count") + + if tc.expCtlApiCount > 0 { + for _, sr := range mi.SentReqs { + // Mock database implementation will only return first + // replica so make sure the hostlist sent in pool drain or + // reint requests matches what leader query returns as the + // first replica. + reqSent := sr.(*control.PoolRanksReq) + exp := fmt.Sprintf("%v", tc.replica) + got := fmt.Sprintf("%v", reqSent.HostList[0]) + test.AssertEqual(t, exp, got, "first request host") + } + } }) } } @@ -2770,6 +2812,7 @@ func TestServer_MgmtSvc_SystemRebuildManage(t *testing.T) { req *mgmtpb.SystemRebuildManageReq useLabels bool pools []string + replica *net.TCPAddr mic *control.MockInvokerConfig // For control-API PoolRebuildStart/Stop expCtlApiCount int expErr error @@ -2883,12 +2926,13 @@ func TestServer_MgmtSvc_SystemRebuildManage(t *testing.T) { }, expCtlApiCount: 1, }, - "start pool rebuild results on multiple pools; use label identifiers": { + "start pool rebuild results on multiple pools; use label identifiers; sent to replicas": { req: &mgmtpb.SystemRebuildManageReq{ OpCode: uint32(control.PoolRebuildOpCodeStart), }, useLabels: true, pools: []string{test.MockUUID(3), test.MockUUID(2), test.MockUUID(1)}, + replica: test.MockTCPAddr(10003, 5), mic: &control.MockInvokerConfig{ UnaryResponseSet: []*control.UnaryResponse{ control.MockMSResponse("host1", nil, &mgmtpb.DaosResp{}), @@ -2925,8 +2969,23 @@ func TestServer_MgmtSvc_SystemRebuildManage(t *testing.T) { log, buf := logging.NewTestLogger(t.Name()) defer test.ShowBufferOnFailure(t, buf) + harness := NewEngineHarness(log) + sp := storage.NewProvider(log, 0, nil, nil, nil, nil, nil) + e := newTestEngine(log, true, sp) + if err := 
harness.AddInstance(e); err != nil { + t.Fatal(err) + } + harness.started.SetTrue() + ctx := test.MustLogContext(t) - svc := newTestMgmtSvc(t, log) + + if tc.replica == nil { + tc.replica = common.LocalhostCtrlAddr() + } + + db := raft.MockDatabaseWithAddr(t, log, tc.replica) + m := system.NewMembership(log, db) + svc := newMgmtSvc(harness, m, db, nil, nil) cfg := new(mockDrpcClientConfig) mdc := newMockDrpcClient(cfg) @@ -2959,10 +3018,14 @@ func TestServer_MgmtSvc_SystemRebuildManage(t *testing.T) { gotResp, gotErr := svc.SystemRebuildManage(ctx, tc.req) test.CmpErr(t, tc.expErr, gotErr) + cmpOpts := []cmp.Option{ + cmpopts.IgnoreUnexported(mgmtpb.SystemRebuildManageResp{}, + mgmtpb.PoolRebuildManageResult{}, control.PoolRebuildManageReq{}), + } + if tc.expErr == nil { - cmpOpts := []cmp.Option{ - cmpopts.IgnoreUnexported(mgmtpb.SystemRebuildManageResp{}, - mgmtpb.PoolRebuildManageResult{}), + if gotResp == nil { + t.Fatal("expected non-nil response") } if diff := cmp.Diff(tc.expResp, gotResp, cmpOpts...); diff != "" { t.Fatalf("unexpected response (-want, +got):\n%s\n", diff) @@ -2971,6 +3034,520 @@ func TestServer_MgmtSvc_SystemRebuildManage(t *testing.T) { test.AssertEqual(t, tc.expCtlApiCount, mi.GetInvokeCount(), "rpc client invoke count") + + if tc.expCtlApiCount > 0 { + for _, sr := range mi.SentReqs { + // Mock database implementation will only return first + // replica so make sure the hostlist sent in pool rebuild + // manage requests matches what leader query returns as the + // first replica. 
+ rbldReqSent := sr.(*control.PoolRebuildManageReq) + exp := fmt.Sprintf("%v", tc.replica) + got := fmt.Sprintf("%v", rbldReqSent.HostList[0]) + test.AssertEqual(t, exp, got, "first request host") + } + } + }) + } +} + +func TestServer_MgmtSvc_getSysSelfHeal(t *testing.T) { + for name, tc := range map[string]struct { + selfHealProp string + propErr error + expSetPropErr error + expResult string + expErr error + }{ + "property set to empty": { + selfHealProp: "", + expResult: daos.DefaultSysSelfHealFlagsStr, + }, + "property set to exclude": { + selfHealProp: "exclude", + expResult: "exclude", + }, + "property set to pool_rebuild": { + selfHealProp: "pool_rebuild", + expResult: "pool_rebuild", + }, + "property set to invalid flag combination": { + selfHealProp: "exclude;pool_rebuild;pool_exclude", + expSetPropErr: errors.New("invalid value"), + }, + "property set to multiple flags": { + selfHealProp: "exclude;pool_exclude;pool_rebuild", + expResult: "exclude;pool_exclude;pool_rebuild", + }, + "property error": { + propErr: errors.New("database error"), + expErr: errors.New("unknown property \"self_heal\""), + }, + } { + t.Run(name, func(t *testing.T) { + log, buf := logging.NewTestLogger(t.Name()) + defer test.ShowBufferOnFailure(t, buf) + + svc := newTestMgmtSvc(t, log) + + if tc.selfHealProp != "" { + gotErr := system.SetUserProperty(svc.sysdb, svc.systemProps, + "self_heal", tc.selfHealProp) + test.CmpErr(t, tc.expSetPropErr, gotErr) + if tc.expSetPropErr != nil { + return + } + } + + if tc.propErr != nil { + // Simulate error by clearing the systemProps + svc.systemProps = nil + } + + gotResult, gotErr := svc.getSysSelfHeal() + + test.CmpErr(t, tc.expErr, gotErr) + if tc.expErr == nil { + test.AssertEqual(t, tc.expResult, gotResult, "self_heal property value") + } + }) + } +} + +func TestServer_MgmtSvc_selfHealExcludeRanks(t *testing.T) { + for name, tc := range map[string]struct { + drpcResp proto.Message + drpcErr error + deadRanks []uint32 + members 
system.Members + expErr error + expDebug string + }{ + "drpc call fails": { + drpcErr: errors.New("drpc failed"), + expErr: errors.New("drpc failed"), + }, + "drpc returns error status": { + drpcResp: &mgmtpb.GetGroupStatusResp{ + Status: int32(daos.Nonexistent), + }, + expErr: daos.Nonexistent, + }, + "no dead ranks": { + drpcResp: &mgmtpb.GetGroupStatusResp{ + Status: 0, + }, + }, + "one dead rank": { + drpcResp: &mgmtpb.GetGroupStatusResp{ + Status: 0, + DeadRanks: []uint32{1}, + }, + members: system.Members{ + system.NewMember(1, test.MockUUID(1), nil, test.MockHostAddr(), + system.MemberStateJoined), + system.NewMember(2, test.MockUUID(2), nil, test.MockHostAddr(), + system.MemberStateJoined), + }, + expDebug: "do group update", + }, + "multiple dead ranks": { + drpcResp: &mgmtpb.GetGroupStatusResp{ + Status: 0, + DeadRanks: []uint32{1, 2, 3}, + }, + members: system.Members{ + system.NewMember(1, test.MockUUID(1), nil, test.MockHostAddr(), + system.MemberStateJoined), + system.NewMember(2, test.MockUUID(2), nil, test.MockHostAddr(), + system.MemberStateJoined), + system.NewMember(3, test.MockUUID(3), nil, test.MockHostAddr(), + system.MemberStateJoined), + }, + expDebug: "do group update", + }, + "dead rank not in membership": { + drpcResp: &mgmtpb.GetGroupStatusResp{ + Status: 0, + DeadRanks: []uint32{99}, + }, + }, + "dead rank already excluded": { + drpcResp: &mgmtpb.GetGroupStatusResp{ + Status: 0, + DeadRanks: []uint32{1}, + }, + members: system.Members{ + system.NewMember(1, test.MockUUID(1), nil, test.MockHostAddr(), + system.MemberStateExcluded), + system.NewMember(2, test.MockUUID(2), nil, test.MockHostAddr(), + system.MemberStateJoined), + system.NewMember(3, test.MockUUID(3), nil, test.MockHostAddr(), + system.MemberStateJoined), + }, + }, + } { + t.Run(name, func(t *testing.T) { + log, buf := logging.NewTestLogger(t.Name()) + defer test.ShowBufferOnFailure(t, buf) + + ctx := test.MustLogContext(t) + svc := newTestMgmtSvc(t, log) + + // Add members 
to the database + seenMembers := make(map[ranklist.Rank]system.MemberState) + for _, m := range tc.members { + if _, err := svc.membership.Add(m); err != nil { + t.Fatal(err) + } + seenMembers[m.Rank] = m.State + } + + cfg := new(mockDrpcClientConfig) + rb, _ := proto.Marshal(tc.drpcResp) + cfg.setSendMsgResponse(drpc.Status_SUCCESS, rb, tc.drpcErr) + mdc := newMockDrpcClient(cfg) + setupSvcDrpcClient(svc, 0, mdc) + + gotErr := svc.selfHealExcludeRanks(ctx) + + test.CmpErr(t, tc.expErr, gotErr) + if tc.expErr != nil { + return + } + + // Verify members were marked as dead if expected + if tc.drpcResp != nil { + resp := tc.drpcResp.(*mgmtpb.GetGroupStatusResp) + for _, deadRank := range resp.DeadRanks { + m, err := svc.membership.Get(ranklist.Rank(deadRank)) + if system.IsMemberNotFound(err) { + continue + } + if err != nil { + t.Fatal(err) + } + test.AssertEqual(t, system.MemberStateExcluded, m.State, + fmt.Sprintf("rank %d state", deadRank)) + seenMembers[ranklist.Rank(deadRank)] = system.MemberStateExcluded + } + } + + // Verify members have expected states in the database + for rank, state := range seenMembers { + m, err := svc.membership.Get(ranklist.Rank(rank)) + if err != nil { + t.Fatal(err) + } + test.AssertEqual(t, state, m.State, + fmt.Sprintf("rank %d end state", rank)) + } + + if !strings.Contains(buf.String(), tc.expDebug) { + t.Fatalf("expected debug log output to contain %s, got %s\n", + tc.expDebug, buf.String()) + } + }) + } +} + +func TestServer_MgmtSvc_selfHealNotifyPSes(t *testing.T) { + for name, tc := range map[string]struct { + propVal string + pools []string + replica *net.TCPAddr + mic *control.MockInvokerConfig + expErr error + expCtlApiCount int + }{ + "no pools": { + propVal: "pool_rebuild", + }, + "one pool success": { + propVal: "pool_rebuild", + pools: []string{test.MockUUID(1)}, + mic: &control.MockInvokerConfig{ + UnaryResponseSet: []*control.UnaryResponse{ + control.MockMSResponse("host1", nil, &mgmtpb.DaosResp{}), + }, + }, + 
expCtlApiCount: 1, + }, + "multiple pools all succeed; replica address sent in pool request": { + propVal: "pool_exclude", + pools: []string{test.MockUUID(1), test.MockUUID(2), test.MockUUID(3)}, + replica: test.MockTCPAddr(10003, 5), + mic: &control.MockInvokerConfig{ + UnaryResponseSet: []*control.UnaryResponse{ + control.MockMSResponse("host1", nil, &mgmtpb.DaosResp{}), + control.MockMSResponse("host1", nil, &mgmtpb.DaosResp{}), + control.MockMSResponse("host1", nil, &mgmtpb.DaosResp{}), + }, + }, + expCtlApiCount: 3, + }, + "one pool fails": { + propVal: "pool_rebuild", + pools: []string{test.MockUUID(1), test.MockUUID(2)}, + mic: &control.MockInvokerConfig{ + UnaryResponseSet: []*control.UnaryResponse{ + control.MockMSResponse("host1", nil, &mgmtpb.DaosResp{}), + control.MockMSResponse("host1", errors.New("pool failed"), nil), + }, + }, + expErr: errors.New("pool self-heal evaluate drpc failed for 1 pool"), + expCtlApiCount: 2, + }, + "multiple pools fail": { + propVal: "pool_exclude", + pools: []string{test.MockUUID(1), test.MockUUID(2), test.MockUUID(3)}, + mic: &control.MockInvokerConfig{ + UnaryResponseSet: []*control.UnaryResponse{ + control.MockMSResponse("host1", errors.New("fail1"), nil), + control.MockMSResponse("host1", nil, &mgmtpb.DaosResp{}), + control.MockMSResponse("host1", errors.New("fail2"), nil), + }, + }, + expErr: errors.New("pool self-heal evaluate drpc failed for 2 pools"), + expCtlApiCount: 3, + }, + "empty propVal with pools": { + propVal: "", + pools: []string{test.MockUUID(1)}, + mic: &control.MockInvokerConfig{ + UnaryResponseSet: []*control.UnaryResponse{ + control.MockMSResponse("host1", nil, &mgmtpb.DaosResp{}), + }, + }, + expCtlApiCount: 1, + }, + } { + t.Run(name, func(t *testing.T) { + log, buf := logging.NewTestLogger(t.Name()) + defer test.ShowBufferOnFailure(t, buf) + + harness := NewEngineHarness(log) + sp := storage.NewProvider(log, 0, nil, nil, nil, nil, nil) + e := newTestEngine(log, true, sp) + if err := 
harness.AddInstance(e); err != nil { + t.Fatal(err) + } + harness.started.SetTrue() + + ctx := test.MustLogContext(t) + + if tc.replica == nil { + tc.replica = common.LocalhostCtrlAddr() + } + + db := raft.MockDatabaseWithAddr(t, log, tc.replica) + m := system.NewMembership(log, db) + svc := newMgmtSvc(harness, m, db, nil, nil) + + mic := tc.mic + if mic == nil { + mic = control.DefaultMockInvokerConfig() + } + mi := control.NewMockInvoker(log, mic) + svc.rpcClient = mi + + for _, uuidStr := range tc.pools { + addTestPoolService(t, svc.sysdb, &system.PoolService{ + PoolUUID: uuid.MustParse(uuidStr), + State: system.PoolServiceStateReady, + Replicas: []ranklist.Rank{0}, + }) + } + + gotErr := svc.selfHealNotifyPSes(ctx, tc.propVal) + + test.CmpErr(t, tc.expErr, gotErr) + test.AssertEqual(t, tc.expCtlApiCount, mi.GetInvokeCount(), + "rpc client invoke count") + + if tc.expCtlApiCount > 0 { + for _, sr := range mi.SentReqs { + // Mock database implementation will only return first + // replica so make sure the hostlist sent in pool self-heal + // eval requests matches what leader query returns as the + // first replica. 
+ reqSent := sr.(*control.PoolSelfHealEvalReq) + exp := fmt.Sprintf("%v", tc.replica) + got := fmt.Sprintf("%v", reqSent.HostList[0]) + test.AssertEqual(t, exp, got, "first request host") + } + } + }) + } +} + +func TestServer_MgmtSvc_SystemSelfHealEval(t *testing.T) { + for name, tc := range map[string]struct { + req *mgmtpb.SystemSelfHealEvalReq + selfHealProp string + mic *control.MockInvokerConfig // For control-API PoolSelfHealEval + drpcResp proto.Message + drpcErr error + expErr error + noPools bool + noMembers bool + expCtlApiCount int + expGrpUpd bool + }{ + "nil req": { + req: (*mgmtpb.SystemSelfHealEvalReq)(nil), + expErr: errors.New("nil *mgmt.SystemSelfHealEvalReq"), + }, + "not system leader": { + req: &mgmtpb.SystemSelfHealEvalReq{Sys: "quack"}, + expErr: FaultWrongSystem("quack", build.DefaultSystemName), + }, + "exclude flag set; drpc call fails": { + req: &mgmtpb.SystemSelfHealEvalReq{}, + selfHealProp: "exclude", + drpcErr: errors.New("drpc failed"), + expErr: errors.New("excluding ranks based on self_heal.exclude"), + }, + "exclude flag set; no dead ranks": { + req: &mgmtpb.SystemSelfHealEvalReq{}, + selfHealProp: "exclude", + drpcResp: &mgmtpb.GetGroupStatusResp{}, + }, + "exclude flag set; with dead ranks": { + req: &mgmtpb.SystemSelfHealEvalReq{}, + selfHealProp: "exclude", + drpcResp: &mgmtpb.GetGroupStatusResp{ + DeadRanks: []uint32{1, 2}, + }, + expGrpUpd: true, + }, + "pool_rebuild flag set; no pools": { + req: &mgmtpb.SystemSelfHealEvalReq{}, + selfHealProp: "pool_rebuild", + noPools: true, + }, + "pool_rebuild flag set; multiple pool success": { + req: &mgmtpb.SystemSelfHealEvalReq{}, + selfHealProp: "pool_rebuild", + mic: &control.MockInvokerConfig{ + UnaryResponse: control.MockMSResponse("host1", nil, &mgmtpb.DaosResp{}), + }, + expCtlApiCount: 3, + }, + "pool_exclude flag set; multiple pool failures": { + req: &mgmtpb.SystemSelfHealEvalReq{}, + selfHealProp: "pool_exclude", + mic: &control.MockInvokerConfig{ + UnaryResponseSet: 
[]*control.UnaryResponse{ + control.MockMSResponse("host1", nil, &mgmtpb.DaosResp{}), + control.MockMSResponse("host1", errors.New("pool failed"), nil), + control.MockMSResponse("host1", errors.New("pool failed"), nil), + }, + }, + expErr: errors.New("pool self-heal evaluate drpc failed for 2 pools"), + expCtlApiCount: 3, + }, + "pool_rebuild and pool_exclude flags set; multiple pools": { + req: &mgmtpb.SystemSelfHealEvalReq{}, + selfHealProp: "pool_exclude;pool_rebuild", + mic: &control.MockInvokerConfig{ + UnaryResponse: control.MockMSResponse("host1", nil, &mgmtpb.DaosResp{}), + }, + expCtlApiCount: 3, + }, + "all flags set; exclude with dead ranks and pool operations; pool requests sent to replica": { + req: &mgmtpb.SystemSelfHealEvalReq{}, + selfHealProp: "exclude;pool_exclude;pool_rebuild", + drpcResp: &mgmtpb.GetGroupStatusResp{ + DeadRanks: []uint32{0}, + }, + mic: &control.MockInvokerConfig{ + UnaryResponse: control.MockMSResponse("host1", nil, &mgmtpb.DaosResp{}), + }, + expCtlApiCount: 3, + expGrpUpd: true, + }, + } { + t.Run(name, func(t *testing.T) { + log, buf := logging.NewTestLogger(t.Name()) + defer test.ShowBufferOnFailure(t, buf) + + ctx := test.MustLogContext(t) + svc := newTestMgmtSvc(t, log) + + cfg := new(mockDrpcClientConfig) + rb, _ := proto.Marshal(tc.drpcResp) + cfg.setSendMsgResponse(drpc.Status_SUCCESS, rb, tc.drpcErr) + mdc := newMockDrpcClient(cfg) + setupSvcDrpcClient(svc, 0, mdc) + + mic := tc.mic + if mic == nil { + mic = control.DefaultMockInvokerConfig() + } + mi := control.NewMockInvoker(log, mic) + svc.rpcClient = mi + + // Set up system self_heal property + if tc.selfHealProp != "" { + if err := system.SetUserProperty(svc.sysdb, svc.systemProps, + "self_heal", tc.selfHealProp); err != nil { + t.Fatal(err) + } + } + + // Add pool service entries to the system database + if !tc.noPools { + pools := []string{test.MockUUID(1), test.MockUUID(2), test.MockUUID(3)} + for _, uuidStr := range pools { + addTestPoolService(t, svc.sysdb, 
&system.PoolService{ + PoolUUID: uuid.MustParse(uuidStr), + State: system.PoolServiceStateReady, + Replicas: []ranklist.Rank{0}, + }) + } + } + + // Add members to the system membership + if !tc.noMembers { + members := system.Members{ + system.NewMember(1, test.MockUUID(1), nil, test.MockHostAddr(), + system.MemberStateExcluded), + system.NewMember(2, test.MockUUID(2), nil, test.MockHostAddr(), + system.MemberStateJoined), + system.NewMember(3, test.MockUUID(3), nil, test.MockHostAddr(), + system.MemberStateJoined), + } + for _, m := range members { + if _, err := svc.membership.Add(m); err != nil { + t.Fatal(err) + } + } + } + + if tc.req != nil && tc.req.Sys == "" { + tc.req.Sys = build.DefaultSystemName + } + + gotResp, gotErr := svc.SystemSelfHealEval(ctx, tc.req) + test.CmpErr(t, tc.expErr, gotErr) + if tc.expErr == nil { + if gotResp == nil { + t.Fatal("expected non-nil response") + } + cmpOpts := []cmp.Option{ + cmpopts.IgnoreUnexported(mgmtpb.DaosResp{}), + } + if diff := cmp.Diff(&mgmtpb.DaosResp{}, gotResp, cmpOpts...); diff != "" { + t.Fatalf("unexpected response (-want, +got):\n%s\n", diff) + } + } + + test.AssertEqual(t, tc.expCtlApiCount, mi.GetInvokeCount(), + "rpc client invoke count") + + didGrpUpd := strings.Contains(buf.String(), "do group update") + test.AssertEqual(t, tc.expGrpUpd, didGrpUpd, "group update performed") }) } } From fce99752eb98b85500bea267c9b656bd34821765 Mon Sep 17 00:00:00 2001 From: Makito Kano Date: Mon, 15 Dec 2025 23:18:59 +0900 Subject: [PATCH 081/253] DAOS-18293 test: container/query_properties.yaml - Remove (1) from layout_type (#17270) The expected layout_type has been updated from POSIX (1) to POSIX by PR17040. The PR removed the layout number (1) because the value returned by get-prop needs to be able to feed into set-prop, but it wouldn't work if the number is there. Update the test to follow this change. 
Signed-off-by: Makito Kano --- src/tests/ftest/container/query_properties.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tests/ftest/container/query_properties.yaml b/src/tests/ftest/container/query_properties.yaml index 22cb146db98..f84193026a4 100644 --- a/src/tests/ftest/container/query_properties.yaml +++ b/src/tests/ftest/container/query_properties.yaml @@ -25,7 +25,7 @@ container: properties: cksum:crc16,cksum_size:16384,srv_cksum:on expected_get_prop: - layout_type: "POSIX (1)" + layout_type: "POSIX" cksum: "crc16" cksum_size: 16384 srv_cksum: "on" From 2ff1d21f9acc7c0739d23f085b83da59826493f6 Mon Sep 17 00:00:00 2001 From: Tomasz Gromadzki Date: Mon, 15 Dec 2025 20:57:47 +0100 Subject: [PATCH 082/253] SRE-3458 build: Use explicit minor version number for build (el 9.6, leap 15.5) (#17152) Use explicit version number for build (el 9.6 and leap 15.5) OS version can be fully controlled now from Jenkins file. It is not longer decided by the docker host configuration. 
Signed-off-by: Tomasz Gromadzki --- Jenkinsfile | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index cf92efd1610..756892b901d 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -574,7 +574,7 @@ pipeline { } } } - stage('Build on EL 9') { + stage('Build on EL 9.6') { when { beforeAgent true expression { !skip_build_stage('el9') } @@ -589,7 +589,9 @@ pipeline { " -t ${sanitized_JOB_NAME()}-el9 " + ' --build-arg DAOS_PACKAGES_BUILD=no ' + ' --build-arg DAOS_KEEP_SRC=yes ' + - ' --build-arg REPOS="' + prRepos() + '"' + ' --build-arg REPOS="' + prRepos() + '"' + + ' --build-arg POINT_RELEASE=.6 ' + } } steps { @@ -640,7 +642,9 @@ pipeline { deps_build: false) + ' --build-arg DAOS_PACKAGES_BUILD=no ' + ' --build-arg DAOS_KEEP_SRC=yes ' + - " -t ${sanitized_JOB_NAME()}-leap15-gcc" + " -t ${sanitized_JOB_NAME()}-leap15" + + ' --build-arg POINT_RELEASE=.5 ' + } } steps { @@ -687,9 +691,11 @@ pipeline { additionalBuildArgs dockerBuildArgs(repo_type: 'stable', parallel_build: true, deps_build: true) + - " -t ${sanitized_JOB_NAME()}-leap15" + + " -t ${sanitized_JOB_NAME()}-leap15-icc" + ' --build-arg DAOS_PACKAGES_BUILD=no ' + - ' --build-arg COMPILER=icc' + ' --build-arg COMPILER=icc' + + ' --build-arg POINT_RELEASE=.5 ' + } } steps { From 9b9feb9143b76b65ca4bf9caf2998e76a973cd8b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 16 Dec 2025 07:53:16 -0800 Subject: [PATCH 083/253] DAOS-18351 cq: bump GHA versions (#17275) Updates `actions/upload-artifact` from 5.0.0 to 6.0.0 Updates `github/codeql-action` from 4.31.6 to 4.31.8 Signed-off-by: dependabot[bot] --- .github/workflows/bullseye-coverage.yml | 8 ++++---- .github/workflows/linting.yml | 4 ++-- .github/workflows/ossf-scorecard.yml | 4 ++-- .github/workflows/rpm-build-and-test.yml | 8 ++++---- .github/workflows/trivy.yml | 4 ++-- 5 files changed, 14 insertions(+), 14 deletions(-) diff --git 
a/.github/workflows/bullseye-coverage.yml b/.github/workflows/bullseye-coverage.yml index 942b34ae01b..d34c51b0a5e 100644 --- a/.github/workflows/bullseye-coverage.yml +++ b/.github/workflows/bullseye-coverage.yml @@ -374,14 +374,14 @@ jobs: - name: Publish artifacts if: (!cancelled()) && (success() || failure()) && steps.run-test.outcome != 'skipped' - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 with: name: ${{ env.STAGE_NAME }} artifacts path: ${{ env.STAGE_NAME }}/** - name: Upload test results if: (success() || failure()) && steps.run-test.outcome != 'skipped' - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 with: name: ${{ env.STAGE_NAME }} test-results path: ${{ env.STAGE_NAME }}/**/results.xml @@ -642,14 +642,14 @@ jobs: - name: Publish artifacts if: (!cancelled()) && (success() || failure()) && steps.run-test.outcome != 'skipped' - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 with: name: ${{ env.STAGE_NAME }} artifacts path: ${{ env.STAGE_NAME }}/** - name: Upload test results if: (success() || failure()) && steps.run-test.outcome != 'skipped' - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 with: name: ${{ env.STAGE_NAME }} test-results path: ${{ env.STAGE_NAME }}/**/results.xml diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 9f11ab28228..624700dce63 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -129,7 +129,7 @@ jobs: - name: Run check run: doxygen Doxyfile - name: 'Upload Artifact' - uses: 
actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 with: name: API Documentation path: docs/doxygen/html/ @@ -191,7 +191,7 @@ jobs: with: target: ${{ steps.get_merge_base.outputs.ref }} - name: Export changes - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 if: failure() with: name: format-patch-for-pr-${{ github.event.pull_request.number }} diff --git a/.github/workflows/ossf-scorecard.yml b/.github/workflows/ossf-scorecard.yml index 72451b65db6..1f28cc096f3 100644 --- a/.github/workflows/ossf-scorecard.yml +++ b/.github/workflows/ossf-scorecard.yml @@ -62,7 +62,7 @@ jobs: # uploads of run results in SARIF # format to the repository Actions tab. - name: "Upload artifact" - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 with: name: SARIF file path: results.sarif @@ -71,6 +71,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard (optional). 
# Commenting out will disable upload of results to your repo's Code Scanning dashboard - name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@fe4161a26a8629af62121b670040955b330f9af2 # v4.31.6 + uses: github/codeql-action/upload-sarif@1b168cd39490f61582a9beae412bb7057a6b2c4e # v4.31.8 with: sarif_file: results.sarif diff --git a/.github/workflows/rpm-build-and-test.yml b/.github/workflows/rpm-build-and-test.yml index f83e45e65ce..56c6c61c85a 100644 --- a/.github/workflows/rpm-build-and-test.yml +++ b/.github/workflows/rpm-build-and-test.yml @@ -383,14 +383,14 @@ jobs: - name: Publish artifacts if: (!cancelled()) && (success() || failure()) && steps.run-test.outcome != 'skipped' - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 with: name: ${{ env.STAGE_NAME }} artifacts path: ${{ env.STAGE_NAME }}/** - name: Upload test results if: (success() || failure()) && steps.run-test.outcome != 'skipped' - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 with: name: ${{ env.STAGE_NAME }} test-results path: ${{ env.STAGE_NAME }}/**/results.xml @@ -651,14 +651,14 @@ jobs: - name: Publish artifacts if: (!cancelled()) && (success() || failure()) && steps.run-test.outcome != 'skipped' - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 with: name: ${{ env.STAGE_NAME }} artifacts path: ${{ env.STAGE_NAME }}/** - name: Upload test results if: (success() || failure()) && steps.run-test.outcome != 'skipped' - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 with: name: ${{ env.STAGE_NAME }} test-results path: ${{ 
env.STAGE_NAME }}/**/results.xml diff --git a/.github/workflows/trivy.yml b/.github/workflows/trivy.yml index 8e264d4b923..df7b7fa0437 100644 --- a/.github/workflows/trivy.yml +++ b/.github/workflows/trivy.yml @@ -49,7 +49,7 @@ jobs: cp utils/trivy/.trivyignore report/trivyignore.txt - name: Upload the report to the GitHub artifact store - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 with: path: report/* name: trivy-report-daos @@ -68,7 +68,7 @@ jobs: trivy-config: 'utils/trivy/trivy.yaml' - name: Upload Trivy scan results to GitHub Security tab - uses: github/codeql-action/upload-sarif@fe4161a26a8629af62121b670040955b330f9af2 # v4.31.6 + uses: github/codeql-action/upload-sarif@1b168cd39490f61582a9beae412bb7057a6b2c4e # v4.31.8 with: sarif_file: 'trivy-results.sarif' From a87fd3f70454ad3778e52682147acd2d2e9f0d80 Mon Sep 17 00:00:00 2001 From: Dalton Bohning Date: Tue, 16 Dec 2025 10:31:08 -0800 Subject: [PATCH 084/253] DAOS-623 cq: improving linting rollup (#17277) Improving linting rollup by printing which job failed. 
Signed-off-by: Dalton Bohning --- .github/workflows/linting.yml | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 624700dce63..5d4442e0ab4 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -247,9 +247,14 @@ jobs: steps: - name: Check if any job failed run: | - if [[ -z "$(echo "${{ join(needs.*.result, '') }}" | sed -e 's/success//g')" ]]; then - echo "All jobs succeeded" - else - echo "One or more jobs did not succeed" - exit 1 - fi + ALL_DEPS_RESULT='${{ toJSON(needs) }}' + echo "$ALL_DEPS_RESULT" | jq -rc 'keys[] as $k | "\($k):\(.[$k].result)"' \ + | while read job_result; do + job_name=$(echo "$job_result" | cut -d: -f1) + job_result=$(echo "$job_result" | cut -d: -f2) + echo "$job_name = $job_result" + if [[ "$job_result" != "success" ]]; then + echo "Job $job_name failed" + exit 1 + fi + done From f066e3ca60ad4d397677aa149a64998709e57ae4 Mon Sep 17 00:00:00 2001 From: Makito Kano Date: Wed, 17 Dec 2025 03:35:31 +0900 Subject: [PATCH 085/253] DAOS-18251 test: Increase timeout for recovery/cat_recov_core.yaml (#17271) The reported timeout failure was at build 175. Between 175 and the most recent build, 195, there were two timeouts. Most of the passed tests took around 5,500 sec, which is close to the 5,700 sec timeout. The test executes many check-related operations during the ~90 min test period. By looking at build 175 result, the test timed out near the end (the remaining steps were to start system and disconnect from pool), which means there was no single operation that took unusually long. Thus, I don't believe there is anything we need to look into regarding the checker feature. Just increase the timeout. 
Signed-off-by: Makito Kano --- src/tests/ftest/recovery/cat_recov_core.yaml | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/tests/ftest/recovery/cat_recov_core.yaml b/src/tests/ftest/recovery/cat_recov_core.yaml index 23200a8b403..0e148ae1f2c 100644 --- a/src/tests/ftest/recovery/cat_recov_core.yaml +++ b/src/tests/ftest/recovery/cat_recov_core.yaml @@ -1,6 +1,8 @@ hosts: test_servers: 4 -timeout: 5700 + +timeout: 1H40M + server_config: name: daos_server engines_per_host: 2 @@ -20,6 +22,7 @@ server_config: - FI_LOG_LEVEL=warn - D_LOG_STDERR_IN_LOG=1 storage: auto + 1: pinned_numa_node: 1 nr_xs_helpers: 0 @@ -35,15 +38,19 @@ server_config: - FI_LOG_LEVEL=warn - D_LOG_STDERR_IN_LOG=1 storage: auto + transport_config: allow_insecure: true system_ram_reserved: 64 + agent_config: transport_config: allow_insecure: true + dmg: transport_config: allow_insecure: true + daos_tests: num_clients: test_daos_cat_recov_core: 1 From 07803292d9a51db2ea70c366a23d9c307e4a45d7 Mon Sep 17 00:00:00 2001 From: Kris Jacque Date: Tue, 16 Dec 2025 14:23:29 -0700 Subject: [PATCH 086/253] DAOS-17693 control: Preserve metadata device label (#17279) In MD on SSD mode, if a device is being used for the metadata, preserve the original device label (if any) during format. Signed-off-by: Kris Jacque --- src/control/provider/system/mocks.go | 64 +++++++++------ src/control/provider/system/system_linux.go | 28 +++++++ .../provider/system/system_linux_test.go | 80 ++++++++++++++++++- .../server/storage/metadata/provider.go | 15 ++++ .../server/storage/metadata/provider_test.go | 63 +++++++++++---- 5 files changed, 209 insertions(+), 41 deletions(-) diff --git a/src/control/provider/system/mocks.go b/src/control/provider/system/mocks.go index dc52d7b3b96..52384054f8a 100644 --- a/src/control/provider/system/mocks.go +++ b/src/control/provider/system/mocks.go @@ -1,5 +1,6 @@ // // (C) Copyright 2022-2024 Intel Corporation. 
+// (C) Copyright 2025 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -32,31 +33,33 @@ type ( // MockSysConfig alters mock SystemProvider behavior. MockSysConfig struct { - IsMountedBool bool - IsMountedErr error - MountErr error - UnmountErr error - MkfsErr error - ChmodErr error - ChownErr error - GetfsStr string - GetfsErr error - SourceToTarget map[string]string - GetfsIndex int - GetfsUsageResps []GetfsUsageRetval - GetfsTypeRes *FsType - GetfsTypeErr []error - StatErrors map[string]error - RealStat bool - ReadFileResults map[string][]byte - ReadFileErrors map[string]error - RealReadFile bool - GeteuidRes int - GetegidRes int - MkdirErr error - RealMkdir bool - RemoveAllErr error - RealRemoveAll bool + IsMountedBool bool + IsMountedErr error + MountErr error + UnmountErr error + MkfsErr error + ChmodErr error + ChownErr error + GetfsStr string + GetfsErr error + SourceToTarget map[string]string + GetfsIndex int + GetfsUsageResps []GetfsUsageRetval + GetfsTypeRes *FsType + GetfsTypeErr []error + GetDeviceLabelRes string + GetDeviceLabelErr error + StatErrors map[string]error + RealStat bool + ReadFileResults map[string][]byte + ReadFileErrors map[string]error + RealReadFile bool + GeteuidRes int + GetegidRes int + MkdirErr error + RealMkdir bool + RemoveAllErr error + RealRemoveAll bool } // MockSysProvider gives a mock SystemProvider implementation. 
@@ -67,6 +70,7 @@ type ( isMounted MountMap IsMountedInputs []string GetfsTypeCount int + MkfsReqs []MkfsReq } ) @@ -146,7 +150,10 @@ func (msp *MockSysProvider) Unmount(target string, _ int) error { return msp.cfg.UnmountErr } -func (msp *MockSysProvider) Mkfs(_ MkfsReq) error { +func (msp *MockSysProvider) Mkfs(in MkfsReq) error { + msp.Lock() + msp.MkfsReqs = append(msp.MkfsReqs, in) + msp.Unlock() return msp.cfg.MkfsErr } @@ -187,6 +194,10 @@ func (msp *MockSysProvider) GetfsType(path string) (*FsType, error) { return result, err } +func (msp *MockSysProvider) GetDeviceLabel(device string) (string, error) { + return msp.cfg.GetDeviceLabelRes, msp.cfg.GetDeviceLabelErr +} + func (msp *MockSysProvider) Stat(path string) (os.FileInfo, error) { msp.RLock() defer msp.RUnlock() @@ -257,6 +268,7 @@ func NewMockSysProvider(log logging.Logger, cfg *MockSysConfig) *MockSysProvider isMounted: MountMap{ mounted: make(map[string]string), }, + MkfsReqs: make([]MkfsReq, 0), } log.Debugf("creating MockSysProvider with cfg: %+v", msp.cfg) return msp diff --git a/src/control/provider/system/system_linux.go b/src/control/provider/system/system_linux.go index e3fb439c0d8..e27066e4215 100644 --- a/src/control/provider/system/system_linux.go +++ b/src/control/provider/system/system_linux.go @@ -1,5 +1,6 @@ // // (C) Copyright 2019-2024 Intel Corporation. +// (C) Copyright 2025 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -284,6 +285,33 @@ func (s LinuxProvider) Mkfs(req MkfsReq) error { return nil } +// GetDeviceLabel retrieves the filesystem label for the specified device. 
+func (s LinuxProvider) GetDeviceLabel(device string) (string, error) { + if device == "" { + return "", errors.New("empty path") + } + + cmdPath, err := exec.LookPath("lsblk") + if err != nil { + return "", errors.Wrap(err, "unable to find lsblk") + } + + if err := s.checkDevice(device); err != nil { + return "", err + } + + args := []string{"-o", "label", "--noheadings", device} + out, err := exec.Command(cmdPath, args...).Output() + if err != nil { + return "", &RunCmdError{ + Wrapped: err, + Stdout: string(out), + } + } + + return strings.TrimSpace(string(out)), nil +} + // Getfs probes the specified device in an attempt to determine the // formatted filesystem type, if any. func (s LinuxProvider) Getfs(device string) (string, error) { diff --git a/src/control/provider/system/system_linux_test.go b/src/control/provider/system/system_linux_test.go index dc9e6b21a04..e658b6d5bcf 100644 --- a/src/control/provider/system/system_linux_test.go +++ b/src/control/provider/system/system_linux_test.go @@ -1,5 +1,6 @@ // // (C) Copyright 2019-2024 Intel Corporation. 
+// (C) Copyright 2025 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -8,6 +9,8 @@ package system import ( "errors" + "os" + "regexp" "strings" "syscall" "testing" @@ -72,6 +75,14 @@ func TestScanMountInfo(t *testing.T) { func TestIsMounted(t *testing.T) { provider := LinuxProvider{} + tmpDir, cleanup := test.CreateTestDir(t) + defer cleanup() + + testFilePath := tmpDir + "/testfile" + if err := os.WriteFile(testFilePath, []byte("test"), 0644); err != nil { + t.Fatalf("unable to create test file %q: %v", testFilePath, err) + } + for name, tc := range map[string]struct { target string expMounted bool @@ -97,7 +108,7 @@ func TestIsMounted(t *testing.T) { expErr: errors.New("no such file or directory"), }, "neither dir nor device": { - target: "/dev/stderr", + target: testFilePath, expErr: errors.New("not a valid mount target"), }, } { @@ -191,6 +202,73 @@ func TestSystemLinux_GetfsType(t *testing.T) { } } +func TestSystemLinux_GetDeviceLabel(t *testing.T) { + validDev := func(t *testing.T) string { + t.Helper() + + // Only want numbered partitions, not whole disks + re := regexp.MustCompile(`^[a-zA-Z]+[0-9]+$`) + + sysRoot := "/sys/class/block/" + entries, err := os.ReadDir(sysRoot) + if err != nil { + t.Fatalf("unable to read %q: %v", sysRoot, err) + } + + for _, entry := range entries { + if !re.MatchString(entry.Name()) { + continue + } + + devPath := "/dev/" + entry.Name() + info, err := os.Stat(devPath) + if err != nil { + continue + } + if (info.Mode()&os.ModeDevice) != 0 && (info.Mode()&os.ModeCharDevice) == 0 { + t.Logf("using block device %q for test", devPath) + return devPath + } + } + + t.Fatal("no valid block device found for test") + return "" + } + + for name, tc := range map[string]struct { + path string + expErr error + }{ + "no path": { + expErr: errors.New("empty path"), + }, + "nonexistent": { + path: "fake", + expErr: syscall.ENOENT, + }, + "not a device": { + path: "/tmp", + expErr: 
errors.New("not a device file"), + }, + "valid block device": { + path: validDev(t), + }, + } { + t.Run(name, func(t *testing.T) { + result, err := DefaultProvider().GetDeviceLabel(tc.path) + + test.CmpErr(t, tc.expErr, err) + + if tc.expErr != nil { + test.AssertEqual(t, "", result, "") + } else { + // We can't predict the label since it's system dependent. It might even be empty. + t.Logf("got label %q", result) + } + }) + } +} + func TestSystemLinux_fsStrFromMagic(t *testing.T) { for name, tc := range map[string]struct { magic int64 diff --git a/src/control/server/storage/metadata/provider.go b/src/control/server/storage/metadata/provider.go index ba3b3110ee8..bf7c86a0b6d 100644 --- a/src/control/server/storage/metadata/provider.go +++ b/src/control/server/storage/metadata/provider.go @@ -1,5 +1,6 @@ // // (C) Copyright 2022-2024 Intel Corporation. +// (C) Copyright 2025 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -27,6 +28,7 @@ type ( Chown(string, int, int) error Getfs(device string) (string, error) GetfsType(path string) (*system.FsType, error) + GetDeviceLabel(string) (string, error) Mkdir(string, os.FileMode) error Mkfs(req system.MkfsReq) error RemoveAll(string) error @@ -99,10 +101,23 @@ func (p *Provider) setupMountPoint(req storage.MetadataFormatRequest) error { return errors.Wrap(err, "creating control metadata mount point") } + p.log.Debugf("checking existing device label for %q", req.Device) + label, err := p.sys.GetDeviceLabel(req.Device) + if err != nil { + return errors.Wrap(err, "checking existing device label") + } + + var opts []string + if label != "" { + p.log.Debugf("preserving existing device label %q for %q", label, req.Device) + opts = append(opts, "-L", label) + } + p.log.Debugf("formatting device %q", req.Device) if err := p.sys.Mkfs(system.MkfsReq{ Filesystem: defaultDevFS, Device: req.Device, + Options: opts, Force: true, }); err != nil { return errors.Wrap(err, "formatting 
control metadata device filesystem") diff --git a/src/control/server/storage/metadata/provider_test.go b/src/control/server/storage/metadata/provider_test.go index a3262f78f41..f9c6f05c5b2 100644 --- a/src/control/server/storage/metadata/provider_test.go +++ b/src/control/server/storage/metadata/provider_test.go @@ -1,5 +1,6 @@ // // (C) Copyright 2022-2024 Intel Corporation. +// (C) Copyright 2025 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -38,12 +39,14 @@ func TestMetadata_Provider_Format(t *testing.T) { } for name, tc := range map[string]struct { - nilProv bool - sysCfg *system.MockSysConfig - mountCfg *storage.MockMountProviderConfig - setup func(*testing.T, string) func() - req storage.MetadataFormatRequest - expErr error + nilProv bool + sysCfg *system.MockSysConfig + mountCfg *storage.MockMountProviderConfig + setup func(*testing.T, string) func() + req storage.MetadataFormatRequest + expErr error + expMkfs bool + expMkfsOpts []string }{ "nil provider": { nilProv: true, @@ -107,6 +110,7 @@ func TestMetadata_Provider_Format(t *testing.T) { sysCfg: &system.MockSysConfig{ GetfsTypeErr: []error{errors.New("mock GetfsType")}, }, + expMkfs: true, }, "GetfsType retries with parent if dir doesn't exist": { req: pathReq, @@ -129,19 +133,28 @@ func TestMetadata_Provider_Format(t *testing.T) { }, expErr: errors.New("mock MakeMountPath"), }, + "get label fails": { + req: deviceReq, + sysCfg: &system.MockSysConfig{ + GetDeviceLabelErr: errors.New("mock GetDeviceLabel"), + }, + expErr: errors.New("mock GetDeviceLabel"), + }, "mkfs fails": { req: deviceReq, sysCfg: &system.MockSysConfig{ MkfsErr: errors.New("mock mkfs"), }, - expErr: errors.New("mock mkfs"), + expErr: errors.New("mock mkfs"), + expMkfs: true, }, "Mount fails": { req: deviceReq, mountCfg: &storage.MockMountProviderConfig{ MountErr: errors.New("mock Mount"), }, - expErr: errors.New("mock Mount"), + expErr: errors.New("mock Mount"), + expMkfs: true, }, 
"remove old data dir fails": { req: deviceReq, @@ -159,7 +172,8 @@ func TestMetadata_Provider_Format(t *testing.T) { } } }, - expErr: errors.New("removing old control metadata subdirectory"), + expErr: errors.New("removing old control metadata subdirectory"), + expMkfs: true, }, "create data dir fails": { req: deviceReq, @@ -177,14 +191,16 @@ func TestMetadata_Provider_Format(t *testing.T) { } } }, - expErr: errors.New("creating control metadata subdirectory"), + expErr: errors.New("creating control metadata subdirectory"), + expMkfs: true, }, "chown data dir fails": { req: deviceReq, sysCfg: &system.MockSysConfig{ ChownErr: errors.New("mock chown"), }, - expErr: errors.New("mock chown"), + expErr: errors.New("mock chown"), + expMkfs: true, }, "Unmount fails": { req: deviceReq, @@ -192,10 +208,20 @@ func TestMetadata_Provider_Format(t *testing.T) { IsMountedRes: true, UnmountErr: errors.New("mock Unmount"), }, - expErr: errors.New("mock Unmount"), + expErr: errors.New("mock Unmount"), + expMkfs: true, }, "device success": { + req: deviceReq, + expMkfs: true, + }, + "preserve existing label": { req: deviceReq, + sysCfg: &system.MockSysConfig{ + GetDeviceLabelRes: "old_label", + }, + expMkfsOpts: []string{"-L", "old_label"}, + expMkfs: true, }, "path only doesn't attempt device format": { req: pathReq, @@ -244,14 +270,23 @@ func TestMetadata_Provider_Format(t *testing.T) { defer teardown() var p *Provider + mockSys := system.NewMockSysProvider(log, tc.sysCfg) if !tc.nilProv { - p = NewProvider(log, system.NewMockSysProvider(log, tc.sysCfg), - storage.NewMockMountProvider(tc.mountCfg)) + p = NewProvider(log, mockSys, storage.NewMockMountProvider(tc.mountCfg)) } err := p.Format(tc.req) test.CmpErr(t, tc.expErr, err) + + if tc.expMkfs { + test.AssertEqual(t, 1, len(mockSys.MkfsReqs), "should have called mkfs") + if diff := cmp.Diff(tc.expMkfsOpts, mockSys.MkfsReqs[0].Options); diff != "" { + t.Errorf("unexpected mkfs options (-want +got):\n%s\n", diff) + } + } else { + 
test.AssertEqual(t, 0, len(mockSys.MkfsReqs), "should not have called mkfs") + } }) } } From 7abad43d2775f7306ff33e6dc84fbcb18447cb41 Mon Sep 17 00:00:00 2001 From: Ken Cain Date: Tue, 16 Dec 2025 19:31:52 -0500 Subject: [PATCH 087/253] DAOS-17358 test: add int_rebuild_dkeys_stop_failing() (#17220) Using the interactive rebuild feature, stop a repeatedly-failing rebuild (triggered by a rank targets exclusion). Use command dmg system stop --force while the failing rebuild is in its Fail_reclaim stage. New common test functions are added to support waiting for the pool rebuild state rs_version to be below a given version. Signed-off-by: Kenneth Cain --- src/tests/ftest/daos_test/suite.yaml | 2 +- src/tests/suite/daos_rebuild_interactive.c | 127 +++++++++++++++++---- src/tests/suite/daos_test.h | 2 + src/tests/suite/daos_test_common.c | 61 +++++++++- 4 files changed, 166 insertions(+), 26 deletions(-) diff --git a/src/tests/ftest/daos_test/suite.yaml b/src/tests/ftest/daos_test/suite.yaml index 23e8bf7297b..7fb851e4e56 100644 --- a/src/tests/ftest/daos_test/suite.yaml +++ b/src/tests/ftest/daos_test/suite.yaml @@ -25,7 +25,7 @@ timeouts: test_daos_rebuild_simple: 1800 test_daos_drain_simple: 3720 test_daos_extend_simple: 3600 - test_daos_rebuild_interactive: 1020 + test_daos_rebuild_interactive: 1185 test_daos_oid_allocator: 640 test_daos_checksum: 500 test_daos_rebuild_ec: 9000 diff --git a/src/tests/suite/daos_rebuild_interactive.c b/src/tests/suite/daos_rebuild_interactive.c index b74a875549a..ea4dc200ffb 100644 --- a/src/tests/suite/daos_rebuild_interactive.c +++ b/src/tests/suite/daos_rebuild_interactive.c @@ -22,7 +22,9 @@ #define DEFAULT_FAIL_TGT 0 #define DRAIN_KEY_NR 50 +#define KEY_NR 10 #define OBJ_NR 10 +#define DATA_SIZE (1048576 * 2 + 512) static void reintegrate_with_inflight_io(test_arg_t *arg, daos_obj_id_t *oid, d_rank_t rank, int tgt) @@ -52,8 +54,7 @@ reintegrate_with_inflight_io(test_arg_t *arg, daos_obj_id_t *oid, d_rank_t rank, int rc; rc = 
daos_obj_verify(arg->coh, inflight_oid, DAOS_EPOCH_MAX); - if (rc != 0) - assert_rc_equal(rc, -DER_NOSYS); + assert_rc_equal(rc, 0); } } @@ -109,23 +110,19 @@ int_rebuild_snap_update_recs(void **state) for (i = 0; i < SNAP_CNT; i++) { rc = daos_obj_verify(arg->coh, oid, snap_epoch[i]); - if (rc != 0) - assert_rc_equal(rc, -DER_NOSYS); + assert_rc_equal(rc, 0); } rc = daos_obj_verify(arg->coh, oid, DAOS_EPOCH_MAX); - if (rc != 0) - assert_rc_equal(rc, -DER_NOSYS); + assert_rc_equal(rc, 0); arg->interactive_rebuild = 0; reintegrate_with_inflight_io(arg, &oid, ranks_to_kill[0], tgt); for (i = 0; i < SNAP_CNT; i++) { rc = daos_obj_verify(arg->coh, oid, snap_epoch[i]); - if (rc != 0) - assert_rc_equal(rc, -DER_NOSYS); + assert_rc_equal(rc, 0); } rc = daos_obj_verify(arg->coh, oid, DAOS_EPOCH_MAX); - if (rc != 0) - assert_rc_equal(rc, -DER_NOSYS); + assert_rc_equal(rc, 0); T_END(); } @@ -170,24 +167,20 @@ int_rebuild_snap_punch_recs(void **state) for (i = 0; i < SNAP_CNT; i++) { rc = daos_obj_verify(arg->coh, oid, snap_epoch[i]); - if (rc != 0) - assert_rc_equal(rc, -DER_NOSYS); + assert_rc_equal(rc, 0); } rc = daos_obj_verify(arg->coh, oid, DAOS_EPOCH_MAX); - if (rc != 0) - assert_rc_equal(rc, -DER_NOSYS); + assert_rc_equal(rc, 0); /* insert rebuild stop|start into the reintegrate rebuild execution */ arg->interactive_rebuild = 1; reintegrate_with_inflight_io(arg, &oid, ranks_to_kill[0], tgt); for (i = 0; i < SNAP_CNT; i++) { rc = daos_obj_verify(arg->coh, oid, snap_epoch[i]); - if (rc != 0) - assert_rc_equal(rc, -DER_NOSYS); + assert_rc_equal(rc, 0); } rc = daos_obj_verify(arg->coh, oid, DAOS_EPOCH_MAX); - if (rc != 0) - assert_rc_equal(rc, -DER_NOSYS); + assert_rc_equal(rc, 0); T_END(); } @@ -276,8 +269,7 @@ int_rebuild_many_objects_with_failure(void **state) for (i = 0; i < NUM_OBJS; i++) { rc = daos_obj_verify(arg->coh, oids[i], DAOS_EPOCH_MAX); - if (rc != 0) - assert_rc_equal(rc, -DER_NOSYS); + assert_rc_equal(rc, 0); } D_FREE(oids); T_END(); @@ -643,6 +635,99 @@ 
int_dfs_extend_enumerate_extend(void **state) T_END(); } +static void +int_rebuild_dkeys_stop_failing(void **state) +{ + test_arg_t *arg = *state; + daos_pool_info_t pinfo = {0}; + d_rank_t kill_rank = 0; + int kill_rank_nr; + uint32_t excl_rebuild_ver; + uint32_t reclaim_rebuild_ver; + daos_obj_id_t oid; + struct ioreq req; + int i; + int rc; + + FAULT_INJECTION_REQUIRED(); + + if (!test_runable(arg, 4)) + return; + + T_BEGIN(); + + oid = daos_test_oid_gen(arg->coh, arg->obj_class, 0, 0, arg->myrank); + ioreq_init(&req, arg->coh, oid, DAOS_IOD_ARRAY, arg); + + /** Insert records */ + print_message("Insert %d kv record in object " DF_OID "\n", KEY_NR, DP_OID(oid)); + for (i = 0; i < KEY_NR; i++) { + char key[32] = {0}; + daos_recx_t recx; + char data[DATA_SIZE]; + + sprintf(key, "dkey_0_%d", i); + insert_single(key, "a_key", 0, "data", strlen("data") + 1, DAOS_TX_NONE, &req); + + sprintf(key, "dkey_0_1M_%d", i); + recx.rx_idx = 0; + recx.rx_nr = DATA_SIZE; + + memset(data, 'a', DATA_SIZE); + insert_recxs(key, "a_key_1M", 1, DAOS_TX_NONE, &recx, 1, data, DATA_SIZE, &req); + } + + get_killing_rank_by_oid(arg, oid, 1, 0, &kill_rank, &kill_rank_nr); + ioreq_fini(&req); + + /* Cause first (and subsequent) rebuild attempts to fail with -DER_IO */ + if (arg->myrank == 0) { + print_message("inject fault DAOS_REBUILD_OBJ_FAIL on all engines\n"); + daos_debug_set_params(arg->group, -1, DMG_KEY_FAIL_LOC, + DAOS_REBUILD_OBJ_FAIL | DAOS_FAIL_ALWAYS, 0, NULL); + } + + /* Trigger exclude and rebuild, fail twice, force-stop it during the second Fail_reclaim */ + arg->no_rebuild = 1; + rebuild_single_pool_target(arg, kill_rank, -1, false); + arg->no_rebuild = 0; + test_rebuild_wait_to_start(&arg, 1); + pinfo.pi_bits = DPI_REBUILD_STATUS; + rc = test_pool_get_info(arg, &pinfo, NULL /* engine_ranks */); + assert_rc_equal(rc, 0); + excl_rebuild_ver = pinfo.pi_rebuild_st.rs_version; + + print_message("Wait for exclude rebuild ver %u to fail (and start Fail_reclaim)\n", + 
excl_rebuild_ver); + test_rebuild_wait_to_start_before_ver(&arg, 1, excl_rebuild_ver); + rc = test_pool_get_info(arg, &pinfo, NULL /* engine_ranks */); + assert_rc_equal(rc, 0); + reclaim_rebuild_ver = pinfo.pi_rebuild_st.rs_version; + + print_message("Wait for Fail_reclaim to finish (and start retry of exclude rebuild)\n"); + test_rebuild_wait_to_start_after_ver(&arg, 1, reclaim_rebuild_ver); + print_message("Wait for second exclude rebuild to fail (and start Fail_reclaim)\n"); + test_rebuild_wait_to_start_before_ver(&arg, 1, excl_rebuild_ver); + sleep(2); + + print_message("Force-stop runaway failing exclude rebuild retries\n"); + rc = rebuild_force_stop_with_dmg(arg); + assert_rc_equal(rc, 0); + test_rebuild_wait(&arg, 1); + assert_int_equal(arg->pool.pool_info.pi_rebuild_st.rs_state, DRS_NOT_STARTED); + assert_int_equal(arg->pool.pool_info.pi_rebuild_st.rs_errno, -DER_OP_CANCELED); + print_message("Exclude rebuild stopped\n"); + + daos_debug_set_params(arg->group, -1, DMG_KEY_FAIL_LOC, 0, 0, NULL); + + /* Do not restart the rebuild ; instead, go directly to reintegrate the rank */ + reintegrate_with_inflight_io(arg, &oid, kill_rank, -1); + rc = daos_obj_verify(arg->coh, oid, DAOS_EPOCH_MAX); + if (rc != 0) + assert_rc_equal(rc, -DER_NOSYS); + T_END(); +} + /** create a new pool/container for each test */ static const struct CMUnitTest rebuild_interactive_tests[] = { {"IREBUILD1: interactive exclude: records with multiple snapshots", @@ -659,6 +744,8 @@ static const struct CMUnitTest rebuild_interactive_tests[] = { rebuild_sub_rf0_setup, test_teardown}, {"IREBUILD7: interactive extend: enumerate object during two rebuilds", int_dfs_extend_enumerate_extend, rebuild_sub_3nodes_rf0_setup, test_teardown}, + {"IREBUILD8: interactive exclude: stop repeatedly-failing rebuild", + int_rebuild_dkeys_stop_failing, rebuild_small_sub_setup, test_teardown}, }; int diff --git a/src/tests/suite/daos_test.h b/src/tests/suite/daos_test.h index 50281309405..dd76c844992 100644 --- 
a/src/tests/suite/daos_test.h +++ b/src/tests/suite/daos_test.h @@ -422,6 +422,8 @@ void test_rebuild_wait_to_start(test_arg_t **args, int args_cnt); void test_rebuild_wait_to_start_after_ver(test_arg_t **args, int args_cnt, uint32_t rs_version); +void +test_rebuild_wait_to_start_before_ver(test_arg_t **args, int args_cnt, uint32_t rs_version); void test_rebuild_wait_to_error(test_arg_t **args, int args_cnt); int daos_pool_set_prop(const uuid_t pool_uuid, const char *name, diff --git a/src/tests/suite/daos_test_common.c b/src/tests/suite/daos_test_common.c index cc8b0e5b77c..aa3fd327c08 100644 --- a/src/tests/suite/daos_test_common.c +++ b/src/tests/suite/daos_test_common.c @@ -783,7 +783,7 @@ test_pool_get_info(test_arg_t *arg, daos_pool_info_t *pinfo, d_rank_list_t **eng /* Determine if pool rebuild is busy, and the rebuild version is > rs_version */ static bool -rebuild_pool_started(test_arg_t *arg, uint32_t rs_version) +rebuild_pool_started_after_ver(test_arg_t *arg, uint32_t rs_version) { daos_pool_info_t pinfo = {0}; struct daos_rebuild_status *rst; @@ -807,6 +807,32 @@ rebuild_pool_started(test_arg_t *arg, uint32_t rs_version) } } +/* Determine if pool rebuild is busy, and the rebuild version is < rs_version */ +static bool +rebuild_pool_started_before_ver(test_arg_t *arg, uint32_t rs_version) +{ + daos_pool_info_t pinfo = {0}; + struct daos_rebuild_status *rst; + int rc; + + pinfo.pi_bits = DPI_REBUILD_STATUS; + rc = test_pool_get_info(arg, &pinfo, NULL /* engine_ranks */); + rst = &pinfo.pi_rebuild_st; + + if (rc != 0) { + print_message("pool query for rebuild status failed, rc=%d, pool " DF_UUIDF "\n", + rc, DP_UUID(arg->pool.pool_uuid)); + return false; + } else { + bool in_progress = (rst->rs_state == DRS_IN_PROGRESS); + print_message("rebuild for pool " DF_UUIDF "has %sstarted, rs_version=%u " + "(waiting for < %d)\n", + DP_UUID(arg->pool.pool_uuid), in_progress ? 
"" : "not yet ", + rst->rs_version, rs_version); + return in_progress && (rst->rs_version < rs_version); + } +} + static bool rebuild_pool_erroring(test_arg_t *arg) { @@ -910,7 +936,7 @@ test_get_last_svr_rank(test_arg_t *arg) } bool -test_rebuild_started(test_arg_t **args, int args_cnt, uint32_t rs_version) +test_rebuild_started_after_ver(test_arg_t **args, int args_cnt, uint32_t rs_version) { bool all_started = true; int i; @@ -919,7 +945,25 @@ test_rebuild_started(test_arg_t **args, int args_cnt, uint32_t rs_version) bool started = true; if (!args[i]->pool.destroyed) - started = rebuild_pool_started(args[i], rs_version); + started = rebuild_pool_started_after_ver(args[i], rs_version); + + if (!started) + all_started = false; + } + return all_started; +} + +bool +test_rebuild_started_before_ver(test_arg_t **args, int args_cnt, uint32_t rs_version) +{ + bool all_started = true; + int i; + + for (i = 0; i < args_cnt; i++) { + bool started = true; + + if (!args[i]->pool.destroyed) + started = rebuild_pool_started_before_ver(args[i], rs_version); if (!started) all_started = false; @@ -930,14 +974,21 @@ test_rebuild_started(test_arg_t **args, int args_cnt, uint32_t rs_version) void test_rebuild_wait_to_start(test_arg_t **args, int args_cnt) { - while (!test_rebuild_started(args, args_cnt, 0 /* don't care rs_version */)) + while (!test_rebuild_started_after_ver(args, args_cnt, 0 /* don't care rs_version */)) sleep(2); } void test_rebuild_wait_to_start_after_ver(test_arg_t **args, int args_cnt, uint32_t rs_version) { - while (!test_rebuild_started(args, args_cnt, rs_version)) + while (!test_rebuild_started_after_ver(args, args_cnt, rs_version)) + sleep(2); +} + +void +test_rebuild_wait_to_start_before_ver(test_arg_t **args, int args_cnt, uint32_t rs_version) +{ + while (!test_rebuild_started_before_ver(args, args_cnt, rs_version)) sleep(2); } From 6386f9fa601cc4e6ecad38e97ad030521de2a879 Mon Sep 17 00:00:00 2001 From: Liu Xuezhao Date: Wed, 17 Dec 2025 21:25:27 +0800 
Subject: [PATCH 088/253] DAOS-18310 rebuild: refine ds_iv_ns_reint_prep (#17262) Cannot do the IV ns cleanup if with in-flight IV operation, wait IV operation's completion in ds_iv_ns_reint_prep before cleanup. Signed-off-by: Xuezhao Liu --- src/engine/server_iv.c | 35 ++++++++++++++++++++++++++++++++--- src/include/daos_srv/iv.h | 3 ++- src/object/cli_obj.c | 2 +- src/object/cli_shard.c | 3 ++- src/pool/srv_target.c | 4 ++-- 5 files changed, 39 insertions(+), 8 deletions(-) diff --git a/src/engine/server_iv.c b/src/engine/server_iv.c index abde85fa360..96186da2c9c 100644 --- a/src/engine/server_iv.c +++ b/src/engine/server_iv.c @@ -339,8 +339,8 @@ iv_entry_lookup_or_create(struct ds_iv_ns *ns, struct ds_iv_key *key, entry->iv_ref++; if (got != NULL) *got = entry; - D_DEBUG(DB_TRACE, "Get entry %p/%d key %d\n", - entry, entry->iv_ref, key->class_id); + D_DEBUG(DB_TRACE, "Get entry %p, ref %d valid %d key %d\n", entry, entry->iv_ref, + entry->iv_valid, key->class_id); return 0; } @@ -883,12 +883,39 @@ ds_iv_ns_cleanup(struct ds_iv_ns *ns) /* To prepare for reintegrate, cleanup some IVs' cache. * May add more types later when needed. */ -void +int ds_iv_ns_reint_prep(struct ds_iv_ns *ns) { struct ds_iv_entry *entry; struct ds_iv_entry *tmp; + uint32_t msec = 100; + uint32_t total = 0; + int rc; + + /* iv_refcount is 1 after ns create, + * 2 after ds_iv_ns_start. + * > 2 if with any in-flight IV operation. + * here wait the in-flight IV operation for at most 30 seconds, if cannot finish within + * 30 seconds return EBUSY so user can redo the reintegration. Should be very rare case + * for 30 seconds IV timeout. 
+ */ + while (ns->iv_refcount > 2) { + msec = min(5000, msec * 2); + dss_sleep(msec); + total += msec; + if (total > 30000) { + rc = -DER_BUSY; + DL_ERROR( + rc, DF_UUID " timed out for wait IV, iv_refcount %d, waited %d seconds", + DP_UUID(ns->iv_pool_uuid), ns->iv_refcount, min(1, total / 1000)); + return rc; + } else { + D_INFO(DF_UUID " wait IV operation, iv_refcount %d, waited %d seconds", + DP_UUID(ns->iv_pool_uuid), ns->iv_refcount, min(1, total / 1000)); + } + } + /* no yield for the cleanup */ d_list_for_each_entry_safe(entry, tmp, &ns->iv_entry_list, iv_link) { if (entry->iv_key.class_id == IV_CONT_TRACK_EPOCH || entry->iv_key.class_id == IV_CONT_PROP || @@ -899,6 +926,8 @@ ds_iv_ns_reint_prep(struct ds_iv_ns *ns) iv_entry_free(entry); } } + + return 0; } void diff --git a/src/include/daos_srv/iv.h b/src/include/daos_srv/iv.h index 1221e97739b..46ec9d0f8e8 100644 --- a/src/include/daos_srv/iv.h +++ b/src/include/daos_srv/iv.h @@ -319,7 +319,8 @@ int ds_iv_ns_create(crt_context_t ctx, uuid_t pool_uuid, crt_group_t *grp, void ds_iv_ns_update(struct ds_iv_ns *ns, unsigned int master_rank, uint64_t term); void ds_iv_ns_cleanup(struct ds_iv_ns *ns); -void ds_iv_ns_reint_prep(struct ds_iv_ns *ns); +int + ds_iv_ns_reint_prep(struct ds_iv_ns *ns); void ds_iv_ns_stop(struct ds_iv_ns *ns); void ds_iv_ns_leader_stop(struct ds_iv_ns *ns); void ds_iv_ns_start(struct ds_iv_ns *ns); diff --git a/src/object/cli_obj.c b/src/object/cli_obj.c index d22cf109e41..0cc48dcea1f 100644 --- a/src/object/cli_obj.c +++ b/src/object/cli_obj.c @@ -6468,7 +6468,7 @@ shard_anchors_check_alloc_bufs(struct obj_auxi_args *obj_auxi, struct shard_anch } if (obj_args->recxs != NULL) { - if (sub_anchor->ssa_recxs != NULL && sub_anchors->sa_nr == nr) + if (sub_anchor->ssa_recxs != NULL && sub_anchors->sa_nr != nr) D_FREE(sub_anchor->ssa_recxs); if (sub_anchor->ssa_recxs == NULL) { diff --git a/src/object/cli_shard.c b/src/object/cli_shard.c index 871474f10b5..ba806bdb823 100644 --- 
a/src/object/cli_shard.c +++ b/src/object/cli_shard.c @@ -847,7 +847,8 @@ dc_rw_cb(tse_task_t *task, void *arg) * rec2big errors which can be expected. */ if (rc == -DER_REC2BIG || rc == -DER_NONEXIST || rc == -DER_NO_PERM || - rc == -DER_EXIST || rc == -DER_RF) + rc == -DER_EXIST || rc == -DER_RF || rc == -DER_UPDATE_AGAIN || + rc == -DER_FETCH_AGAIN) D_DEBUG(DB_IO, DF_UOID" rpc %p opc %d to rank %d tag %d: "DF_RC"\n", DP_UOID(orw->orw_oid), rw_args->rpc, opc, rw_args->rpc->cr_ep.ep_rank, rw_args->rpc->cr_ep.ep_tag, DP_RC(rc)); diff --git a/src/pool/srv_target.c b/src/pool/srv_target.c index a3a89c56f39..24b44205a3f 100644 --- a/src/pool/srv_target.c +++ b/src/pool/srv_target.c @@ -2798,7 +2798,7 @@ ds_pool_tgt_discard_handler(crt_rpc_t *rpc) pool->sp_discard_status = 0; rc = dss_ult_execute(ds_pool_tgt_discard_ult, arg, NULL, NULL, DSS_XS_SYS, 0, 0); if (rc == 0) - ds_iv_ns_reint_prep(pool->sp_iv_ns); /* cleanup IV cache */ + rc = ds_iv_ns_reint_prep(pool->sp_iv_ns); /* cleanup IV cache */ ds_pool_put(pool); out: @@ -3120,7 +3120,7 @@ ds_pool_recov_cont_handler(crt_rpc_t *rpc) ABT_rwlock_unlock(pool->sp_recov_lock); if (rc == 0) - ds_iv_ns_reint_prep(pool->sp_iv_ns); /* cleanup IV cache */ + rc = ds_iv_ns_reint_prep(pool->sp_iv_ns); /* cleanup IV cache */ out: DL_CDEBUG(rc != 0, DLOG_ERR, DB_REBUILD, rc, From 2b458ed9e637ec5b912b0f19710057f80163e16a Mon Sep 17 00:00:00 2001 From: Tom Nabarro Date: Wed, 17 Dec 2025 13:47:28 +0000 Subject: [PATCH 089/253] DAOS-18324 control: Apply group perms to hugepage and dpdk dirs (#17260) Allow dlck off-line debug tool to create hugepage and dpdk files if run as a member of the daos_server user-group. 
Signed-off-by: Tom Nabarro --- src/control/server/init/setup_spdk.sh | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/control/server/init/setup_spdk.sh b/src/control/server/init/setup_spdk.sh index 059d1d3c4b0..845baf0eb24 100755 --- a/src/control/server/init/setup_spdk.sh +++ b/src/control/server/init/setup_spdk.sh @@ -92,8 +92,12 @@ else set +x if [ -d "/dev/hugepages/" ]; then - echo "RUN: chown -R ${_TARGET_USER} /dev/hugepages" - chown -R "${_TARGET_USER}" "/dev/hugepages" + echo "RUN: chown -R ${_TARGET_USER}:${_TARGET_USER} /dev/hugepages" + chown -R "${_TARGET_USER}:${_TARGET_USER}" /dev/hugepages + fi + if [ -d "/tmp/dpdk/" ]; then + echo "RUN: chmod -R g+rwx /tmp/dpdk" + chmod -R g+rwx /tmp/dpdk fi echo "Setting VFIO file permissions for unprivileged access" From a8b37c814803b075c79e7b0899dd270c695d6b9b Mon Sep 17 00:00:00 2001 From: Dalton Bohning Date: Wed, 17 Dec 2025 07:57:11 -0800 Subject: [PATCH 090/253] Revert "DAOS-17495 client: intercept getcwd with trampoline (#16398)" (#17261) This reverts commit eaed9023ddb5a9a18135f3e188f6d02881f841fe. Signed-off-by: Dalton Bohning --- src/client/dfuse/inval.c | 19 ++------ src/client/dfuse/pil4dfs/int_dfs.c | 76 ++++++++++++++---------------- src/tests/ftest/daos_test/dfuse.py | 2 - src/tests/suite/dfuse_test.c | 73 +--------------------------- 4 files changed, 41 insertions(+), 129 deletions(-) diff --git a/src/client/dfuse/inval.c b/src/client/dfuse/inval.c index b668928e5c7..3ddcc052d86 100644 --- a/src/client/dfuse/inval.c +++ b/src/client/dfuse/inval.c @@ -1,6 +1,5 @@ /** * (C) Copyright 2016-2024 Intel Corporation. 
- * (C) Copyright 2025 Hewlett Packard Enterprise Development LP * (C) Copyright 2025 Google LLC * * SPDX-License-Identifier: BSD-2-Clause-Patent @@ -79,8 +78,6 @@ #define INVAL_DIRECTORY_GRACE (60 * 60 * 24 * 365 * 20) /* 20 years to avoid getcwd failures */ #define INVAL_FILE_GRACE 2 -static double expiration_time_dir = INVAL_DIRECTORY_GRACE; - /* Represents one timeout value (time). Maintains a ordered list of dentries that are using * this timeout. */ @@ -265,13 +262,7 @@ ival_bucket_add(d_list_t *list, double timeout) int ival_init(struct dfuse_info *dfuse_info) { - int rc; - uint64_t expiration_time_dir_env; - - /* this env is only used for testing */ - rc = d_getenv_uint64_t("D_EXPIRATION_TIME_DIR", &expiration_time_dir_env); - if (rc != -DER_NONEXIST) - expiration_time_dir = 1.0 * expiration_time_dir_env; + int rc; DFUSE_TRA_UP(&ival_data, dfuse_info, "invalidator"); @@ -352,7 +343,7 @@ ival_update_inode(struct dfuse_inode_entry *inode, double timeout) bool wake = false; if (S_ISDIR(inode->ie_stat.st_mode)) - timeout += expiration_time_dir; + timeout += INVAL_DIRECTORY_GRACE; else timeout += INVAL_FILE_GRACE; @@ -463,13 +454,13 @@ ival_add_cont_buckets(struct dfuse_cont *dfc) D_MUTEX_LOCK(&ival_lock); - rc = ival_bucket_add_value(dfc->dfc_dentry_dir_timeout + expiration_time_dir); + rc = ival_bucket_add_value(dfc->dfc_dentry_dir_timeout + INVAL_DIRECTORY_GRACE); if (rc != 0) goto out; if (dfc->dfc_dentry_timeout != 0) { rc = ival_bucket_add_value(dfc->dfc_dentry_timeout + INVAL_FILE_GRACE); if (rc != 0) - ival_bucket_dec_value(dfc->dfc_dentry_dir_timeout + expiration_time_dir); + ival_bucket_dec_value(dfc->dfc_dentry_dir_timeout + INVAL_DIRECTORY_GRACE); } out: @@ -484,7 +475,7 @@ ival_dec_cont_buckets(struct dfuse_cont *dfc) D_MUTEX_LOCK(&ival_lock); if (dfc->dfc_dentry_timeout != 0) ival_bucket_dec_value(dfc->dfc_dentry_timeout + INVAL_FILE_GRACE); - ival_bucket_dec_value(dfc->dfc_dentry_dir_timeout + expiration_time_dir); + 
ival_bucket_dec_value(dfc->dfc_dentry_dir_timeout + INVAL_DIRECTORY_GRACE); D_MUTEX_UNLOCK(&ival_lock); } diff --git a/src/client/dfuse/pil4dfs/int_dfs.c b/src/client/dfuse/pil4dfs/int_dfs.c index 0276c8fa4f4..fad6ccb5ede 100644 --- a/src/client/dfuse/pil4dfs/int_dfs.c +++ b/src/client/dfuse/pil4dfs/int_dfs.c @@ -414,7 +414,7 @@ static int (*next_rename)(const char *old_name, const char *new_name); static int (*next_renameat)(int olddirfd, const char *oldpath, int newdirfd, const char *newpath); -static char *(*libc_getcwd)(char *buf, size_t size); +static char *(*next_getcwd)(char *buf, size_t size); static int (*libc_unlink)(const char *path); @@ -495,6 +495,12 @@ static int (*next_mpi_init)(int *argc, char ***argv); static int (*next_pmpi_init)(int *argc, char ***argv); static void *(*next_dlopen)(const char *filename, int flags); +/* to do!! */ +/** + * static char * (*org_realpath)(const char *pathname, char *resolved_path); + * org_realpath real_realpath=NULL; + */ + static int remove_dot_dot(char path[], int *len); static int @@ -5088,57 +5094,36 @@ renameat(int olddirfd, const char *oldpath, int newdirfd, const char *newpath) } char * -new_getcwd(char *buf, size_t size) +getcwd(char *buf, size_t size) { - char *cwd; - size_t len; - int rc; - int idx; - struct duns_attr_t attr = {0}; + if (next_getcwd == NULL) { + next_getcwd = dlsym(RTLD_NEXT, "getcwd"); + D_ASSERT(next_getcwd != NULL); + } if (!d_hook_enabled) - return libc_getcwd(buf, size); + return next_getcwd(buf, size); - if (cur_dir[0] != '/') { - /* cur_dir is not initialized yet */ - cwd = libc_getcwd(cur_dir, DFS_MAX_PATH); - if (cwd == NULL) - return NULL; - } + if (cur_dir[0] != '/') + update_cwd(); - idx = query_dfs_mount(cur_dir); - if (idx < 0) - return libc_getcwd(buf, size); + if (query_dfs_mount(cur_dir) < 0) + return next_getcwd(buf, size); if (buf == NULL) { + size_t len; + if (size == 0) size = PATH_MAX; len = strnlen(cur_dir, size); if (len >= size) { - errno = ENAMETOOLONG; + errno = 
ERANGE; return NULL; } return strdup(cur_dir); } - rc = duns_resolve_path(cur_dir, &attr); - if (rc) { - errno = rc; - return NULL; - } - - rc = snprintf(buf, size, "%s%s", dfs_list[idx].fs_root, attr.da_rel_path); - if (rc == size) { - /* buffer size is not large enough */ - errno = ENAMETOOLONG; - D_FREE(attr.da_rel_path); - return NULL; - } else if (rc < 0) { - D_FREE(attr.da_rel_path); - return NULL; - } - - D_FREE(attr.da_rel_path); + strncpy(buf, cur_dir, size); return buf; } @@ -6842,12 +6827,23 @@ static void update_cwd(void) { char *cwd = NULL; + char *pt_end = NULL; /* daos_init() may be not called yet. */ - cwd = libc_getcwd(cur_dir, DFS_MAX_PATH); + cwd = get_current_dir_name(); + if (cwd == NULL) { - D_FATAL("fatal error to get CWD with getcwd(): %d (%s)\n", errno, strerror(errno)); + D_FATAL("fatal error to get CWD with get_current_dir_name(): %d (%s)\n", errno, + strerror(errno)); abort(); + } else { + pt_end = stpncpy(cur_dir, cwd, DFS_MAX_PATH - 1); + if ((long int)(pt_end - cur_dir) >= DFS_MAX_PATH - 1) { + D_FATAL("fatal error, cwd path is too long: %d (%s)\n", ENAMETOOLONG, + strerror(ENAMETOOLONG)); + abort(); + } + free(cwd); } } @@ -7258,6 +7254,7 @@ init_myhook(void) return; } + update_cwd(); rc = D_MUTEX_INIT(&lock_reserve_fd, NULL); if (rc) return; @@ -7348,7 +7345,6 @@ init_myhook(void) register_a_hook("libc", "exit", (void *)new_exit, (long int *)(&next_exit)); register_a_hook("libc", "dup3", (void *)new_dup3, (long int *)(&libc_dup3)); register_a_hook("libc", "readlink", (void *)new_readlink, (long int *)(&libc_readlink)); - register_a_hook("libc", "getcwd", (void *)new_getcwd, (long int *)(&libc_getcwd)); libc_version = query_libc_version(); if (libc_ver_cmp(libc_version, 2.34) < 0) @@ -7366,8 +7362,6 @@ init_myhook(void) install_hook(); - update_cwd(); - d_hook_enabled = 1; hook_enabled_bak = d_hook_enabled; } diff --git a/src/tests/ftest/daos_test/dfuse.py b/src/tests/ftest/daos_test/dfuse.py index 09fce4008f4..0e8b8d998a3 100644 --- 
a/src/tests/ftest/daos_test/dfuse.py +++ b/src/tests/ftest/daos_test/dfuse.py @@ -74,8 +74,6 @@ def run_test(self, il_lib=None): container.set_attr(attrs=cont_attrs) dfuse = get_dfuse(self, self.hostlist_clients) - # Only for test. Set directory expiration time 1 second. - dfuse.env['D_EXPIRATION_TIME_DIR'] = '1' start_dfuse(self, dfuse, pool, container) mount_dir = dfuse.mount_dir.value diff --git a/src/tests/suite/dfuse_test.c b/src/tests/suite/dfuse_test.c index bdae8da0db9..bf078145746 100644 --- a/src/tests/suite/dfuse_test.c +++ b/src/tests/suite/dfuse_test.c @@ -638,23 +638,7 @@ do_directory(void **state) DIR *dirp; struct dirent **namelist; long pos; - int entry_count = 100; - bool with_pil4dfs = false; - bool use_dfuse = true; - char *env_ldpreload; - /* "/tmp/dfuse-test" is assigned in src/tests/ftest/daos_test/dfuse.py */ - char native_mount_dir[] = "/tmp/dfuse-test"; - char cwd[1024]; - char cwd_saved[1024]; - char *resolved_path; - char *path_ret; - - if (strstr(test_dir, native_mount_dir)) - use_dfuse = false; - - env_ldpreload = getenv("LD_PRELOAD"); - if (env_ldpreload != NULL && strstr(env_ldpreload, "libpil4dfs.so") != NULL) - with_pil4dfs = true; + int entry_count = 100; printf("Creating dir and files\n"); root = open(test_dir, O_PATH | O_DIRECTORY); @@ -749,61 +733,6 @@ do_directory(void **state) rc = close(root); assert_return_code(rc, errno); - - if (!with_pil4dfs || !use_dfuse) - return; - - /* start testing getcwd() and realpath() */ - resolved_path = malloc(PATH_MAX); - assert_true(resolved_path != NULL); - - path_ret = getcwd(cwd_saved, sizeof(cwd_saved)); - assert_true(path_ret != NULL); - - rc = chdir(test_dir); - assert_return_code(rc, errno); - - rc = mkdir("dir_test", 0755); - assert_return_code(rc, errno); - - rc = symlink("dir_test", "link_test"); - assert_return_code(rc, errno); - - rc = chdir("link_test"); - assert_return_code(rc, errno); - - path_ret = getcwd(cwd, sizeof(cwd)); - assert_true(path_ret != NULL); - 
assert_true(strstr(cwd, "dir_test") != NULL); - - path_ret = realpath(".", resolved_path); - assert_true(path_ret != NULL); - assert_true(strstr(resolved_path, "dir_test") != NULL); - - sleep(2); - - path_ret = getcwd(cwd, sizeof(cwd)); - assert_true(path_ret != NULL); - assert_true(strstr(cwd, "dir_test") != NULL); - - path_ret = realpath(".", resolved_path); - assert_true(path_ret != NULL); - assert_true(strstr(resolved_path, "dir_test") != NULL); - - rc = chdir(".."); - assert_return_code(rc, errno); - - rc = unlink("link_test"); - assert_return_code(rc, errno); - - rc = rmdir("dir_test"); - assert_return_code(rc, errno); - - rc = chdir(cwd_saved); - assert_return_code(rc, errno); - - free(resolved_path); - /* end testing getcwd() and realpath() */ } void From e6876c3ba31116768505acaded016d62ddb1071e Mon Sep 17 00:00:00 2001 From: Joseph Moore <26410038+jgmoore-or@users.noreply.github.com> Date: Wed, 17 Dec 2025 21:32:26 -0700 Subject: [PATCH 091/253] DAOS-17931 engine: Terminate engine process upon receipt of SIGBUS signal. (#17268) * DAOS-17931 engine: Terminate engine process upon receipt of SIGBUS signal. 
Signed-off-by: Joseph Moore --- src/engine/init.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/engine/init.c b/src/engine/init.c index 9ef8828b01c..ed3bd80f183 100644 --- a/src/engine/init.c +++ b/src/engine/init.c @@ -1134,8 +1134,9 @@ int main(int argc, char **argv) { sigset_t set; - int sig; - int rc; + bool exit_failure = false; + int sig; + int rc; /** parse command line arguments */ parse(argc, argv); @@ -1167,6 +1168,7 @@ main(int argc, char **argv) /** wait for shutdown signal */ sigemptyset(&set); + sigaddset(&set, SIGBUS); sigaddset(&set, SIGINT); sigaddset(&set, SIGTERM); sigaddset(&set, SIGUSR1); @@ -1179,7 +1181,6 @@ main(int argc, char **argv) D_ERROR("failed to wait for signals: %d\n", rc); break; } - /* open specific file to dump ABT infos and ULTs stacks */ if (sig == SIGUSR1 || sig == SIGUSR2) { struct timeval tv; @@ -1261,12 +1262,18 @@ main(int argc, char **argv) continue; } - /* SIGINT/SIGTERM cause server shutdown */ + /* Log error for SIGBUS occurrence */ + if (sig == SIGBUS) { + D_ERROR("SIGBUS signal received; proceeding to shutdown.\n"); + exit_failure = true; + } + + /* SIGINT/SIGTERM/SIGBUS cause server shutdown */ break; } /** shutdown */ server_fini(true); - exit(EXIT_SUCCESS); + exit(exit_failure ? 
EXIT_FAILURE : EXIT_SUCCESS); } From 9cd78260c758c3a20ce4d675d48f56398c78f9a9 Mon Sep 17 00:00:00 2001 From: Dalton Bohning Date: Thu, 18 Dec 2025 10:41:26 -0800 Subject: [PATCH 092/253] DAOS-18246 test: add ftest utilities for interactive rebuild (#17151) Add ftest utilities for: dmg pool rebuild start dmg pool rebuild stop dmg system drain dmg system rebuild start dmg system rebuild stop dmg system reintegrate Signed-off-by: Dalton Bohning --- src/tests/ftest/util/dmg_utils.py | 495 ++++++++++++++---------- src/tests/ftest/util/dmg_utils_base.py | 94 +++++ src/tests/ftest/util/test_utils_pool.py | 41 +- 3 files changed, 424 insertions(+), 206 deletions(-) diff --git a/src/tests/ftest/util/dmg_utils.py b/src/tests/ftest/util/dmg_utils.py index 95febc8173c..1579d1cd144 100644 --- a/src/tests/ftest/util/dmg_utils.py +++ b/src/tests/ftest/util/dmg_utils.py @@ -639,78 +639,23 @@ def pool_create(self, scm_size, uid=None, gid=None, nvme_size=None, return data - def pool_query(self, pool, show_enabled=False, health_only=False): - """Query a pool with the dmg command. + def pool_delete_acl(self, pool, principal): + """Delete the acl for a given pool. Args: - pool (str): Pool UUID or label to query. - show_enabled (bool, optional): Display enabled ranks. - health_only (bool, optional): Only perform pool health related queries. - - Raises: - CommandFailure: if the dmg pool query command fails. + pool (str): Pool for which to delete the ACL. 
+ principal (str): principal to be deleted Returns: - dict: the dmg json command output converted to a python dictionary - - """ - # Sample JSON output - # { - # "response": { - # "status": 0, - # "uuid": "EDAE0965-7A6E-48BD-A71C-A29F199C679F", - # "total_targets": 8, - # "active_targets": 8, - # "total_engines": 1, - # "disabled_targets": 0, - # "version": 1, - # "svc_ldr": 0, - # "rebuild": { - # "status": 0, - # "state": "idle", - # "objects": 0, - # "records": 0 - # }, - # "scm": { - # "total": 16000000000, - # "free": 15999992320, - # "min": 1999999040, - # "max": 1999999040, - # "mean": 1999999040 - # }, - # "nvme": { - # "total": 32000000000, - # "free": 31999950848, - # "min": 3999993856, - # "max": 3999993856, - # "mean": 3999993856 - # }, - # "enabled_ranks": [0,1,3], - # "disabled_ranks": [2] - # }, - # "error": null, - # "status": 0 - # } - return self._get_json_result(("pool", "query"), pool=pool, - show_enabled=show_enabled, health_only=health_only) - - def pool_query_targets(self, pool, rank=None, target_idx=None): - """Call dmg pool query-targets. - - Args: - pool (str): Pool UUID or label - rank (str, optional): Engine rank of the targets to be queried - target_idx (str, optional): Comma-separated list of target idx(s) to be queried + CmdResult: Object that contains exit status, stdout, and other + information. Raises: - CommandFailure: if the command fails. - - Returns: - dict: the dmg json command output converted to a python dictionary + CommandFailure: if the dmg pool delete-acl command fails. """ - return self._get_json_result(("pool", "query-targets"), pool=pool, - rank=rank, target_idx=target_idx) + return self._get_result( + ("pool", "delete-acl"), pool=pool, principal=principal) def pool_destroy(self, pool, force=True, recursive=True): """Destroy a pool with the dmg command. 
@@ -729,91 +674,116 @@ def pool_destroy(self, pool, force=True, recursive=True): """ return self._get_result(("pool", "destroy"), pool=pool, force=force, recursive=recursive) - def pool_get_acl(self, pool): - """Get the ACL for a given pool. + def pool_drain(self, pool, ranks, tgt_idx=None): + """Drain a daos_server from the pool. Args: - pool (str): Pool for which to get the ACL. + pool (str): Pool uuid. + ranks (str): Comma separated daos_server-rank ranges to drain e.g. + "0,2-5". + tgt_idx (list, optional): targets to drain on ranks e.g. "1,2". + Defaults to None. Returns: CmdResult: Object that contains exit status, stdout, and other - information. + information. Raises: - CommandFailure: if the dmg pool get-acl command fails. + CommandFailure: if the dmg pool drain command fails. """ - return self._get_result(("pool", "get-acl"), pool=pool) + return self._get_result( + ("pool", "drain"), pool=pool, ranks=ranks, tgt_idx=tgt_idx) - def pool_update_acl(self, pool, acl_file=None, entry=None): - """Update the acl for a given pool. + def pool_evict(self, pool): + """Evict a pool. Args: - pool (str): Pool for which to update the ACL. - acl_file (str, optional): ACL file to update - entry (str, optional): entry to be updated + pool (str): UUID of DAOS pool to evict connection to Returns: CmdResult: Object that contains exit status, stdout, and other information. Raises: - CommandFailure: if the dmg pool update-acl command fails. + CommandFailure: if the dmg pool evict command fails. + + """ + return self._get_result(("pool", "evict"), pool=pool) + + def pool_exclude(self, pool, ranks, tgt_idx=None, force=False): + """Exclude a daos_server from the pool. + + Args: + pool (str): Pool uuid. + ranks (str): Comma separated daos_server-rank ranges to exclude e.g. + "0,2-5". + tgt_idx (list, optional): targets to exclude on ranks e.g. "1,2". + Defaults to None. + force (bool, optional): force exclusion regardless of data loss. 
Defaults to False + + Returns: + CmdResult: Object that contains exit status, stdout, and other + information. + + Raises: + CommandFailure: if the dmg pool exclude command fails. """ return self._get_result( - ("pool", "update-acl"), pool=pool, acl_file=acl_file, entry=entry) + ("pool", "exclude"), pool=pool, ranks=ranks, tgt_idx=tgt_idx, force=force) - def pool_upgrade(self, pool): - """Call dmg pool upgrade. + def pool_extend(self, pool, ranks): + """Extend the daos_server pool. Args: - pool (str): pool to upgrade + pool (str): Pool uuid. + ranks (str): Comma separated daos_server-rank ranges to extend e.g. + "0,2-5". Returns: - dict: the dmg json command output converted to a python dictionary + CmdResult: Object that contains exit status, stdout, and other + information. Raises: - CommandFailure: if the command fails. + CommandFailure: if the dmg pool extend command fails. """ - return self._get_json_result(("pool", "upgrade"), pool=pool) + return self._get_result( + ("pool", "extend"), pool=pool, ranks=ranks) - def pool_overwrite_acl(self, pool, acl_file): - """Overwrite the acl for a given pool. + def pool_get_acl(self, pool): + """Get the ACL for a given pool. Args: - pool (str): Pool for which to overwrite the ACL. - acl_file (str): ACL file to update + pool (str): Pool for which to get the ACL. Returns: CmdResult: Object that contains exit status, stdout, and other information. Raises: - CommandFailure: if the dmg pool overwrite-acl command fails. + CommandFailure: if the dmg pool get-acl command fails. """ - return self._get_result( - ("pool", "overwrite-acl"), pool=pool, acl_file=acl_file) + return self._get_result(("pool", "get-acl"), pool=pool) - def pool_delete_acl(self, pool, principal): - """Delete the acl for a given pool. + def pool_get_prop(self, pool, name=None): + """Get the Property for a given pool. Args: - pool (str): Pool for which to delete the ACL. 
- principal (str): principal to be deleted + pool (str): Pool for which to get the property. + name (str, optional): Get the Property value based on name. Returns: CmdResult: Object that contains exit status, stdout, and other information. Raises: - CommandFailure: if the dmg pool delete-acl command fails. + CommandFailure: if the dmg pool get-prop command fails. """ - return self._get_result( - ("pool", "delete-acl"), pool=pool, principal=principal) + return self._get_json_result(("pool", "get-prop"), pool=pool, name=name) def pool_list(self, no_query=False, verbose=False): """List pools. @@ -865,121 +835,198 @@ def pool_list(self, no_query=False, verbose=False): return self._get_json_result( ("pool", "list"), no_query=no_query, verbose=verbose) - def pool_set_prop(self, pool, properties): - """Set property for a given Pool. + def pool_overwrite_acl(self, pool, acl_file): + """Overwrite the acl for a given pool. Args: - pool (str): Pool uuid for which property is supposed to be set. - properties (str): Property in the form of key:val[,key:val...] + pool (str): Pool for which to overwrite the ACL. + acl_file (str): ACL file to update Returns: - CmdResult: Object that contains exit status, stdout, and other information. + CmdResult: Object that contains exit status, stdout, and other + information. Raises: - CommandFailure: if the dmg pool set-prop command fails. + CommandFailure: if the dmg pool overwrite-acl command fails. """ - return self._get_result(("pool", "set-prop"), pool=pool, properties=properties) + return self._get_result( + ("pool", "overwrite-acl"), pool=pool, acl_file=acl_file) - def pool_get_prop(self, pool, name=None): - """Get the Property for a given pool. + def pool_query(self, pool, show_enabled=False, health_only=False): + """Query a pool with the dmg command. Args: - pool (str): Pool for which to get the property. - name (str, optional): Get the Property value based on name. + pool (str): Pool UUID or label to query. 
+ show_enabled (bool, optional): Display enabled ranks. + health_only (bool, optional): Only perform pool health related queries. + + Raises: + CommandFailure: if the dmg pool query command fails. Returns: - CmdResult: Object that contains exit status, stdout, and other - information. + dict: the dmg json command output converted to a python dictionary + + """ + # Sample JSON output + # { + # "response": { + # "status": 0, + # "uuid": "EDAE0965-7A6E-48BD-A71C-A29F199C679F", + # "total_targets": 8, + # "active_targets": 8, + # "total_engines": 1, + # "disabled_targets": 0, + # "version": 1, + # "svc_ldr": 0, + # "rebuild": { + # "status": 0, + # "state": "idle", + # "objects": 0, + # "records": 0 + # }, + # "scm": { + # "total": 16000000000, + # "free": 15999992320, + # "min": 1999999040, + # "max": 1999999040, + # "mean": 1999999040 + # }, + # "nvme": { + # "total": 32000000000, + # "free": 31999950848, + # "min": 3999993856, + # "max": 3999993856, + # "mean": 3999993856 + # }, + # "enabled_ranks": [0,1,3], + # "disabled_ranks": [2] + # }, + # "error": null, + # "status": 0 + # } + return self._get_json_result(("pool", "query"), pool=pool, + show_enabled=show_enabled, health_only=health_only) + + def pool_query_targets(self, pool, rank=None, target_idx=None): + """Call dmg pool query-targets. + + Args: + pool (str): Pool UUID or label + rank (str, optional): Engine rank of the targets to be queried + target_idx (str, optional): Comma-separated list of target idx(s) to be queried Raises: - CommandFailure: if the dmg pool get-prop command fails. + CommandFailure: if the command fails. + + Returns: + dict: the dmg json command output converted to a python dictionary """ - return self._get_json_result(("pool", "get-prop"), pool=pool, name=name) + return self._get_json_result(("pool", "query-targets"), pool=pool, + rank=rank, target_idx=target_idx) - def pool_exclude(self, pool, ranks, tgt_idx=None, force=False): - """Exclude a daos_server from the pool. 
+ def pool_reintegrate(self, pool, ranks, tgt_idx=None): + """Reintegrate a daos_server to the pool. Args: pool (str): Pool uuid. - ranks (str): Comma separated daos_server-rank ranges to exclude e.g. - "0,2-5". - tgt_idx (list, optional): targets to exclude on ranks e.g. "1,2". + ranks (str): Comma separated daos_server-rank ranges to reintegrate + e.g. "0,2-5". + tgt_idx (list, optional): targets to reintegrate on ranks e.g. "1,2". Defaults to None. - force (bool, optional): force exclusion regardless of data loss. Defaults to False Returns: CmdResult: Object that contains exit status, stdout, and other information. Raises: - CommandFailure: if the dmg pool exclude command fails. + CommandFailure: if the dmg pool reintegrate command fails. """ return self._get_result( - ("pool", "exclude"), pool=pool, ranks=ranks, tgt_idx=tgt_idx, force=force) + ("pool", "reintegrate"), pool=pool, ranks=ranks, tgt_idx=tgt_idx) - def pool_extend(self, pool, ranks): - """Extend the daos_server pool. + def pool_rebuild_start(self, pool): + """Rebuild start request submitted to pool. Args: - pool (str): Pool uuid. - ranks (str): Comma separated daos_server-rank ranges to extend e.g. - "0,2-5". + pool (str): Pool label or uuid. Returns: - CmdResult: Object that contains exit status, stdout, and other - information. + CmdResult: Object that contains exit status, stdout, and other information. Raises: - CommandFailure: if the dmg pool extend command fails. + CommandFailure: if the command fails. """ - return self._get_result( - ("pool", "extend"), pool=pool, ranks=ranks) + return self._get_result(("pool", "rebuild", "start"), pool=pool) - def pool_drain(self, pool, ranks, tgt_idx=None): - """Drain a daos_server from the pool. + def pool_rebuild_stop(self, pool, force=False): + """Rebuild stop request submitted to pool. Args: - pool (str): Pool uuid. - ranks (str): Comma separated daos_server-rank ranges to drain e.g. - "0,2-5". - tgt_idx (list, optional): targets to drain on ranks e.g. 
"1,2". - Defaults to None. + pool (str): Pool label or uuid. + force (bool): Force stop rebuild. Returns: - CmdResult: Object that contains exit status, stdout, and other - information. + CmdResult: Object that contains exit status, stdout, and other information. Raises: - CommandFailure: if the dmg pool drain command fails. + CommandFailure: if the command fails. """ - return self._get_result( - ("pool", "drain"), pool=pool, ranks=ranks, tgt_idx=tgt_idx) + return self._get_result(("pool", "rebuild", "stop"), pool=pool, force=force) - def pool_reintegrate(self, pool, ranks, tgt_idx=None): - """Reintegrate a daos_server to the pool. + def pool_set_prop(self, pool, properties): + """Set property for a given Pool. Args: - pool (str): Pool uuid. - ranks (str): Comma separated daos_server-rank ranges to reintegrate - e.g. "0,2-5". - tgt_idx (list, optional): targets to reintegrate on ranks e.g. "1,2". - Defaults to None. + pool (str): Pool uuid for which property is supposed to be set. + properties (str): Property in the form of key:val[,key:val...] + + Returns: + CmdResult: Object that contains exit status, stdout, and other information. + + Raises: + CommandFailure: if the dmg pool set-prop command fails. + + """ + return self._get_result(("pool", "set-prop"), pool=pool, properties=properties) + + def pool_update_acl(self, pool, acl_file=None, entry=None): + """Update the acl for a given pool. + + Args: + pool (str): Pool for which to update the ACL. + acl_file (str, optional): ACL file to update + entry (str, optional): entry to be updated Returns: CmdResult: Object that contains exit status, stdout, and other - information. + information. Raises: - CommandFailure: if the dmg pool reintegrate command fails. + CommandFailure: if the dmg pool update-acl command fails. 
""" return self._get_result( - ("pool", "reintegrate"), pool=pool, ranks=ranks, tgt_idx=tgt_idx) + ("pool", "update-acl"), pool=pool, acl_file=acl_file, entry=entry) + + def pool_upgrade(self, pool): + """Call dmg pool upgrade. + + Args: + pool (str): pool to upgrade + + Returns: + dict: the dmg json command output converted to a python dictionary + + Raises: + CommandFailure: if the command fails. + + """ + return self._get_json_result(("pool", "upgrade"), pool=pool) def cont_set_owner(self, pool, cont, user=None, group=None): """Dmg container set-owner to the specified new user/group. @@ -1038,18 +1085,18 @@ def system_cleanup(self, machinename=None, verbose=True): return self._get_json_result( ("system", "cleanup"), machinename=machinename, verbose=verbose) - def system_clear_exclude(self, ranks, rank_hosts): - """Clear exclude ranks from system. + def system_clear_exclude(self, ranks=None, rank_hosts=None): + """Call dmg system clear-exclude. - Either ranks or rank_hosts is necessary. Pass in None to one of them. + Either ranks or rank_hosts is required. Args: - ranks (str): Comma separated rank-ranges to exclude e.g. "0,2-5". - rank_hosts (str): hostlist representing hosts whose managed ranks are to be + ranks (str, optional): Comma separated rank-ranges to exclude e.g. "0,2-5". + rank_hosts (str, optional): hostlist representing hosts whose managed ranks are to be operated on. Raises: - CommandFailure: if the dmg system clear-exclude command fails. + CommandFailure: if the command fails. Returns: dict: the dmg json command output converted to a python dictionary @@ -1058,6 +1105,70 @@ def system_clear_exclude(self, ranks, rank_hosts): return self._get_json_result( ("system", "clear-exclude"), ranks=ranks, rank_hosts=rank_hosts) + def system_drain(self, ranks=None, rank_hosts=None): + """Call dmg system drain. + + Either ranks or rank_hosts is required. + + Args: + ranks (str, optional): Comma separated rank-ranges to exclude e.g. "0,2-5". 
+ rank_hosts (str, optional): hostlist representing hosts whose managed ranks are to be + operated on. + + Raises: + CommandFailure: if the command fails. + + Returns: + dict: the dmg json command output converted to a python dictionary + + """ + return self._get_json_result( + ("system", "drain"), ranks=ranks, rank_hosts=rank_hosts) + + def system_erase(self): + """Erase system metadata prior to reformat. + + Raises: + CommandFailure: if the command fails. + + Returns: + dict: the dmg json command output converted to a python dictionary + + """ + return self._get_json_result(("system", "erase")) + + def system_exclude(self, ranks=None, rank_hosts=None): + """Call dmg system exclude. + + Either ranks or rank_hosts is required. + + Args: + ranks (str, optional): Comma separated rank-ranges to exclude e.g. "0,2-5". + rank_hosts (str, optional): hostlist representing hosts whose managed ranks are to be + operated on. + + Raises: + CommandFailure: if the command fails. + + Returns: + dict: the dmg json command output converted to a python dictionary + + """ + return self._get_json_result( + ("system", "exclude"), ranks=ranks, rank_hosts=rank_hosts) + + def system_leader_query(self): + """Call dmg system leader-query. + + Raises: + CommandFailure: if the command fails. + + Returns: + dict: the dmg json command output converted to a python dictionary + + """ + return self._get_json_result(("system", "leader-query")) + def system_query(self, ranks=None, verbose=True): """Query system to obtain the status of the servers. @@ -1106,60 +1217,56 @@ def system_query(self, ranks=None, verbose=True): return self._get_json_result( ("system", "query"), ranks=ranks, verbose=verbose) - def system_leader_query(self): - """Query system to obtain the MS leader and replica information. + def system_reintegrate(self, ranks=None, rank_hosts=None): + """Call dmg system reintegrate. + + Args: + ranks (str, optional): Comma separated rank-ranges to exclude e.g. "0,2-5". 
+ rank_hosts (str, optional): hostlist representing hosts whose managed ranks are to be + operated on. Raises: - CommandFailure: if the dmg system query command fails. + CommandFailure: if the dmg system reintegrate command fails. Returns: dict: the dmg json command output converted to a python dictionary """ - # Example JSON output: - # { - # "response": { - # "current_leader": "127.0.0.1:10001", - # "replicas": [ - # "127.0.0.1:10001" - # ] - # }, - # "error": null, - # "status": 0 - # } - return self._get_json_result(("system", "leader-query")) + return self._get_json_result( + ("system", "reintegrate"), ranks=ranks, rank_hosts=rank_hosts) - def system_erase(self): - """Erase system metadata prior to reformat. + def system_rebuild_start(self, verbose=False): + """Call dmg system rebuild start. + + Args: + verbose (bool, optional): Print pool identifiers Raises: - CommandFailure: if the dmg system erase command fails. + CommandFailure: if the dmg system rebuild start command fails. Returns: dict: the dmg json command output converted to a python dictionary """ - return self._get_json_result(("system", "erase")) - - def system_exclude(self, ranks, rank_hosts): - """Exclude ranks from system. + return self._get_json_result( + ("system", "rebuild", "start"), verbose=verbose) - Either ranks or rank_hosts is necessary. Pass in None to one of them. + def system_rebuild_stop(self, verbose=False, force=False): + """Call dmg system rebuild stop. Args: - ranks (str): Comma separated rank-ranges to exclude e.g. "0,2-5". - rank_hosts (str): hostlist representing hosts whose managed ranks are to be - operated on. + verbose (bool, optional): Print pool identifiers + force (bool, optional): Forcibly stop interactive rebuild Raises: - CommandFailure: if the dmg system exclude command fails. + CommandFailure: if the dmg system rebuild stop command fails. 
Returns: dict: the dmg json command output converted to a python dictionary """ return self._get_json_result( - ("system", "exclude"), ranks=ranks, rank_hosts=rank_hosts) + ("system", "rebuild", "stop"), verbose=verbose, force=force) def system_start(self, ranks=None, ignore_admin_excluded=False): """Start the system. @@ -1221,22 +1328,6 @@ def system_stop(self, force=False, ranks=None): data[rank] = info[1].strip() return data - def pool_evict(self, pool): - """Evict a pool. - - Args: - pool (str): UUID of DAOS pool to evict connection to - - Returns: - CmdResult: Object that contains exit status, stdout, and other - information. - - Raises: - CommandFailure: if the dmg pool evict command fails. - - """ - return self._get_result(("pool", "evict"), pool=pool) - def config_generate(self, mgmt_svc_replicas, num_engines=None, scm_only=False, net_class=None, net_provider=None, use_tmpfs_scm=False, control_metadata_path=None): diff --git a/src/tests/ftest/util/dmg_utils_base.py b/src/tests/ftest/util/dmg_utils_base.py index bbacbd19088..be13a06c107 100644 --- a/src/tests/ftest/util/dmg_utils_base.py +++ b/src/tests/ftest/util/dmg_utils_base.py @@ -413,6 +413,8 @@ def get_sub_command_class(self): self.sub_command_class = self.QuerySubCommand() elif self.sub_command.value == "query-targets": self.sub_command_class = self.QueryTargetsSubCommand() + elif self.sub_command.value == "rebuild": + self.sub_command_class = self.RebuildSubCommand() elif self.sub_command.value == "set-prop": self.sub_command_class = self.SetPropSubCommand() elif self.sub_command.value == "update-acl": @@ -560,6 +562,40 @@ def __init__(self): self.rank = FormattedParameter("--rank={}", None) self.target_idx = FormattedParameter("--target-idx={}", None) + class RebuildSubCommand(CommandWithSubCommand): + """Defines an object for the dmg pool rebuild command.""" + + def __init__(self): + """Create a dmg pool rebuild command object.""" + super().__init__("/run/dmg/pool/rebuild/*", "rebuild") + + def 
get_sub_command_class(self): + # pylint: disable=redefined-variable-type + """Get the dmg pool sub command object.""" + if self.sub_command.value == "start": + self.sub_command_class = self.StartSubCommand() + elif self.sub_command.value == "stop": + self.sub_command_class = self.StopSubCommand() + else: + self.sub_command_class = None + + class StartSubCommand(CommandWithParameters): + """Defines an object for dmg pool rebuild start command.""" + + def __init__(self): + """Create a dmg pool rebuild start command object.""" + super().__init__("/run/dmg/pool/rebuild/start/*", "start") + self.pool = BasicParameter(None, position=1) + + class StopSubCommand(CommandWithParameters): + """Defines an object for dmg pool rebuild stop command.""" + + def __init__(self): + """Create a dmg pool rebuild stop command object.""" + super().__init__("/run/dmg/pool/rebuild/stop/*", "stop") + self.pool = BasicParameter(None, position=1) + self.force = FormattedParameter("--force", False) + class ReintegrateSubCommand(CommandWithParameters): """Defines an object for dmg pool reintegrate command.""" @@ -843,6 +879,8 @@ def get_sub_command_class(self): self.sub_command_class = self.CleanupSubCommand() elif self.sub_command.value == "clear-exclude": self.sub_command_class = self.ClearExcludeSubCommand() + elif self.sub_command.value == "drain": + self.sub_command_class = self.DrainSubCommand() elif self.sub_command.value == "erase": self.sub_command_class = self.EraseSubCommand() elif self.sub_command.value == "exclude": @@ -853,6 +891,10 @@ def get_sub_command_class(self): self.sub_command_class = self.ListPoolsSubCommand() elif self.sub_command.value == "query": self.sub_command_class = self.QuerySubCommand() + elif self.sub_command.value == "rebuild": + self.sub_command_class = self.RebuildSubCommand() + elif self.sub_command.value == "reintegrate": + self.sub_command_class = self.ReintegrateSubCommand() elif self.sub_command.value == "start": self.sub_command_class = 
self.StartSubCommand() elif self.sub_command.value == "stop": @@ -878,6 +920,15 @@ def __init__(self): self.ranks = FormattedParameter("--ranks={}") self.rank_hosts = FormattedParameter("--rank-hosts={}") + class DrainSubCommand(CommandWithParameters): + """Defines an object for the dmg system drain command.""" + + def __init__(self): + """Create a dmg system drain command object.""" + super().__init__("/run/dmg/system/drain/*", "drain") + self.ranks = FormattedParameter("--ranks={}") + self.rank_hosts = FormattedParameter("--rank-hosts={}") + class EraseSubCommand(CommandWithParameters): """Defines an object for the dmg system erase command.""" @@ -917,6 +968,49 @@ def __init__(self): self.ranks = FormattedParameter("--ranks={}") self.verbose = FormattedParameter("--verbose", False) + class ReintegrateSubCommand(CommandWithParameters): + """Defines an object for the dmg system reintegrate command.""" + + def __init__(self): + """Create a dmg system reintegrate command object.""" + super().__init__("/run/dmg/system/reintegrate/*", "reintegrate") + self.ranks = FormattedParameter("--ranks={}") + self.rank_hosts = FormattedParameter("--rank-hosts={}") + + class RebuildSubCommand(CommandWithSubCommand): + """Defines an object for the dmg system rebuild command.""" + + def __init__(self): + """Create a dmg system rebuild command object.""" + super().__init__("/run/dmg/system/rebuild/*", "rebuild") + + def get_sub_command_class(self): + # pylint: disable=redefined-variable-type + """Get the dmg system sub command object.""" + if self.sub_command.value == "start": + self.sub_command_class = self.StartSubCommand() + elif self.sub_command.value == "stop": + self.sub_command_class = self.StopSubCommand() + else: + self.sub_command_class = None + + class StartSubCommand(CommandWithParameters): + """Defines an object for the dmg system rebuild start command.""" + + def __init__(self): + """Create a dmg system rebuild start command object.""" + 
super().__init__("/run/dmg/system/rebuild/start/*", "start") + self.verbose = FormattedParameter("--verbose", False) + + class StopSubCommand(CommandWithParameters): + """Defines an object for the dmg system rebuild stop command.""" + + def __init__(self): + """Create a dmg system rebuild stop command object.""" + super().__init__("/run/dmg/system/rebuild/stop/*", "stop") + self.verbose = FormattedParameter("--verbose", False) + self.force = FormattedParameter("--force", False) + class StartSubCommand(CommandWithParameters): """Defines an object for the dmg system start command.""" diff --git a/src/tests/ftest/util/test_utils_pool.py b/src/tests/ftest/util/test_utils_pool.py index 4895f858873..c6cdde5c3ba 100644 --- a/src/tests/ftest/util/test_utils_pool.py +++ b/src/tests/ftest/util/test_utils_pool.py @@ -847,6 +847,24 @@ def reintegrate(self, ranks, tgt_idx=None): """ return self.dmg.pool_reintegrate(self.identifier, ranks, tgt_idx) + def rebuild_start(self, *args, **kwargs): + """Use dmg to start rebuild on this pool. + + Returns: + CmdResult: Object that contains exit status, stdout, and other information. + + """ + return self.dmg.pool_rebuild_start(self.identifier, *args, **kwargs) + + def rebuild_stop(self, *args, **kwargs): + """Use dmg to stop rebuild on this pool. + + Returns: + CmdResult: Object that contains exit status, stdout, and other information. + + """ + return self.dmg.pool_rebuild_stop(self.identifier, *args, **kwargs) + @fail_on(CommandFailure) def set_property(self, prop_name, prop_value): """Set Property. 
@@ -1387,6 +1405,9 @@ def _update_rebuild_data(self, verbose=True): # If the current state is busy or idle w/o a version increase after previously being # busy then rebuild is running self._rebuild_data["check"] = "running" + elif self._rebuild_data["state"] == "idle" and self._rebuild_data["status"] == -2027: + # Rebuild was explicitly stopped + self._rebuild_data["check"] = "stopped" elif self._rebuild_data["check"] is None: # Otherwise rebuild has yet to start self._rebuild_data["check"] = "not yet started" @@ -1398,8 +1419,8 @@ def _wait_for_rebuild(self, expected, interval=1): """Wait for the rebuild to start or end. Args: - expected (str): which rebuild data check to wait for: 'running' or 'completed' - interval (int): number of seconds to wait in between rebuild completion checks + expected (str): which rebuild data check to wait for: 'running', 'completed', 'stopped' + interval (int, optional): number of seconds to wait between checks. Defaults to 1. Raises: DaosTestError: if waiting for rebuild times out. @@ -1461,7 +1482,7 @@ def wait_for_rebuild_to_start(self, interval=1): """Wait for the rebuild to start. Args: - interval (int): number of seconds to wait in between rebuild completion checks + interval (int, optional): number of seconds to wait between checks. Defaults to 1. Raises: DaosTestError: if waiting for rebuild times out. @@ -1473,7 +1494,7 @@ def wait_for_rebuild_to_end(self, interval=1): """Wait for the rebuild to end. Args: - interval (int): number of seconds to wait in between rebuild completion checks + interval (int, optional): number of seconds to wait between checks. Defaults to 1. Raises: DaosTestError: if waiting for rebuild times out. @@ -1481,6 +1502,18 @@ def wait_for_rebuild_to_end(self, interval=1): """ self._wait_for_rebuild("completed", interval) + def wait_for_rebuild_to_stop(self, interval=1): + """Wait for the rebuild to stop without completing. + + Args: + interval (int, optional): number of seconds to wait between checks. 
Defaults to 1. + + Raises: + DaosTestError: if waiting for rebuild times out. + + """ + self._wait_for_rebuild("stopped", interval) + def measure_rebuild_time(self, operation, interval=1): """Measure rebuild time. From 2247c07aeaa9899624b94864ffc6aa2a10dae2f4 Mon Sep 17 00:00:00 2001 From: Dalton Bohning Date: Fri, 19 Dec 2025 08:09:29 -0800 Subject: [PATCH 093/253] DAOS-16260 test: disable performance enforcement in intercept_multi_client (#17285) Since performance checks in CI are not reliable. Signed-off-by: Dalton Bohning --- src/tests/ftest/ior/intercept_multi_client.yaml | 16 ++++++++++------ src/tests/ftest/util/ior_intercept_test_base.py | 9 +++++++-- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/src/tests/ftest/ior/intercept_multi_client.yaml b/src/tests/ftest/ior/intercept_multi_client.yaml index 94a0508fbbb..4831c58ebb8 100644 --- a/src/tests/ftest/ior/intercept_multi_client.yaml +++ b/src/tests/ftest/ior/intercept_multi_client.yaml @@ -1,7 +1,9 @@ hosts: test_servers: 2 test_clients: 6 -timeout: 1000 + +timeout: 760 + server_config: name: daos_server engines_per_host: 2 @@ -22,12 +24,13 @@ server_config: log_file: daos_server1.log log_mask: WARN storage: auto + pool: size: 90% - svcn: 1 + container: type: POSIX - control_method: daos + ior: env_vars: - D_LOG_MASK=WARN @@ -35,13 +38,13 @@ ior: client_processes: ppn: 16 test_file: testFile - repetitions: 3 sw_deadline: 60 flags: "-v -w -r -R" - dfs_oclass: "SX" - block_size: '100G' + dfs_oclass: SX + block_size: 100G write_x: 0.10 # Max 10% performance difference. read_x: 0.10 # Loosely derived from 3% stddev + 8% actual deviation. + enforce_performance: false # Skip enforcement in CI since it is flaky. 
transfersize: !mux 512B: transfer_size: '512B' @@ -49,5 +52,6 @@ ior: transfer_size: '4K' 1M: transfer_size: '1M' + dfuse: disable_caching: true diff --git a/src/tests/ftest/util/ior_intercept_test_base.py b/src/tests/ftest/util/ior_intercept_test_base.py index fb28c7356ca..34fa3a550c5 100644 --- a/src/tests/ftest/util/ior_intercept_test_base.py +++ b/src/tests/ftest/util/ior_intercept_test_base.py @@ -1,5 +1,6 @@ """ (C) Copyright 2019-2023 Intel Corporation. + (C) Copyright 2025 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -31,6 +32,7 @@ def run_il_perf_check(self, libname): # Write and read performance thresholds write_x = self.params.get("write_x", self.ior_cmd.namespace, None) read_x = self.params.get("read_x", self.ior_cmd.namespace, None) + enforce_performance = self.params.get("enforce_performance", self.ior_cmd.namespace, True) if write_x is None or read_x is None: self.fail("Failed to get write_x and read_x from config") @@ -81,5 +83,8 @@ def run_il_perf_check(self, libname): self.log.info("DFUSE IL Max Read: %.2f", dfuse_max_read) self.log.info("Percent Diff: %.2f%%", actual_read_x * 100) - self.assertLessEqual(abs(actual_write_x), write_x, "Max Write Diff too large") - self.assertLessEqual(abs(actual_read_x), read_x, "Max Read Diff too large") + if enforce_performance: + self.assertLessEqual(abs(actual_write_x), write_x, "Max Write Diff too large") + self.assertLessEqual(abs(actual_read_x), read_x, "Max Read Diff too large") + else: + self.log.info("Skipping performance enforcement checks") From 7faa4d47ec0741bd3ba3cdabeb1cc4f46d40b808 Mon Sep 17 00:00:00 2001 From: Tom Nabarro Date: Fri, 19 Dec 2025 17:19:00 +0000 Subject: [PATCH 094/253] DAOS-17468 control: Prevent start if transparent hugepages are enabled (#16313) When THP feature is enabled on linux platforms, SPDK related hugepage management in DAOS performs sub-optimally. Resulting problems relate to memory accounting and fragmentation. 
To remedy, refuse to start daos_server if THP is enabled on platform and recommend disabling THP by applying kernel commandline parameters effective on reboot. Signed-off-by: Tom Nabarro --- docs/admin/deployment.md | 57 +++++++++++---- docs/admin/predeployment_check.md | 27 ++++++++ src/control/cmd/dmg/auto_test.go | 1 + src/control/fault/code/codes.go | 2 + .../hardware/defaults/topology/defaults.go | 7 ++ src/control/lib/hardware/sysfs/provider.go | 21 ++++++ .../lib/hardware/sysfs/provider_test.go | 63 +++++++++++++++++ src/control/lib/hardware/thp.go | 14 ++++ src/control/server/config/faults.go | 9 +++ src/control/server/config/server.go | 17 +++++ src/control/server/config/server_test.go | 65 ++++++++++++----- src/control/server/faults.go | 5 ++ src/control/server/mocks.go | 20 ++++++ src/control/server/server.go | 8 +-- src/control/server/server_utils.go | 18 ++++- src/control/server/server_utils_test.go | 69 ++++++++++++++++--- src/control/server/storage/config.go | 5 +- utils/config/daos_server.yml | 16 +++++ 18 files changed, 374 insertions(+), 50 deletions(-) create mode 100644 src/control/lib/hardware/thp.go diff --git a/docs/admin/deployment.md b/docs/admin/deployment.md index 879c415092d..88e0c4bd16c 100644 --- a/docs/admin/deployment.md +++ b/docs/admin/deployment.md @@ -438,7 +438,7 @@ per engine. The command redirects stderr to /dev/null and stdout to a temporary installation is from a source build. 
```bash -[user@wolf-226 daos]$ install/bin/daos_server config generate -p ofi+tcp --use-tmpfs-scm 2>/dev/null | tee ~/configs/tmp.yml +$ daos_server config generate -p ofi+tcp --use-tmpfs-scm 2>/dev/null | tee ~/configs/tmp.yml port: 10001 transport_config: allow_insecure: false @@ -496,6 +496,7 @@ disable_vmd: false enable_hotplug: false nr_hugepages: 16384 disable_hugepages: false +allow_thp: false control_log_mask: INFO control_log_file: /var/log/daos/daos_server.log core_dump_filter: 19 @@ -512,7 +513,7 @@ Now we start the `daos_server` service from the generated config which loads suc and runs until the point where a storage format is required, as expected. ```bash -[user@wolf-226 daos]$ install/bin/daos_server start -i -o ~/configs/tmp.yml +$ daos_server start -i -o ~/configs/tmp.yml DAOS Server config loaded from /home/user/configs/tmp.yml install/bin/daos_server logging to file /tmp/daos_server.log NOTICE: Configuration includes only one MS replica. This provides no redundancy in the event of a MS replica failure. @@ -534,12 +535,12 @@ Note the subsequent system query command may not show ranks started immediately format command returns so it is recommended to leave a short delay (~5s) before invoking. ```bash -[user@wolf-226 daos]$ install/bin/dmg storage format -i +$ dmg storage format -i Format Summary: Hosts SCM Devices NVMe Devices ----- ----------- ------------ localhost 2 16 -[user@wolf-226 daos]$ install/bin/dmg system query -i +$ dmg system query -i Rank State ---- ----- [0-1] Joined @@ -564,17 +565,17 @@ daos_engine:1 Using NUMA core allocation algorithm SCM @ /mnt/daos0: 91 GB Total/91 GB Avail Starting I/O Engine instance 0: /home/user/projects/daos/install/bin/daos_engine daos_engine:0 Using NUMA core allocation algorithm -MS leader running on wolf-226.wolf.hpdd.intel.com -daos_engine:1 DAOS I/O Engine (v2.3.101) process 1215202 started on rank 1 with 16 target, 4 helper XS, firstcore 0, host wolf-226.wolf.hpdd.intel.com. 
+MS leader running on wolf-226.domain +daos_engine:1 DAOS I/O Engine (v2.3.101) process 1215202 started on rank 1 with 16 target, 4 helper XS, firstcore 0, host wolf-226.domain. Using NUMA node: 1 -daos_engine:0 DAOS I/O Engine (v2.3.101) process 1215209 started on rank 0 with 16 target, 4 helper XS, firstcore 0, host wolf-226.wolf.hpdd.intel.com. +daos_engine:0 DAOS I/O Engine (v2.3.101) process 1215209 started on rank 0 with 16 target, 4 helper XS, firstcore 0, host wolf-226.domain. Using NUMA node: 0 ``` For reference, the hardware scan results for the target storage server are included below. ```bash -[user@wolf-226 daos]$ install/bin/daos_server nvme scan +$ daos_server nvme scan Scan locally-attached NVMe storage... NVMe PCI Model FW Revision Socket ID Capacity -------- ----- ----------- --------- -------- @@ -595,7 +596,7 @@ NVMe PCI Model FW Revision Socket ID Capacity 0000:e0:00.0 MZXLR3T8HBLS-000H3 MPK7525Q 1 3.8 TB 0000:e1:00.0 MZXLR3T8HBLS-000H3 MPK7525Q 1 3.8 TB -[user@wolf-226 daos]$ install/bin/daos_server network scan +$ daos_server network scan --------- localhost --------- @@ -807,6 +808,32 @@ configuration file with a populated per-engine section can be stored in `/etc/daos/daos_server.yml`, and after reestarting the `daos_server` service it is then ready for the storage to be formatted. + +### Transparent HugePage (THP) support + +DAOS relies on the use of hugepages in a dedicated manner and turning on transparent hugepages means +the hugepage memory pool gets used in a model more like a cache. This can have adverse effects on +DAOS behavior and may cause OOM and DMA buffer allocation failures at high load. + +By default the server will fail to start and exit when the server is started with THP enabled. 
+ +```bash +DEBUG 2025/12/14 09:54:32.537839 main.go:87: server: code = 623 description = "transparent hugepage (THP) enabled on storage server, DAOS requires THP to be disabled" +ERROR: server: code = 623 description = "transparent hugepage (THP) enabled on storage server, DAOS requires THP to be disabled" +ERROR: server: code = 623 resolution = "disable THP by adding 'transparent_hugepage=never' kernel parameter in the grub configuration file then reboot and restart daos_server" +``` + +The following command can be used to verify whether THP is enabled: + +```bash +cat /sys/kernel/mm/transparent_hugepage/enabled +[always] madvise never +``` + +If `allow_thp: true` parameter is set in server config file global section, the behavior will change +and the server will start with THP enabled. + + ## DAOS Server Remote Access Remote tasking of the DAOS system and individual DAOS Server processes can be @@ -895,7 +922,7 @@ resetting modules into "MemoryMode" through resource allocations. A subsequent reboot is required for BIOS to read the new resource allocations. -#### Multiple PMem namespaces per socket (Experimental) +#### Multiple PMem namespaces per socket By default the `daos_server scm prepare` command will create one PMem namespace on each PMem region. @@ -968,12 +995,12 @@ fallback to using UIO user-space driver with SPDK instead. The output will be equivalent running `dmg storage scan --verbose` remotely. ```bash -bash-4.2$ dmg storage scan +$ dmg storage scan Hosts SCM Total NVMe Total ----- --------- ---------- wolf-[71-72] 6.4 TB (2 namespaces) 3.1 TB (3 controllers) -bash-4.2$ dmg storage scan --verbose +$ dmg storage scan --verbose ------------ wolf-[71-72] ------------ @@ -1018,7 +1045,7 @@ manual reset to do so. 
SSD health state can be verified via `dmg storage scan --nvme-health`: ```bash -bash-4.2$ dmg storage scan --nvme-health +$ dmg storage scan --nvme-health ------- wolf-71 ------- @@ -1298,7 +1325,7 @@ To illustrate, assume a cluster with homogeneous hardware configurations that returns the following from scan for each host: ```bash -[daos@wolf-72 daos_m]$ dmg -l wolf-7[1-2] storage scan --verbose +$ dmg -l wolf-7[1-2] storage scan --verbose ------- wolf-7[1-2] ------- @@ -1544,7 +1571,7 @@ Upon successful format, DAOS Control Servers will start DAOS I/O engines that have been specified in the server config file. Successful start-up is indicated by the following on stdout: -`DAOS I/O Engine (v2.0.1) process 433456 started on rank 1 with 8 target, 2 helper XS, firstcore 0, host wolf-72.wolf.hpdd.intel.com.` +`DAOS I/O Engine (v2.0.1) process 433456 started on rank 1 with 8 target, 2 helper XS, firstcore 0, host wolf-72.domain.` ### SCM Format diff --git a/docs/admin/predeployment_check.md b/docs/admin/predeployment_check.md index 8b5a391d8a9..6a0d632a650 100644 --- a/docs/admin/predeployment_check.md +++ b/docs/admin/predeployment_check.md @@ -530,3 +530,30 @@ Current LBA Format: LBA Format #03 Displayed details for controller show LBA format is now "#03". Perform the above process for all SSDs that will be used by DAOS. + + +## Hugepage allocation and memory fragmentation + +DAOS uses linux hugepages for DMA buffer allocation. If hugepage memory becomes fragmented, DMA +buffer allocations may fail because of insufficient contiguous memory availability. + +By default DAOS will allocate necessary hugepages at runtime based on supplied server file +configuration details (mainly the number of engine targets). Runtime allocation of hugepages +may cause fragmentation over time. 
+ +To reduce the chance of memory fragmentation, hugepages can be allocated on the kernel boot +command line by specifying the "hugepages=N" parameter, where 'N' = the number of huge pages +requested. + +[See here for details of allocating hugepages at +boot](https://docs.redhat.com/en/documentation/red_hat_enterprise_linux/7/html/performance_tuning_guide/sect-red_hat_enterprise_linux-performance_tuning_guide-memory-configuring-huge-pages) + + +## Disabling transparent hugepage (THP) feature at boot-time + +Linux transparent hugepages feature can increase the likelihood of hugepage memory fragmentation +and should be disabled for optimal performance of DAOS. + +[See here for details of how to disable THP on +boot](https://docs.kernel.org/admin-guide/mm/transhuge.html#boot-parameters) + diff --git a/src/control/cmd/dmg/auto_test.go b/src/control/cmd/dmg/auto_test.go index 92bcc9fde86..9ebf8aa3b62 100644 --- a/src/control/cmd/dmg/auto_test.go +++ b/src/control/cmd/dmg/auto_test.go @@ -595,6 +595,7 @@ nr_hugepages: 0 system_ram_reserved: 64 disable_hugepages: false allow_numa_imbalance: false +allow_thp: false control_log_mask: INFO control_log_file: /var/log/daos/daos_server.log core_dump_filter: 19 diff --git a/src/control/fault/code/codes.go b/src/control/fault/code/codes.go index 3ce9299e1e7..56282d003d2 100644 --- a/src/control/fault/code/codes.go +++ b/src/control/fault/code/codes.go @@ -161,6 +161,7 @@ const ( ServerBadFaultDomainLabels ServerJoinReplaceEnabledPoolRank ServerRankAdminExcluded + ServerTransparentHugepageEnabled ) // server config fault codes @@ -203,6 +204,7 @@ const ( ServerConfigEnableHotplugDeprecated ServerConfigBdevExcludeClash ServerConfigHugepagesDisabledWithNrSet + ServerConfigScmHugeEnabled ) // SPDK library bindings codes diff --git a/src/control/lib/hardware/defaults/topology/defaults.go b/src/control/lib/hardware/defaults/topology/defaults.go index b7e764aeca7..d9aade97ce4 100644 --- 
a/src/control/lib/hardware/defaults/topology/defaults.go +++ b/src/control/lib/hardware/defaults/topology/defaults.go @@ -1,5 +1,6 @@ // // (C) Copyright 2021-2024 Intel Corporation. +// (C) Copyright 2025 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -42,3 +43,9 @@ func DefaultProcessNUMAProvider(log logging.Logger) hardware.ProcessNUMAProvider func DefaultIOMMUDetector(log logging.Logger) hardware.IOMMUDetector { return sysfs.NewProvider(log) } + +// DefaultTHPDetector gets the default provider for the transparent hugepage +// detector. +func DefaultTHPDetector(log logging.Logger) hardware.THPDetector { + return sysfs.NewProvider(log) +} diff --git a/src/control/lib/hardware/sysfs/provider.go b/src/control/lib/hardware/sysfs/provider.go index 0a253188344..7ed420bdc21 100644 --- a/src/control/lib/hardware/sysfs/provider.go +++ b/src/control/lib/hardware/sysfs/provider.go @@ -1,5 +1,6 @@ // // (C) Copyright 2021-2024 Intel Corporation. +// (C) Copyright 2025 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -601,3 +602,23 @@ func (s *Provider) IsIOMMUEnabled() (bool, error) { return err == nil && len(dmars) > 0, nil } + +// IsTHPEnabled checks whether transparent hugepages is enabled by interrogating sysfs and +// implements the THPDetector interface on sysfs provider. 
+func (s *Provider) IsTHPEnabled() (bool, error) { + if s == nil { + return false, errors.New("sysfs provider is nil") + } + + thpStatePath := s.sysPath("kernel", "mm", "transparent_hugepage", "enabled") + thpState, err := os.ReadFile(thpStatePath) + if err != nil { + return false, errors.Wrap(err, "unable to get transparent hugepage state") + } + + thpStateStr := strings.TrimSuffix(string(thpState), "\n") + strToks := strings.Split(thpStateStr, " ") + isDisabled := common.Includes(strToks, "[never]") + + return !isDisabled, nil +} diff --git a/src/control/lib/hardware/sysfs/provider_test.go b/src/control/lib/hardware/sysfs/provider_test.go index 6375654d815..791c7de49f9 100644 --- a/src/control/lib/hardware/sysfs/provider_test.go +++ b/src/control/lib/hardware/sysfs/provider_test.go @@ -1,5 +1,6 @@ // // (C) Copyright 2021-2024 Intel Corporation. +// (C) Copyright 2025 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -1348,3 +1349,65 @@ func TestSysfs_Provider_IsIOMMUEnabled(t *testing.T) { }) } } + +func setupTestIsTHPEnabled(t *testing.T, root, txt string, extraDirs ...string) { + t.Helper() + + dirs := append([]string{root}, extraDirs...) + + path := filepath.Join(dirs...) 
+ os.MkdirAll(path, 0755) + + contents := []byte(txt + "\n") + if err := os.WriteFile(filepath.Join(path, "enabled"), contents, 0644); err != nil { + t.Fatal(err) + } +} + +func TestSysfs_Provider_IsTHPEnabled(t *testing.T) { + for name, tc := range map[string]struct { + nilProvider bool + extraDirs []string + enableText string + expResult bool + expErr error + }{ + "nil provider": { + nilProvider: true, + expErr: errors.New("provider is nil"), + }, + "missing thp dir": { + extraDirs: []string{"kernel", "mm"}, + expErr: errors.New("no such file or directory"), + }, + "thp enabled": { + extraDirs: []string{"kernel", "mm", "transparent_hugepage"}, + enableText: "[always] madvise never", + expResult: true, + }, + "thp disabled": { + extraDirs: []string{"kernel", "mm", "transparent_hugepage"}, + enableText: "always madvise [never]", + }, + } { + t.Run(name, func(t *testing.T) { + testDir, cleanupTestDir := test.CreateTestDir(t) + defer cleanupTestDir() + + log, buf := logging.NewTestLogger(name) + defer test.ShowBufferOnFailure(t, buf) + + var p *Provider + if !tc.nilProvider { + p = NewProvider(log) + p.root = testDir + setupTestIsTHPEnabled(t, testDir, tc.enableText, tc.extraDirs...) + } + + result, err := p.IsTHPEnabled() + + test.CmpErr(t, tc.expErr, err) + test.AssertEqual(t, tc.expResult, result, "") + }) + } +} diff --git a/src/control/lib/hardware/thp.go b/src/control/lib/hardware/thp.go new file mode 100644 index 00000000000..cea54e5dd7c --- /dev/null +++ b/src/control/lib/hardware/thp.go @@ -0,0 +1,14 @@ +// +// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// + +package hardware + +type ( + // THPDetector is an interface for detecting if transparent hugepages is enabled on a + // system. 
+ THPDetector interface { + IsTHPEnabled() (bool, error) + } +) diff --git a/src/control/server/config/faults.go b/src/control/server/config/faults.go index 4a2088f4220..5bc36a3c85d 100644 --- a/src/control/server/config/faults.go +++ b/src/control/server/config/faults.go @@ -167,6 +167,15 @@ func FaultConfigScmDiffClass(curIdx, seenIdx int) *fault.Fault { ) } +func FaultConfigScmDiffHugeEnabled(curIdx, seenIdx int) *fault.Fault { + return serverConfigFault( + code.ServerConfigScmHugeEnabled, + fmt.Sprintf("the scm_hugepages_disabled in engine %d is different from engine %d", + curIdx, seenIdx), + "ensure that each I/O Engine has the same setting for this parameter and restart", + ) +} + func FaultConfigOverlappingBdevDeviceList(curIdx, seenIdx int) *fault.Fault { return serverConfigFault( code.ServerConfigOverlappingBdevDeviceList, diff --git a/src/control/server/config/server.go b/src/control/server/config/server.go index f61d7cd58d2..3ea27442a6f 100644 --- a/src/control/server/config/server.go +++ b/src/control/server/config/server.go @@ -68,6 +68,7 @@ type Server struct { SystemRamReserved int `yaml:"system_ram_reserved"` // total for all engines DisableHugepages bool `yaml:"disable_hugepages"` AllowNumaImbalance bool `yaml:"allow_numa_imbalance"` + AllowTHP bool `yaml:"allow_thp"` ControlLogMask common.ControlLogLevel `yaml:"control_log_mask"` ControlLogFile string `yaml:"control_log_file,omitempty"` ControlLogJSON bool `yaml:"control_log_json,omitempty"` @@ -299,6 +300,12 @@ func (cfg *Server) WithAllowNumaImbalance(allowed bool) *Server { return cfg } +// WithAllowTHP allows DAOS server to run with transparent hugepage support enabled. +func (cfg *Server) WithAllowTHP(allowed bool) *Server { + cfg.AllowTHP = allowed + return cfg +} + // WithSystemRamReserved sets the amount of system memory to reserve for system (non-DAOS) // use. In units of GiB. 
func (cfg *Server) WithSystemRamReserved(nr int) *Server { @@ -897,6 +904,8 @@ func (cfg *Server) validateMultiEngineConfig(log logging.Logger) error { seenHelperStreamCount := -1 seenScmCls := storage.ClassNone seenScmClsIdx := -1 + seenScmHuge := false + seenScmHugeIdx := -1 for idx, engine := range cfg.Engines { fabricConfig := fmt.Sprintf("fabric:%q-%q-%q", @@ -943,6 +952,14 @@ func (cfg *Server) validateMultiEngineConfig(log logging.Logger) error { } seenScmCls = scmConf.Class seenScmClsIdx = idx + + if seenScmHugeIdx != -1 && scmConf.Scm.DisableHugepages != seenScmHuge { + log.Debugf("scm_hugepages_disabled entry %v in %d doesn't match %d", + scmConf.Scm.DisableHugepages, idx, seenScmHugeIdx) + return FaultConfigScmDiffHugeEnabled(idx, seenScmHugeIdx) + } + seenScmHuge = scmConf.Scm.DisableHugepages + seenScmHugeIdx = idx } bdevs := engine.Storage.GetBdevs() diff --git a/src/control/server/config/server_test.go b/src/control/server/config/server_test.go index 4c8cac655d4..3bf190b9b61 100644 --- a/src/control/server/config/server_test.go +++ b/src/control/server/config/server_test.go @@ -264,7 +264,8 @@ func TestServerConfig_Constructed(t *testing.T) { WithFabricAuthKey("foo:bar"). WithHyperthreads(true). // hyper-threads disabled by default WithSystemRamReserved(5). - WithAllowNumaImbalance(true) + WithAllowNumaImbalance(true). + WithAllowTHP(true) // add engines explicitly to test functionality applied in WithEngines() constructed.Engines = []*engine.Config{ @@ -278,7 +279,7 @@ func TestServerConfig_Constructed(t *testing.T) { storage.NewTierConfig(). WithScmMountPoint("/mnt/daos/1"). WithStorageClass("ram"). - WithScmDisableHugepages(), + WithScmHugepagesDisabled(), storage.NewTierConfig(). WithStorageClass("nvme"). WithBdevDeviceList("0000:81:00.0", "0000:82:00.0"). @@ -306,7 +307,8 @@ func TestServerConfig_Constructed(t *testing.T) { WithStorage( storage.NewTierConfig(). WithScmMountPoint("/mnt/daos/2"). 
- WithStorageClass("ram"), + WithStorageClass("ram"). + WithScmHugepagesDisabled(), storage.NewTierConfig(). WithStorageClass("file"). WithBdevDeviceList("/tmp/daos-bdev1", "/tmp/daos-bdev2"). @@ -694,7 +696,7 @@ func TestServerConfig_Validation(t *testing.T) { storage.NewTierConfig(). WithScmMountPoint("/mnt/daos/1"). WithStorageClass("ram"). - WithScmDisableHugepages(), + WithScmHugepagesDisabled(), storage.NewTierConfig(). WithStorageClass("nvme"). WithBdevDeviceList("0000:81:00.0", "0000:82:00.0"). @@ -707,7 +709,7 @@ func TestServerConfig_Validation(t *testing.T) { storage.NewTierConfig(). WithScmMountPoint("/mnt/daos/2"). WithStorageClass("ram"). - WithScmDisableHugepages(), + WithScmHugepagesDisabled(), storage.NewTierConfig(). WithStorageClass("nvme"). WithBdevDeviceList("0000:91:00.0", "0000:92:00.0"). @@ -729,7 +731,7 @@ func TestServerConfig_Validation(t *testing.T) { storage.NewTierConfig(). WithScmMountPoint("/mnt/daos/1"). WithStorageClass("ram"). - WithScmDisableHugepages(), + WithScmHugepagesDisabled(), storage.NewTierConfig(). WithStorageClass("nvme"). WithBdevDeviceList("0000:81:00.0", "0000:82:00.0"). @@ -752,7 +754,7 @@ func TestServerConfig_Validation(t *testing.T) { storage.NewTierConfig(). WithScmMountPoint("/mnt/daos/2"). WithStorageClass("ram"). - WithScmDisableHugepages(), + WithScmHugepagesDisabled(), storage.NewTierConfig(). WithStorageClass("nvme"). WithBdevDeviceList("0000:91:00.0", "0000:92:00.0"). @@ -783,7 +785,7 @@ func TestServerConfig_Validation(t *testing.T) { storage.NewTierConfig(). WithScmMountPoint("/mnt/daos/1"). WithStorageClass("ram"). - WithScmDisableHugepages(), + WithScmHugepagesDisabled(), storage.NewTierConfig(). WithStorageClass("nvme"). WithBdevDeviceList("0000:81:00.0", "0000:82:00.0"). @@ -804,7 +806,7 @@ func TestServerConfig_Validation(t *testing.T) { storage.NewTierConfig(). WithScmMountPoint("/mnt/daos/1"). WithStorageClass("ram"). 
- WithScmDisableHugepages(), + WithScmHugepagesDisabled(), storage.NewTierConfig(). WithStorageClass("nvme"). WithBdevDeviceList("0000:81:00.0", "0000:82:00.0"). @@ -834,7 +836,7 @@ func TestServerConfig_Validation(t *testing.T) { storage.NewTierConfig(). WithScmMountPoint("/mnt/daos/0"). WithStorageClass("ram"). - WithScmDisableHugepages(), + WithScmHugepagesDisabled(), storage.NewTierConfig(). WithStorageClass("nvme"). WithBdevDeviceList("0000:80:00.0"). @@ -846,7 +848,7 @@ func TestServerConfig_Validation(t *testing.T) { storage.NewTierConfig(). WithScmMountPoint("/mnt/daos/1"). WithStorageClass("ram"). - WithScmDisableHugepages(), + WithScmHugepagesDisabled(), storage.NewTierConfig(). WithStorageClass("nvme"). WithBdevDeviceList("0000:81:00.0"), @@ -867,7 +869,7 @@ func TestServerConfig_Validation(t *testing.T) { storage.NewTierConfig(). WithScmMountPoint("/mnt/daos/1"). WithStorageClass("ram"). - WithScmDisableHugepages(), + WithScmHugepagesDisabled(), storage.NewTierConfig(). WithStorageClass("nvme"). WithBdevDeviceList("0000:81:00.0", "0000:82:00.0"). @@ -886,7 +888,7 @@ func TestServerConfig_Validation(t *testing.T) { storage.NewTierConfig(). WithScmMountPoint("/mnt/daos/1"). WithStorageClass("ram"). - WithScmDisableHugepages(), + WithScmHugepagesDisabled(), storage.NewTierConfig(). WithStorageClass("nvme"). WithBdevDeviceList("0000:81:00.0", "0000:82:00.0"). @@ -930,7 +932,7 @@ func TestServerConfig_Validation(t *testing.T) { storage.NewTierConfig(). WithScmMountPoint("/mnt/daos/1"). WithStorageClass("ram"). - WithScmDisableHugepages(), + WithScmHugepagesDisabled(), storage.NewTierConfig(). WithStorageClass("nvme"). WithBdevDeviceList("0000:81:00.0"), @@ -949,7 +951,7 @@ func TestServerConfig_Validation(t *testing.T) { storage.NewTierConfig(). WithScmMountPoint("/mnt/daos/1"). WithStorageClass("ram"). - WithScmDisableHugepages(), + WithScmHugepagesDisabled(), storage.NewTierConfig(). WithStorageClass("nvme"). WithBdevDeviceList("0000:81:00.0"). 
@@ -1135,7 +1137,7 @@ func TestServerConfig_getMinNrHugepages(t *testing.T) { storage.NewTierConfig(). WithScmMountPoint("/mnt/daos/1"). WithStorageClass("ram"). - WithScmDisableHugepages(), + WithScmHugepagesDisabled(), storage.NewTierConfig(). WithStorageClass("nvme"). WithBdevDeviceList("0000:81:00.0", "0000:82:00.0"). @@ -1271,7 +1273,7 @@ func TestServerConfig_SetNrHugepages(t *testing.T) { storage.NewTierConfig(). WithScmMountPoint("/mnt/daos/1"). WithStorageClass("ram"). - WithScmDisableHugepages(), + WithScmHugepagesDisabled(), storage.NewTierConfig(). WithStorageClass("nvme"). WithBdevDeviceList("0000:81:00.0", "0000:82:00.0"). @@ -1291,7 +1293,7 @@ func TestServerConfig_SetNrHugepages(t *testing.T) { storage.NewTierConfig(). WithScmMountPoint("/mnt/daos/1"). WithStorageClass("ram"). - WithScmDisableHugepages(), + WithScmHugepagesDisabled(), storage.NewTierConfig(). WithStorageClass("nvme"). WithBdevDeviceList("0000:81:00.0", "0000:82:00.0"). @@ -1475,7 +1477,7 @@ func TestServerConfig_SetRamdiskSize(t *testing.T) { storage.NewTierConfig(). WithScmMountPoint("/mnt/daos/1"). WithStorageClass("ram"). - WithScmDisableHugepages(), + WithScmHugepagesDisabled(), storage.NewTierConfig(). WithStorageClass("nvme"). WithBdevDeviceList("0000:81:00.0", "0000:82:00.0"). 
@@ -1570,6 +1572,7 @@ func replaceFile(t *testing.T, name, oldTxt, newTxt string) { if linesChanged == 0 { t.Fatalf("no occurrences of %q in file %q", oldTxt, name) } + t.Logf("replaceFile: %d lines changed", linesChanged) // make sure the tmp file was successfully written to if err := tmp.Close(); err != nil { @@ -1730,6 +1733,18 @@ func TestServerConfig_Parsing(t *testing.T) { return nil }, }, + "allow_thp true allows scm_hugepages_disabled false": { + inTxt: " scm_hugepages_disabled: true", + outTxt: " scm_hugepages_disabled: false", + expCheck: func(c *Server) error { + for _, e := range c.Engines { + if e.Storage.Tiers.ScmConfigs()[0].Scm.DisableHugepages { + return errors.New("expecting scm hugepages to be enabled") + } + } + return nil + }, + }, } { t.Run(name, func(t *testing.T) { log, buf := logging.NewTestLogger(t.Name()) @@ -1988,12 +2003,24 @@ func TestServerConfig_validateMultiEngineConfig(t *testing.T) { ), expLog: "engine 1 has 2 but engine 0 has 1", }, + "mismatched scm_hugepages_disabled": { + configA: configA(), + configB: configB(). + WithStorage( + storage.NewTierConfig(). + WithStorageClass("ram"). + WithScmMountPoint("b"). + WithScmHugepagesDisabled(), + ), + expErr: FaultConfigScmDiffHugeEnabled(1, 0), + }, } { t.Run(name, func(t *testing.T) { log, buf := logging.NewTestLogger(t.Name()) defer test.ShowBufferOnFailure(t, buf) conf := DefaultServer(). + WithAllowTHP(true). // Enable differences between scm_hugepages_disabled. WithFabricProvider("test"). WithMgmtSvcReplicas( fmt.Sprintf("localhost:%d", build.DefaultControlPort)). 
diff --git a/src/control/server/faults.go b/src/control/server/faults.go index a866de34767..a2e51760026 100644 --- a/src/control/server/faults.go +++ b/src/control/server/faults.go @@ -38,6 +38,11 @@ var ( "disable_vfio: true in config while running as non-root user with NVMe devices", "set disable_vfio: false or run daos_server as root", ) + FaultTransparentHugepageEnabled = serverFault( + code.ServerTransparentHugepageEnabled, + "transparent hugepage (THP) enabled on storage server, DAOS requires THP to be disabled", + "disable THP by adding 'transparent_hugepage=never' kernel parameter in the grub configuration file then reboot and restart daos_server", + ) FaultHarnessNotStarted = serverFault( code.ServerHarnessNotStarted, fmt.Sprintf("%s harness not started", build.DataPlaneName), diff --git a/src/control/server/mocks.go b/src/control/server/mocks.go index f2d5f0c1ebd..6b654034557 100644 --- a/src/control/server/mocks.go +++ b/src/control/server/mocks.go @@ -86,3 +86,23 @@ func (ms *mockSubscriber) getRx() []string { return ms.rx } + +type mockIOMMUDetector struct { + enabled bool + err error +} + +// IsIOMMUEnabled implements hardware.IOMMUDetector interface +func (mid mockIOMMUDetector) IsIOMMUEnabled() (bool, error) { + return mid.enabled, mid.err +} + +type mockTHPDetector struct { + enabled bool + err error +} + +// IsTHPEnabled implements hardware.THPDetector interface +func (mid mockTHPDetector) IsTHPEnabled() (bool, error) { + return mid.enabled, mid.err +} diff --git a/src/control/server/server.go b/src/control/server/server.go index 22131b3cb4e..14155682c76 100644 --- a/src/control/server/server.go +++ b/src/control/server/server.go @@ -356,13 +356,11 @@ func (srv *server) addEngines(ctx context.Context, smi *common.SysMemInfo) error var allStarted sync.WaitGroup registerTelemetryCallbacks(ctx, srv) - iommuEnabled, err := topology.DefaultIOMMUDetector(srv.log).IsIOMMUEnabled() - if err != nil { - return err - } + iommuChecker := 
topology.DefaultIOMMUDetector(srv.log) + thpChecker := topology.DefaultTHPDetector(srv.log) // Allocate hugepages and rebind NVMe devices to userspace drivers. - if err := prepBdevStorage(srv, iommuEnabled, smi); err != nil { + if err := prepBdevStorage(srv, smi, iommuChecker, thpChecker); err != nil { return err } diff --git a/src/control/server/server_utils.go b/src/control/server/server_utils.go index ea2933db373..7d3ed9f6c44 100644 --- a/src/control/server/server_utils.go +++ b/src/control/server/server_utils.go @@ -362,7 +362,7 @@ func SetHugeNodes(log logging.Logger, srvCfg *config.Server, smi *common.SysMemI // Prepare bdev storage. Assumes validation has already been performed on server config. Hugepages // are required for both emulated (AIO devices) and real NVMe bdevs. VFIO and IOMMU are not // mandatory requirements for emulated NVMe. -func prepBdevStorage(srv *server, iommuEnabled bool, smi *common.SysMemInfo) error { +func prepBdevStorage(srv *server, smi *common.SysMemInfo, iommuChecker hardware.IOMMUDetector, thpChecker hardware.THPDetector) error { defer srv.logDuration(track("time to prepare bdev storage")) if srv.cfg == nil { @@ -373,6 +373,22 @@ func prepBdevStorage(srv *server, iommuEnabled bool, smi *common.SysMemInfo) err return nil } + // Fail to start if transparent hugepages are enabled. DAOS requires exclusive control over + // hugepages and therefore needs feature disabled. AllowTHP override flag provided for + // edge cases. + if !srv.cfg.AllowTHP { + if thpEnabled, err := thpChecker.IsTHPEnabled(); err != nil { + return errors.Wrap(err, "transparent hugepage check") + } else if thpEnabled { + return FaultTransparentHugepageEnabled + } + } + + iommuEnabled, err := iommuChecker.IsIOMMUEnabled() + if err != nil { + return errors.Wrap(err, "iommu check") + } + bdevCfgs := srv.cfg.GetBdevConfigs() // Perform these checks only if non-emulated NVMe is used and user is unprivileged. 
diff --git a/src/control/server/server_utils_test.go b/src/control/server/server_utils_test.go index db5bd957f67..69852dd9e93 100644 --- a/src/control/server/server_utils_test.go +++ b/src/control/server/server_utils_test.go @@ -320,7 +320,6 @@ func TestServer_prepBdevStorage_setEngineMemSize(t *testing.T) { } for name, tc := range map[string]struct { - iommuDisabled bool srvCfgExtra func(*config.Server) *config.Server memInfo1 *common.SysMemInfo // Before prepBdevStorage() memInfo2 *common.SysMemInfo // After prepBdevStorage() @@ -328,6 +327,10 @@ func TestServer_prepBdevStorage_setEngineMemSize(t *testing.T) { hugepagesTotal int // Values for all NUMA nodes, will be split per-node. bmbc *bdev.MockBackendConfig overrideUser string + iommuDisabled bool + iommuCheckErr error + thpEnabled bool + thpCheckErr error expPrepErr error expPrepCalls []storage.BdevPrepareRequest expMemSize int @@ -379,6 +382,13 @@ func TestServer_prepBdevStorage_setEngineMemSize(t *testing.T) { expMemSize: 16384, expHugepageSize: 2, }, + "iommu check error": { + iommuCheckErr: errors.New("fail"), + srvCfgExtra: func(sc *config.Server) *config.Server { + return sc.WithEngines(pmemEngine(0), pmemEngine(1)) + }, + expPrepErr: errors.New("iommu check: fail"), + }, "iommu disabled": { iommuDisabled: true, srvCfgExtra: func(sc *config.Server) *config.Server { @@ -422,6 +432,40 @@ func TestServer_prepBdevStorage_setEngineMemSize(t *testing.T) { expMemSize: 16384, expHugepageSize: 2, }, + "thp check error": { + thpCheckErr: errors.New("fail"), + srvCfgExtra: func(sc *config.Server) *config.Server { + return sc.WithEngines(pmemEngine(0), pmemEngine(1)) + }, + expPrepErr: errors.New("transparent hugepage check: fail"), + }, + "thp enabled": { + thpEnabled: true, + srvCfgExtra: func(sc *config.Server) *config.Server { + return sc.WithEngines(pmemEngine(0), pmemEngine(1)) + }, + expPrepErr: FaultTransparentHugepageEnabled, + }, + "thp enabled; override flag set": { + thpEnabled: true, + srvCfgExtra: 
func(sc *config.Server) *config.Server { + return sc.WithAllowTHP(true). + WithEngines(pmemEngine(0), pmemEngine(1)) + }, + expPrepCalls: []storage.BdevPrepareRequest{ + defCleanDualEngine, + { + HugeNodes: "nodes_hp[0]=8192,nodes_hp[1]=8192", + TargetUser: username, + PCIAllowList: fmt.Sprintf("%s%s%s", test.MockPCIAddr(0), + storage.BdevPciAddrSep, test.MockPCIAddr(1)), + EnableVMD: true, + }, + }, + expHugepageSize: 2, + // Allocation change logged. + expNotice: true, + }, "no bdevs configured; hugepages disabled": { srvCfgExtra: func(sc *config.Server) *config.Server { return sc.WithDisableHugepages(true). @@ -1064,6 +1108,16 @@ func TestServer_prepBdevStorage_setEngineMemSize(t *testing.T) { cfg = tc.srvCfgExtra(cfg) } + // Defaults are IOMMU=ON and THP=OFF. + iommuChecker := mockIOMMUDetector{ + enabled: !tc.iommuDisabled, + err: tc.iommuCheckErr, + } + thpChecker := mockTHPDetector{ + enabled: tc.thpEnabled, + err: tc.thpCheckErr, + } + mockAffSrc := func(l logging.Logger, e *engine.Config) (uint, error) { iface := e.Fabric.Interface l.Debugf("eval affinity of iface %q", iface) @@ -1156,7 +1210,7 @@ func TestServer_prepBdevStorage_setEngineMemSize(t *testing.T) { srv.runningUser = &user.User{Username: tc.overrideUser} } - gotPrepErr := prepBdevStorage(srv, !tc.iommuDisabled, tc.memInfo1) + gotPrepErr := prepBdevStorage(srv, tc.memInfo1, iommuChecker, thpChecker) mbb.RLock() if diff := cmp.Diff(tc.expPrepCalls, mbb.PrepareCalls, prepCmpOpt); diff != "" { @@ -1187,13 +1241,12 @@ func TestServer_prepBdevStorage_setEngineMemSize(t *testing.T) { test.AssertEqual(t, tc.expHugepageSize, ei.runner.GetConfig().HugepageSz, "unexpected huge page size") - txtMod := "" - if !tc.expNotice { - txtMod = "no " + gotNotice := strings.Contains(buf.String(), "NOTICE") + if tc.expNotice && !gotNotice { + t.Fatal("expected NOTICE level message but got none") + } else if !tc.expNotice && gotNotice { + t.Fatal("expected no NOTICE level message but got one") } - msg := 
fmt.Sprintf("expected %sNOTICE level message", txtMod) - test.AssertEqual(t, tc.expNotice, strings.Contains(buf.String(), "NOTICE"), - msg) }) } } diff --git a/src/control/server/storage/config.go b/src/control/server/storage/config.go index 1468d2e7bdb..d21aa9db15a 100644 --- a/src/control/server/storage/config.go +++ b/src/control/server/storage/config.go @@ -1,5 +1,6 @@ // // (C) Copyright 2019-2024 Intel Corporation. +// (C) Copyright 2025 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -171,8 +172,8 @@ func (tc *TierConfig) WithStorageClass(cls string) *TierConfig { return tc } -// WithScmDisableHugepages disables hugepages for tmpfs. -func (tc *TierConfig) WithScmDisableHugepages() *TierConfig { +// WithScmHugepagesDisabled disables hugepages for tmpfs. +func (tc *TierConfig) WithScmHugepagesDisabled() *TierConfig { tc.Scm.DisableHugepages = true return tc } diff --git a/utils/config/daos_server.yml b/utils/config/daos_server.yml index 9777dfb0905..7fc87854f9c 100644 --- a/utils/config/daos_server.yml +++ b/utils/config/daos_server.yml @@ -228,6 +228,17 @@ #allow_numa_imbalance: true # # + +## Allow DAOS server to run with transparent hugepages (THP) enabled on the host machine. +# +## WARNING: Transparent hugepages can conflict with how the DAOS server uses hugepages, and enabling +## them may cause intermittent errors. Unless transparent hugepages are required by another +## application on the machine, it is strongly recommended to leave this setting disabled. +# +## default: false +#allow_thp: true +# +# ## Reserve an amount of RAM for system use when calculating the size of RAM-disks that will be ## created for DAOS I/O engines. Units are in GiB and represents the total RAM that will be ## reserved when calculating RAM-disk sizes for all engines. 
@@ -574,6 +585,11 @@ # #class: dcpm # #scm_list: [/dev/pmem1] # +# # When class is set to ram, tmpfs will be mounted with hugepage +# # support, if the kernel supports it. If this is not desirable, +# # the behavior may be disabled here. +# scm_hugepages_disabled: true +# # - # # Backend block device type. Force a SPDK driver to be used by this engine # # instance. From 3a54420315227276e626a11ef17c20e198ea6640 Mon Sep 17 00:00:00 2001 From: Kris Jacque Date: Mon, 22 Dec 2025 07:11:10 -0700 Subject: [PATCH 095/253] DAOS-18349 control: Remove unused pool target type (#17294) Target type was never set anywhere internally, so always appeared as "unknown" in all user-facing tools. The target type field was intended to indicate the storage media, so is supplanted by the storage tier information. Signed-off-by: Kris Jacque --- src/control/cmd/daos/pretty/pool.go | 2 +- src/control/cmd/daos/pretty/pool_test.go | 21 +- src/control/cmd/dmg/pretty/pool_test.go | 93 +----- src/control/common/proto/mgmt/pool.pb.go | 304 +++++++----------- src/control/lib/control/pool.go | 1 - .../lib/daos/api/libdaos_pool_stubs.go | 2 - src/control/lib/daos/api/pool.go | 1 - src/control/lib/daos/pool.go | 15 +- src/include/daos_pool.h | 17 +- src/mgmt/pool.pb-c.c | 63 +--- src/mgmt/pool.pb-c.h | 27 +- src/mgmt/srv_drpc.c | 1 - src/mgmt/tests/mocks.c | 1 - src/mgmt/tests/srv_drpc_tests.c | 1 - src/pool/srv_cli.c | 1 - src/proto/mgmt/pool.proto | 9 +- 16 files changed, 149 insertions(+), 410 deletions(-) diff --git a/src/control/cmd/daos/pretty/pool.go b/src/control/cmd/daos/pretty/pool.go index af5b99f9c76..9126d51f145 100644 --- a/src/control/cmd/daos/pretty/pool.go +++ b/src/control/cmd/daos/pretty/pool.go @@ -164,7 +164,7 @@ func PrintPoolQueryTargetInfo(pqti *daos.PoolQueryTargetInfo, out io.Writer) err w := txtfmt.NewErrWriter(out) // Maintain output compatibility with the `daos pool query-targets` output. 
- fmt.Fprintf(w, "Target: type %s, state %s\n", pqti.Type, pqti.State) + fmt.Fprintf(w, "Target: state %s\n", pqti.State) if pqti.Space != nil { if pqti.MdOnSsdActive { printPoolTiersMdOnSsd(pqti.MemFileBytes, pqti.Space, w, false) diff --git a/src/control/cmd/daos/pretty/pool_test.go b/src/control/cmd/daos/pretty/pool_test.go index e743b509a44..d1364805212 100644 --- a/src/control/cmd/daos/pretty/pool_test.go +++ b/src/control/cmd/daos/pretty/pool_test.go @@ -419,7 +419,6 @@ func TestPretty_PrintPoolQueryTarget(t *testing.T) { }, "valid: single target (unknown, down_out)": { pqti: &daos.PoolQueryTargetInfo{ - Type: 0, State: daos.PoolTargetStateDownOut, Space: []*daos.StorageUsageStats{ { @@ -435,7 +434,7 @@ func TestPretty_PrintPoolQueryTarget(t *testing.T) { }, }, expPrintStr: ` -Target: type unknown, state down_out +Target: state down_out - Storage tier 0 (SCM): Total size: 6.0 GB Free: 5.0 GB @@ -446,7 +445,6 @@ Target: type unknown, state down_out }, "valid: single target (unknown, down)": { pqti: &daos.PoolQueryTargetInfo{ - Type: 0, State: daos.PoolTargetStateDown, Space: []*daos.StorageUsageStats{ { @@ -462,7 +460,7 @@ Target: type unknown, state down_out }, }, expPrintStr: ` -Target: type unknown, state down +Target: state down - Storage tier 0 (SCM): Total size: 6.0 GB Free: 5.0 GB @@ -473,7 +471,6 @@ Target: type unknown, state down }, "valid: single target (unknown, up)": { pqti: &daos.PoolQueryTargetInfo{ - Type: 0, State: daos.PoolTargetStateUp, Space: []*daos.StorageUsageStats{ { @@ -489,7 +486,7 @@ Target: type unknown, state down }, }, expPrintStr: ` -Target: type unknown, state up +Target: state up - Storage tier 0 (SCM): Total size: 6.0 GB Free: 5.0 GB @@ -500,7 +497,6 @@ Target: type unknown, state up }, "valid: single target (unknown, up_in)": { pqti: &daos.PoolQueryTargetInfo{ - Type: 0, State: daos.PoolTargetStateUpIn, Space: []*daos.StorageUsageStats{ { @@ -516,7 +512,7 @@ Target: type unknown, state up }, }, expPrintStr: ` -Target: type 
unknown, state up_in +Target: state up_in - Storage tier 0 (SCM): Total size: 6.0 GB Free: 5.0 GB @@ -527,7 +523,6 @@ Target: type unknown, state up_in }, "valid: single target (unknown, new)": { pqti: &daos.PoolQueryTargetInfo{ - Type: 0, State: daos.PoolTargetStateNew, Space: []*daos.StorageUsageStats{ { @@ -543,7 +538,7 @@ Target: type unknown, state up_in }, }, expPrintStr: ` -Target: type unknown, state new +Target: state new - Storage tier 0 (SCM): Total size: 6.0 GB Free: 5.0 GB @@ -554,7 +549,6 @@ Target: type unknown, state new }, "valid: single target (unknown, drain)": { pqti: &daos.PoolQueryTargetInfo{ - Type: 0, State: daos.PoolTargetStateDrain, Space: []*daos.StorageUsageStats{ { @@ -571,7 +565,7 @@ Target: type unknown, state new MemFileBytes: 3000000000, }, expPrintStr: ` -Target: type unknown, state drain +Target: state drain - Storage tier 0 (SCM): Total size: 6.0 GB Free: 5.0 GB @@ -582,7 +576,6 @@ Target: type unknown, state drain }, "valid: single target (unknown, down_out): MD-on-SSD": { pqti: &daos.PoolQueryTargetInfo{ - Type: 0, State: daos.PoolTargetStateDownOut, Space: []*daos.StorageUsageStats{ { @@ -600,7 +593,7 @@ Target: type unknown, state drain MdOnSsdActive: true, }, expPrintStr: ` -Target: type unknown, state down_out +Target: state down_out - Metadata storage: Total size: 6.0 GB Free: 5.0 GB diff --git a/src/control/cmd/dmg/pretty/pool_test.go b/src/control/cmd/dmg/pretty/pool_test.go index 1ba539bfeeb..ddbd3fd3aed 100644 --- a/src/control/cmd/dmg/pretty/pool_test.go +++ b/src/control/cmd/dmg/pretty/pool_test.go @@ -54,50 +54,46 @@ func TestPretty_PrintPoolQueryTargetResponse(t *testing.T) { Status: 0, Infos: []*daos.PoolQueryTargetInfo{ { - Type: 0, State: daos.PoolTargetStateDown, Space: []*daos.StorageUsageStats{tier0, tier1}, }, { - Type: 0, State: daos.PoolTargetStateUpIn, Space: []*daos.StorageUsageStats{tier0, tier1}, }, { - Type: 0, State: daos.PoolTargetStateDownOut, Space: []*daos.StorageUsageStats{tier0, tier1}, }, { - 
Type: 0, State: daos.PoolTargetStateUpIn, Space: []*daos.StorageUsageStats{tier0, tier1}, }, }, }, expPrintStr: ` -Target: type unknown, state down +Target: state down - Storage tier 0 (SCM): Total size: 6.0 GB Free: 5.0 GB - Storage tier 1 (NVME): Total size: 100 GB Free: 90 GB -Target: type unknown, state up_in +Target: state up_in - Storage tier 0 (SCM): Total size: 6.0 GB Free: 5.0 GB - Storage tier 1 (NVME): Total size: 100 GB Free: 90 GB -Target: type unknown, state down_out +Target: state down_out - Storage tier 0 (SCM): Total size: 6.0 GB Free: 5.0 GB - Storage tier 1 (NVME): Total size: 100 GB Free: 90 GB -Target: type unknown, state up_in +Target: state up_in - Storage tier 0 (SCM): Total size: 6.0 GB Free: 5.0 GB @@ -111,107 +107,46 @@ Target: type unknown, state up_in Status: 0, Infos: []*daos.PoolQueryTargetInfo{ { - Type: 0, State: 42, Space: []*daos.StorageUsageStats{tier0, tier1}, }, { - Type: 0, State: daos.PoolTargetStateUpIn, Space: []*daos.StorageUsageStats{tier0, tier1}, }, { - Type: 0, State: daos.PoolTargetStateDownOut, Space: []*daos.StorageUsageStats{tier0, tier1}, }, { - Type: 0, State: daos.PoolTargetStateUpIn, Space: []*daos.StorageUsageStats{tier0, tier1}, }, }, }, expPrintStr: ` -Target: type unknown, state invalid +Target: state invalid - Storage tier 0 (SCM): Total size: 6.0 GB Free: 5.0 GB - Storage tier 1 (NVME): Total size: 100 GB Free: 90 GB -Target: type unknown, state up_in +Target: state up_in - Storage tier 0 (SCM): Total size: 6.0 GB Free: 5.0 GB - Storage tier 1 (NVME): Total size: 100 GB Free: 90 GB -Target: type unknown, state down_out +Target: state down_out - Storage tier 0 (SCM): Total size: 6.0 GB Free: 5.0 GB - Storage tier 1 (NVME): Total size: 100 GB Free: 90 GB -Target: type unknown, state up_in -- Storage tier 0 (SCM): - Total size: 6.0 GB - Free: 5.0 GB -- Storage tier 1 (NVME): - Total size: 100 GB - Free: 90 GB -`, - }, - "invalid target type": { - pqtr: &control.PoolQueryTargetResp{ - Status: 0, - Infos: 
[]*daos.PoolQueryTargetInfo{ - { - Type: 42, - State: daos.PoolTargetStateDown, - Space: []*daos.StorageUsageStats{tier0, tier1}, - }, - { - Type: 0, - State: daos.PoolTargetStateUpIn, - Space: []*daos.StorageUsageStats{tier0, tier1}, - }, - { - Type: 0, - State: daos.PoolTargetStateDownOut, - Space: []*daos.StorageUsageStats{tier0, tier1}, - }, - { - Type: 0, - State: daos.PoolTargetStateUpIn, - Space: []*daos.StorageUsageStats{tier0, tier1}, - }, - }, - }, - expPrintStr: ` -Target: type invalid, state down -- Storage tier 0 (SCM): - Total size: 6.0 GB - Free: 5.0 GB -- Storage tier 1 (NVME): - Total size: 100 GB - Free: 90 GB -Target: type unknown, state up_in -- Storage tier 0 (SCM): - Total size: 6.0 GB - Free: 5.0 GB -- Storage tier 1 (NVME): - Total size: 100 GB - Free: 90 GB -Target: type unknown, state down_out -- Storage tier 0 (SCM): - Total size: 6.0 GB - Free: 5.0 GB -- Storage tier 1 (NVME): - Total size: 100 GB - Free: 90 GB -Target: type unknown, state up_in +Target: state up_in - Storage tier 0 (SCM): Total size: 6.0 GB Free: 5.0 GB @@ -225,7 +160,6 @@ Target: type unknown, state up_in Status: 0, Infos: []*daos.PoolQueryTargetInfo{ { - Type: 0, State: daos.PoolTargetStateDown, Space: []*daos.StorageUsageStats{ tier0, tier1, @@ -236,7 +170,6 @@ Target: type unknown, state up_in }, }, { - Type: 0, State: daos.PoolTargetStateUpIn, Space: []*daos.StorageUsageStats{ tier0, tier1, @@ -247,7 +180,6 @@ Target: type unknown, state up_in }, }, { - Type: 0, State: daos.PoolTargetStateDownOut, Space: []*daos.StorageUsageStats{ tier0, tier1, @@ -259,7 +191,6 @@ Target: type unknown, state up_in }, }, { - Type: 0, State: daos.PoolTargetStateUpIn, Space: []*daos.StorageUsageStats{ tier0, tier1, @@ -272,7 +203,7 @@ Target: type unknown, state up_in }, }, expPrintStr: ` -Target: type unknown, state down +Target: state down - Storage tier 0 (SCM): Total size: 6.0 GB Free: 5.0 GB @@ -282,7 +213,7 @@ Target: type unknown, state down - Storage tier 2 (UNKNOWN): Total 
size: 800 GB Free: 200 GB -Target: type unknown, state up_in +Target: state up_in - Storage tier 0 (SCM): Total size: 6.0 GB Free: 5.0 GB @@ -292,7 +223,7 @@ Target: type unknown, state up_in - Storage tier 2 (UNKNOWN): Total size: 800 GB Free: 200 GB -Target: type unknown, state down_out +Target: state down_out - Storage tier 0 (SCM): Total size: 6.0 GB Free: 5.0 GB @@ -302,7 +233,7 @@ Target: type unknown, state down_out - Storage tier 2 (UNKNOWN): Total size: 800 GB Free: 200 GB -Target: type unknown, state up_in +Target: state up_in - Storage tier 0 (SCM): Total size: 6.0 GB Free: 5.0 GB diff --git a/src/control/common/proto/mgmt/pool.pb.go b/src/control/common/proto/mgmt/pool.pb.go index 67a85f294c6..74767f17fd7 100644 --- a/src/control/common/proto/mgmt/pool.pb.go +++ b/src/control/common/proto/mgmt/pool.pb.go @@ -177,61 +177,6 @@ func (PoolRebuildStatus_State) EnumDescriptor() ([]byte, []int) { return file_mgmt_pool_proto_rawDescGZIP(), []int{20, 0} } -type PoolQueryTargetInfo_TargetType int32 - -const ( - PoolQueryTargetInfo_UNKNOWN PoolQueryTargetInfo_TargetType = 0 - PoolQueryTargetInfo_HDD PoolQueryTargetInfo_TargetType = 1 // Rotating disk - PoolQueryTargetInfo_SSD PoolQueryTargetInfo_TargetType = 2 // Flash-based - PoolQueryTargetInfo_PM PoolQueryTargetInfo_TargetType = 3 // Persistent memory - PoolQueryTargetInfo_VM PoolQueryTargetInfo_TargetType = 4 // Volatile memory -) - -// Enum value maps for PoolQueryTargetInfo_TargetType. 
-var ( - PoolQueryTargetInfo_TargetType_name = map[int32]string{ - 0: "UNKNOWN", - 1: "HDD", - 2: "SSD", - 3: "PM", - 4: "VM", - } - PoolQueryTargetInfo_TargetType_value = map[string]int32{ - "UNKNOWN": 0, - "HDD": 1, - "SSD": 2, - "PM": 3, - "VM": 4, - } -) - -func (x PoolQueryTargetInfo_TargetType) Enum() *PoolQueryTargetInfo_TargetType { - p := new(PoolQueryTargetInfo_TargetType) - *p = x - return p -} - -func (x PoolQueryTargetInfo_TargetType) String() string { - return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) -} - -func (PoolQueryTargetInfo_TargetType) Descriptor() protoreflect.EnumDescriptor { - return file_mgmt_pool_proto_enumTypes[3].Descriptor() -} - -func (PoolQueryTargetInfo_TargetType) Type() protoreflect.EnumType { - return &file_mgmt_pool_proto_enumTypes[3] -} - -func (x PoolQueryTargetInfo_TargetType) Number() protoreflect.EnumNumber { - return protoreflect.EnumNumber(x) -} - -// Deprecated: Use PoolQueryTargetInfo_TargetType.Descriptor instead. -func (PoolQueryTargetInfo_TargetType) EnumDescriptor() ([]byte, []int) { - return file_mgmt_pool_proto_rawDescGZIP(), []int{30, 0} -} - type PoolQueryTargetInfo_TargetState int32 const ( @@ -277,11 +222,11 @@ func (x PoolQueryTargetInfo_TargetState) String() string { } func (PoolQueryTargetInfo_TargetState) Descriptor() protoreflect.EnumDescriptor { - return file_mgmt_pool_proto_enumTypes[4].Descriptor() + return file_mgmt_pool_proto_enumTypes[3].Descriptor() } func (PoolQueryTargetInfo_TargetState) Type() protoreflect.EnumType { - return &file_mgmt_pool_proto_enumTypes[4] + return &file_mgmt_pool_proto_enumTypes[3] } func (x PoolQueryTargetInfo_TargetState) Number() protoreflect.EnumNumber { @@ -290,7 +235,7 @@ func (x PoolQueryTargetInfo_TargetState) Number() protoreflect.EnumNumber { // Deprecated: Use PoolQueryTargetInfo_TargetState.Descriptor instead. 
func (PoolQueryTargetInfo_TargetState) EnumDescriptor() ([]byte, []int) { - return file_mgmt_pool_proto_rawDescGZIP(), []int{30, 1} + return file_mgmt_pool_proto_rawDescGZIP(), []int{30, 0} } // PoolCreateReq supplies new pool parameters. @@ -2636,7 +2581,6 @@ type PoolQueryTargetInfo struct { sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - Type PoolQueryTargetInfo_TargetType `protobuf:"varint,1,opt,name=type,proto3,enum=mgmt.PoolQueryTargetInfo_TargetType" json:"type,omitempty"` // Target type jsee enum daos_target_type_t State PoolQueryTargetInfo_TargetState `protobuf:"varint,2,opt,name=state,proto3,enum=mgmt.PoolQueryTargetInfo_TargetState" json:"state,omitempty"` // target state see enum daos_target_state_t // TODO: target performance data Space []*StorageTargetUsage `protobuf:"bytes,3,rep,name=space,proto3" json:"space,omitempty"` // this target's usage per storage tier @@ -2676,13 +2620,6 @@ func (*PoolQueryTargetInfo) Descriptor() ([]byte, []int) { return file_mgmt_pool_proto_rawDescGZIP(), []int{30} } -func (x *PoolQueryTargetInfo) GetType() PoolQueryTargetInfo_TargetType { - if x != nil { - return x.Type - } - return PoolQueryTargetInfo_UNKNOWN -} - func (x *PoolQueryTargetInfo) GetState() PoolQueryTargetInfo_TargetState { if x != nil { return x.State @@ -3401,72 +3338,65 @@ var file_mgmt_pool_proto_rawDesc = []byte{ 0x12, 0x35, 0x0a, 0x0a, 0x6d, 0x65, 0x64, 0x69, 0x61, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x16, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x4d, 0x65, 0x64, 0x69, 0x61, 0x54, 0x79, 0x70, 0x65, 0x52, 0x09, 0x6d, 0x65, - 0x64, 0x69, 0x61, 0x54, 0x79, 0x70, 0x65, 0x22, 0xa9, 0x03, 0x0a, 0x13, 0x50, 0x6f, 0x6f, 0x6c, + 0x64, 0x69, 0x61, 0x54, 0x79, 0x70, 0x65, 0x22, 0xb8, 0x02, 0x0a, 0x13, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x49, 0x6e, 0x66, 0x6f, 0x12, - 0x38, 0x0a, 0x04, 0x74, 0x79, 0x70, 0x65, 0x18, 
0x01, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x24, 0x2e, - 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, - 0x72, 0x67, 0x65, 0x74, 0x49, 0x6e, 0x66, 0x6f, 0x2e, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x54, - 0x79, 0x70, 0x65, 0x52, 0x04, 0x74, 0x79, 0x70, 0x65, 0x12, 0x3b, 0x0a, 0x05, 0x73, 0x74, 0x61, - 0x74, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x25, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, - 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x49, - 0x6e, 0x66, 0x6f, 0x2e, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, - 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x12, 0x2e, 0x0a, 0x05, 0x73, 0x70, 0x61, 0x63, 0x65, 0x18, - 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x18, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x74, 0x6f, - 0x72, 0x61, 0x67, 0x65, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x55, 0x73, 0x61, 0x67, 0x65, 0x52, - 0x05, 0x73, 0x70, 0x61, 0x63, 0x65, 0x12, 0x24, 0x0a, 0x0e, 0x6d, 0x65, 0x6d, 0x5f, 0x66, 0x69, - 0x6c, 0x65, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x04, 0x52, 0x0c, - 0x6d, 0x65, 0x6d, 0x46, 0x69, 0x6c, 0x65, 0x42, 0x79, 0x74, 0x65, 0x73, 0x12, 0x27, 0x0a, 0x10, - 0x6d, 0x64, 0x5f, 0x6f, 0x6e, 0x5f, 0x73, 0x73, 0x64, 0x5f, 0x61, 0x63, 0x74, 0x69, 0x76, 0x65, - 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0d, 0x6d, 0x64, 0x4f, 0x6e, 0x53, 0x73, 0x64, 0x41, - 0x63, 0x74, 0x69, 0x76, 0x65, 0x22, 0x3b, 0x0a, 0x0a, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x54, - 0x79, 0x70, 0x65, 0x12, 0x0b, 0x0a, 0x07, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, 0x00, - 0x12, 0x07, 0x0a, 0x03, 0x48, 0x44, 0x44, 0x10, 0x01, 0x12, 0x07, 0x0a, 0x03, 0x53, 0x53, 0x44, - 0x10, 0x02, 0x12, 0x06, 0x0a, 0x02, 0x50, 0x4d, 0x10, 0x03, 0x12, 0x06, 0x0a, 0x02, 0x56, 0x4d, - 0x10, 0x04, 0x22, 0x5f, 0x0a, 0x0b, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x53, 0x74, 0x61, 0x74, - 0x65, 0x12, 0x11, 0x0a, 0x0d, 0x53, 0x54, 0x41, 0x54, 0x45, 0x5f, 0x55, 0x4e, 0x4b, 
0x4e, 0x4f, - 0x57, 0x4e, 0x10, 0x00, 0x12, 0x0c, 0x0a, 0x08, 0x44, 0x4f, 0x57, 0x4e, 0x5f, 0x4f, 0x55, 0x54, - 0x10, 0x01, 0x12, 0x08, 0x0a, 0x04, 0x44, 0x4f, 0x57, 0x4e, 0x10, 0x02, 0x12, 0x06, 0x0a, 0x02, - 0x55, 0x50, 0x10, 0x03, 0x12, 0x09, 0x0a, 0x05, 0x55, 0x50, 0x5f, 0x49, 0x4e, 0x10, 0x04, 0x12, - 0x07, 0x0a, 0x03, 0x4e, 0x45, 0x57, 0x10, 0x05, 0x12, 0x09, 0x0a, 0x05, 0x44, 0x52, 0x41, 0x49, - 0x4e, 0x10, 0x06, 0x22, 0x5e, 0x0a, 0x13, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, - 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, - 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, - 0x75, 0x73, 0x12, 0x2f, 0x0a, 0x05, 0x69, 0x6e, 0x66, 0x6f, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, - 0x0b, 0x32, 0x19, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, - 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x05, 0x69, 0x6e, - 0x66, 0x6f, 0x73, 0x22, 0x54, 0x0a, 0x13, 0x50, 0x6f, 0x6f, 0x6c, 0x52, 0x65, 0x62, 0x75, 0x69, - 0x6c, 0x64, 0x53, 0x74, 0x61, 0x72, 0x74, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, + 0x3b, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x25, + 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, + 0x61, 0x72, 0x67, 0x65, 0x74, 0x49, 0x6e, 0x66, 0x6f, 0x2e, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, + 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x12, 0x2e, 0x0a, 0x05, + 0x73, 0x70, 0x61, 0x63, 0x65, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x18, 0x2e, 0x6d, 0x67, + 0x6d, 0x74, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, + 0x55, 0x73, 0x61, 0x67, 0x65, 0x52, 0x05, 0x73, 0x70, 0x61, 0x63, 0x65, 0x12, 0x24, 0x0a, 0x0e, + 0x6d, 0x65, 0x6d, 0x5f, 0x66, 0x69, 0x6c, 0x65, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x73, 0x18, 0x04, + 0x20, 0x01, 0x28, 0x04, 
0x52, 0x0c, 0x6d, 0x65, 0x6d, 0x46, 0x69, 0x6c, 0x65, 0x42, 0x79, 0x74, + 0x65, 0x73, 0x12, 0x27, 0x0a, 0x10, 0x6d, 0x64, 0x5f, 0x6f, 0x6e, 0x5f, 0x73, 0x73, 0x64, 0x5f, + 0x61, 0x63, 0x74, 0x69, 0x76, 0x65, 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0d, 0x6d, 0x64, + 0x4f, 0x6e, 0x53, 0x73, 0x64, 0x41, 0x63, 0x74, 0x69, 0x76, 0x65, 0x22, 0x5f, 0x0a, 0x0b, 0x54, + 0x61, 0x72, 0x67, 0x65, 0x74, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x11, 0x0a, 0x0d, 0x53, 0x54, + 0x41, 0x54, 0x45, 0x5f, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, 0x00, 0x12, 0x0c, 0x0a, + 0x08, 0x44, 0x4f, 0x57, 0x4e, 0x5f, 0x4f, 0x55, 0x54, 0x10, 0x01, 0x12, 0x08, 0x0a, 0x04, 0x44, + 0x4f, 0x57, 0x4e, 0x10, 0x02, 0x12, 0x06, 0x0a, 0x02, 0x55, 0x50, 0x10, 0x03, 0x12, 0x09, 0x0a, + 0x05, 0x55, 0x50, 0x5f, 0x49, 0x4e, 0x10, 0x04, 0x12, 0x07, 0x0a, 0x03, 0x4e, 0x45, 0x57, 0x10, + 0x05, 0x12, 0x09, 0x0a, 0x05, 0x44, 0x52, 0x41, 0x49, 0x4e, 0x10, 0x06, 0x4a, 0x04, 0x08, 0x01, + 0x10, 0x02, 0x22, 0x5e, 0x0a, 0x13, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, + 0x61, 0x72, 0x67, 0x65, 0x74, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, + 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, + 0x73, 0x12, 0x2f, 0x0a, 0x05, 0x69, 0x6e, 0x66, 0x6f, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, + 0x32, 0x19, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, + 0x79, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x05, 0x69, 0x6e, 0x66, + 0x6f, 0x73, 0x22, 0x54, 0x0a, 0x13, 0x50, 0x6f, 0x6f, 0x6c, 0x52, 0x65, 0x62, 0x75, 0x69, 0x6c, + 0x64, 0x53, 0x74, 0x61, 0x72, 0x74, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, + 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, + 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x1b, 0x0a, 0x09, 0x73, + 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x03, 
0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, + 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x69, 0x0a, 0x12, 0x50, 0x6f, 0x6f, 0x6c, + 0x52, 0x65, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x53, 0x74, 0x6f, 0x70, 0x52, 0x65, 0x71, 0x12, 0x10, + 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, + 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, + 0x12, 0x14, 0x0a, 0x05, 0x66, 0x6f, 0x72, 0x63, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x08, 0x52, + 0x05, 0x66, 0x6f, 0x72, 0x63, 0x65, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, + 0x6e, 0x6b, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, + 0x6e, 0x6b, 0x73, 0x22, 0x76, 0x0a, 0x13, 0x50, 0x6f, 0x6f, 0x6c, 0x53, 0x65, 0x6c, 0x66, 0x48, + 0x65, 0x61, 0x6c, 0x45, 0x76, 0x61, 0x6c, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, - 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x1b, 0x0a, 0x09, - 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0d, 0x52, - 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x69, 0x0a, 0x12, 0x50, 0x6f, 0x6f, - 0x6c, 0x52, 0x65, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x53, 0x74, 0x6f, 0x70, 0x52, 0x65, 0x71, 0x12, - 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, - 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, - 0x64, 0x12, 0x14, 0x0a, 0x05, 0x66, 0x6f, 0x72, 0x63, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x08, - 0x52, 0x05, 0x66, 0x6f, 0x72, 0x63, 0x65, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, - 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, - 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x76, 0x0a, 0x13, 0x50, 0x6f, 0x6f, 0x6c, 0x53, 0x65, 0x6c, 0x66, - 
0x48, 0x65, 0x61, 0x6c, 0x45, 0x76, 0x61, 0x6c, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, - 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, - 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x20, 0x0a, - 0x0c, 0x73, 0x79, 0x73, 0x5f, 0x70, 0x72, 0x6f, 0x70, 0x5f, 0x76, 0x61, 0x6c, 0x18, 0x04, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x0a, 0x73, 0x79, 0x73, 0x50, 0x72, 0x6f, 0x70, 0x56, 0x61, 0x6c, 0x12, - 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x05, 0x20, 0x03, - 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x2a, 0x25, 0x0a, 0x10, - 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x4d, 0x65, 0x64, 0x69, 0x61, 0x54, 0x79, 0x70, 0x65, - 0x12, 0x07, 0x0a, 0x03, 0x53, 0x43, 0x4d, 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, 0x4e, 0x56, 0x4d, - 0x45, 0x10, 0x01, 0x2a, 0x5d, 0x0a, 0x10, 0x50, 0x6f, 0x6f, 0x6c, 0x53, 0x65, 0x72, 0x76, 0x69, - 0x63, 0x65, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x0c, 0x0a, 0x08, 0x43, 0x72, 0x65, 0x61, 0x74, - 0x69, 0x6e, 0x67, 0x10, 0x00, 0x12, 0x09, 0x0a, 0x05, 0x52, 0x65, 0x61, 0x64, 0x79, 0x10, 0x01, - 0x12, 0x0e, 0x0a, 0x0a, 0x44, 0x65, 0x73, 0x74, 0x72, 0x6f, 0x79, 0x69, 0x6e, 0x67, 0x10, 0x02, - 0x12, 0x13, 0x0a, 0x0f, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 0x45, 0x78, 0x63, 0x6c, 0x75, - 0x64, 0x65, 0x64, 0x10, 0x03, 0x12, 0x0b, 0x0a, 0x07, 0x55, 0x6e, 0x6b, 0x6e, 0x6f, 0x77, 0x6e, - 0x10, 0x04, 0x42, 0x3a, 0x5a, 0x38, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, - 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2d, 0x73, 0x74, 0x61, 0x63, 0x6b, 0x2f, 0x64, 0x61, 0x6f, 0x73, - 0x2f, 0x73, 0x72, 0x63, 0x2f, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x2f, 0x63, 0x6f, 0x6d, - 0x6d, 0x6f, 0x6e, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x6d, 0x67, 0x6d, 0x74, 0x62, 0x06, - 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x20, 0x0a, 0x0c, + 
0x73, 0x79, 0x73, 0x5f, 0x70, 0x72, 0x6f, 0x70, 0x5f, 0x76, 0x61, 0x6c, 0x18, 0x04, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x0a, 0x73, 0x79, 0x73, 0x50, 0x72, 0x6f, 0x70, 0x56, 0x61, 0x6c, 0x12, 0x1b, + 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, + 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x2a, 0x25, 0x0a, 0x10, 0x53, + 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x4d, 0x65, 0x64, 0x69, 0x61, 0x54, 0x79, 0x70, 0x65, 0x12, + 0x07, 0x0a, 0x03, 0x53, 0x43, 0x4d, 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, 0x4e, 0x56, 0x4d, 0x45, + 0x10, 0x01, 0x2a, 0x5d, 0x0a, 0x10, 0x50, 0x6f, 0x6f, 0x6c, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, + 0x65, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x0c, 0x0a, 0x08, 0x43, 0x72, 0x65, 0x61, 0x74, 0x69, + 0x6e, 0x67, 0x10, 0x00, 0x12, 0x09, 0x0a, 0x05, 0x52, 0x65, 0x61, 0x64, 0x79, 0x10, 0x01, 0x12, + 0x0e, 0x0a, 0x0a, 0x44, 0x65, 0x73, 0x74, 0x72, 0x6f, 0x79, 0x69, 0x6e, 0x67, 0x10, 0x02, 0x12, + 0x13, 0x0a, 0x0f, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 0x45, 0x78, 0x63, 0x6c, 0x75, 0x64, + 0x65, 0x64, 0x10, 0x03, 0x12, 0x0b, 0x0a, 0x07, 0x55, 0x6e, 0x6b, 0x6e, 0x6f, 0x77, 0x6e, 0x10, + 0x04, 0x42, 0x3a, 0x5a, 0x38, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, + 0x64, 0x61, 0x6f, 0x73, 0x2d, 0x73, 0x74, 0x61, 0x63, 0x6b, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2f, + 0x73, 0x72, 0x63, 0x2f, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x2f, 0x63, 0x6f, 0x6d, 0x6d, + 0x6f, 0x6e, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x6d, 0x67, 0x6d, 0x74, 0x62, 0x06, 0x70, + 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( @@ -3481,74 +3411,72 @@ func file_mgmt_pool_proto_rawDescGZIP() []byte { return file_mgmt_pool_proto_rawDescData } -var file_mgmt_pool_proto_enumTypes = make([]protoimpl.EnumInfo, 5) +var file_mgmt_pool_proto_enumTypes = make([]protoimpl.EnumInfo, 4) var file_mgmt_pool_proto_msgTypes = make([]protoimpl.MessageInfo, 37) var file_mgmt_pool_proto_goTypes = []interface{}{ (StorageMediaType)(0), 
// 0: mgmt.StorageMediaType (PoolServiceState)(0), // 1: mgmt.PoolServiceState (PoolRebuildStatus_State)(0), // 2: mgmt.PoolRebuildStatus.State - (PoolQueryTargetInfo_TargetType)(0), // 3: mgmt.PoolQueryTargetInfo.TargetType - (PoolQueryTargetInfo_TargetState)(0), // 4: mgmt.PoolQueryTargetInfo.TargetState - (*PoolCreateReq)(nil), // 5: mgmt.PoolCreateReq - (*PoolCreateResp)(nil), // 6: mgmt.PoolCreateResp - (*PoolDestroyReq)(nil), // 7: mgmt.PoolDestroyReq - (*PoolDestroyResp)(nil), // 8: mgmt.PoolDestroyResp - (*PoolEvictReq)(nil), // 9: mgmt.PoolEvictReq - (*PoolEvictResp)(nil), // 10: mgmt.PoolEvictResp - (*PoolExcludeReq)(nil), // 11: mgmt.PoolExcludeReq - (*PoolExcludeResp)(nil), // 12: mgmt.PoolExcludeResp - (*PoolDrainReq)(nil), // 13: mgmt.PoolDrainReq - (*PoolDrainResp)(nil), // 14: mgmt.PoolDrainResp - (*PoolExtendReq)(nil), // 15: mgmt.PoolExtendReq - (*PoolExtendResp)(nil), // 16: mgmt.PoolExtendResp - (*PoolReintReq)(nil), // 17: mgmt.PoolReintReq - (*PoolReintResp)(nil), // 18: mgmt.PoolReintResp - (*ListPoolsReq)(nil), // 19: mgmt.ListPoolsReq - (*ListPoolsResp)(nil), // 20: mgmt.ListPoolsResp - (*ListContReq)(nil), // 21: mgmt.ListContReq - (*ListContResp)(nil), // 22: mgmt.ListContResp - (*PoolQueryReq)(nil), // 23: mgmt.PoolQueryReq - (*StorageUsageStats)(nil), // 24: mgmt.StorageUsageStats - (*PoolRebuildStatus)(nil), // 25: mgmt.PoolRebuildStatus - (*PoolQueryResp)(nil), // 26: mgmt.PoolQueryResp - (*PoolProperty)(nil), // 27: mgmt.PoolProperty - (*PoolSetPropReq)(nil), // 28: mgmt.PoolSetPropReq - (*PoolSetPropResp)(nil), // 29: mgmt.PoolSetPropResp - (*PoolGetPropReq)(nil), // 30: mgmt.PoolGetPropReq - (*PoolGetPropResp)(nil), // 31: mgmt.PoolGetPropResp - (*PoolUpgradeReq)(nil), // 32: mgmt.PoolUpgradeReq - (*PoolQueryTargetReq)(nil), // 33: mgmt.PoolQueryTargetReq - (*StorageTargetUsage)(nil), // 34: mgmt.StorageTargetUsage - (*PoolQueryTargetInfo)(nil), // 35: mgmt.PoolQueryTargetInfo - (*PoolQueryTargetResp)(nil), // 36: 
mgmt.PoolQueryTargetResp - (*PoolRebuildStartReq)(nil), // 37: mgmt.PoolRebuildStartReq - (*PoolRebuildStopReq)(nil), // 38: mgmt.PoolRebuildStopReq - (*PoolSelfHealEvalReq)(nil), // 39: mgmt.PoolSelfHealEvalReq - (*ListPoolsResp_Pool)(nil), // 40: mgmt.ListPoolsResp.Pool - (*ListContResp_Cont)(nil), // 41: mgmt.ListContResp.Cont + (PoolQueryTargetInfo_TargetState)(0), // 3: mgmt.PoolQueryTargetInfo.TargetState + (*PoolCreateReq)(nil), // 4: mgmt.PoolCreateReq + (*PoolCreateResp)(nil), // 5: mgmt.PoolCreateResp + (*PoolDestroyReq)(nil), // 6: mgmt.PoolDestroyReq + (*PoolDestroyResp)(nil), // 7: mgmt.PoolDestroyResp + (*PoolEvictReq)(nil), // 8: mgmt.PoolEvictReq + (*PoolEvictResp)(nil), // 9: mgmt.PoolEvictResp + (*PoolExcludeReq)(nil), // 10: mgmt.PoolExcludeReq + (*PoolExcludeResp)(nil), // 11: mgmt.PoolExcludeResp + (*PoolDrainReq)(nil), // 12: mgmt.PoolDrainReq + (*PoolDrainResp)(nil), // 13: mgmt.PoolDrainResp + (*PoolExtendReq)(nil), // 14: mgmt.PoolExtendReq + (*PoolExtendResp)(nil), // 15: mgmt.PoolExtendResp + (*PoolReintReq)(nil), // 16: mgmt.PoolReintReq + (*PoolReintResp)(nil), // 17: mgmt.PoolReintResp + (*ListPoolsReq)(nil), // 18: mgmt.ListPoolsReq + (*ListPoolsResp)(nil), // 19: mgmt.ListPoolsResp + (*ListContReq)(nil), // 20: mgmt.ListContReq + (*ListContResp)(nil), // 21: mgmt.ListContResp + (*PoolQueryReq)(nil), // 22: mgmt.PoolQueryReq + (*StorageUsageStats)(nil), // 23: mgmt.StorageUsageStats + (*PoolRebuildStatus)(nil), // 24: mgmt.PoolRebuildStatus + (*PoolQueryResp)(nil), // 25: mgmt.PoolQueryResp + (*PoolProperty)(nil), // 26: mgmt.PoolProperty + (*PoolSetPropReq)(nil), // 27: mgmt.PoolSetPropReq + (*PoolSetPropResp)(nil), // 28: mgmt.PoolSetPropResp + (*PoolGetPropReq)(nil), // 29: mgmt.PoolGetPropReq + (*PoolGetPropResp)(nil), // 30: mgmt.PoolGetPropResp + (*PoolUpgradeReq)(nil), // 31: mgmt.PoolUpgradeReq + (*PoolQueryTargetReq)(nil), // 32: mgmt.PoolQueryTargetReq + (*StorageTargetUsage)(nil), // 33: mgmt.StorageTargetUsage + 
(*PoolQueryTargetInfo)(nil), // 34: mgmt.PoolQueryTargetInfo + (*PoolQueryTargetResp)(nil), // 35: mgmt.PoolQueryTargetResp + (*PoolRebuildStartReq)(nil), // 36: mgmt.PoolRebuildStartReq + (*PoolRebuildStopReq)(nil), // 37: mgmt.PoolRebuildStopReq + (*PoolSelfHealEvalReq)(nil), // 38: mgmt.PoolSelfHealEvalReq + (*ListPoolsResp_Pool)(nil), // 39: mgmt.ListPoolsResp.Pool + (*ListContResp_Cont)(nil), // 40: mgmt.ListContResp.Cont } var file_mgmt_pool_proto_depIdxs = []int32{ - 27, // 0: mgmt.PoolCreateReq.properties:type_name -> mgmt.PoolProperty - 40, // 1: mgmt.ListPoolsResp.pools:type_name -> mgmt.ListPoolsResp.Pool - 41, // 2: mgmt.ListContResp.containers:type_name -> mgmt.ListContResp.Cont + 26, // 0: mgmt.PoolCreateReq.properties:type_name -> mgmt.PoolProperty + 39, // 1: mgmt.ListPoolsResp.pools:type_name -> mgmt.ListPoolsResp.Pool + 40, // 2: mgmt.ListContResp.containers:type_name -> mgmt.ListContResp.Cont 0, // 3: mgmt.StorageUsageStats.media_type:type_name -> mgmt.StorageMediaType 2, // 4: mgmt.PoolRebuildStatus.state:type_name -> mgmt.PoolRebuildStatus.State - 25, // 5: mgmt.PoolQueryResp.rebuild:type_name -> mgmt.PoolRebuildStatus - 24, // 6: mgmt.PoolQueryResp.tier_stats:type_name -> mgmt.StorageUsageStats + 24, // 5: mgmt.PoolQueryResp.rebuild:type_name -> mgmt.PoolRebuildStatus + 23, // 6: mgmt.PoolQueryResp.tier_stats:type_name -> mgmt.StorageUsageStats 1, // 7: mgmt.PoolQueryResp.state:type_name -> mgmt.PoolServiceState - 27, // 8: mgmt.PoolSetPropReq.properties:type_name -> mgmt.PoolProperty - 27, // 9: mgmt.PoolGetPropReq.properties:type_name -> mgmt.PoolProperty - 27, // 10: mgmt.PoolGetPropResp.properties:type_name -> mgmt.PoolProperty + 26, // 8: mgmt.PoolSetPropReq.properties:type_name -> mgmt.PoolProperty + 26, // 9: mgmt.PoolGetPropReq.properties:type_name -> mgmt.PoolProperty + 26, // 10: mgmt.PoolGetPropResp.properties:type_name -> mgmt.PoolProperty 0, // 11: mgmt.StorageTargetUsage.media_type:type_name -> mgmt.StorageMediaType - 3, // 12: 
mgmt.PoolQueryTargetInfo.type:type_name -> mgmt.PoolQueryTargetInfo.TargetType - 4, // 13: mgmt.PoolQueryTargetInfo.state:type_name -> mgmt.PoolQueryTargetInfo.TargetState - 34, // 14: mgmt.PoolQueryTargetInfo.space:type_name -> mgmt.StorageTargetUsage - 35, // 15: mgmt.PoolQueryTargetResp.infos:type_name -> mgmt.PoolQueryTargetInfo - 16, // [16:16] is the sub-list for method output_type - 16, // [16:16] is the sub-list for method input_type - 16, // [16:16] is the sub-list for extension type_name - 16, // [16:16] is the sub-list for extension extendee - 0, // [0:16] is the sub-list for field type_name + 3, // 12: mgmt.PoolQueryTargetInfo.state:type_name -> mgmt.PoolQueryTargetInfo.TargetState + 33, // 13: mgmt.PoolQueryTargetInfo.space:type_name -> mgmt.StorageTargetUsage + 34, // 14: mgmt.PoolQueryTargetResp.infos:type_name -> mgmt.PoolQueryTargetInfo + 15, // [15:15] is the sub-list for method output_type + 15, // [15:15] is the sub-list for method input_type + 15, // [15:15] is the sub-list for extension type_name + 15, // [15:15] is the sub-list for extension extendee + 0, // [0:15] is the sub-list for field type_name } func init() { file_mgmt_pool_proto_init() } @@ -4011,7 +3939,7 @@ func file_mgmt_pool_proto_init() { File: protoimpl.DescBuilder{ GoPackagePath: reflect.TypeOf(x{}).PkgPath(), RawDescriptor: file_mgmt_pool_proto_rawDesc, - NumEnums: 5, + NumEnums: 4, NumMessages: 37, NumExtensions: 0, NumServices: 0, diff --git a/src/control/lib/control/pool.go b/src/control/lib/control/pool.go index 5bca12dfdb7..d1dee8907aa 100644 --- a/src/control/lib/control/pool.go +++ b/src/control/lib/control/pool.go @@ -642,7 +642,6 @@ func PoolQueryTargets(ctx context.Context, rpcClient UnaryInvoker, req *PoolQuer // For using the pretty printer that dmg uses for this target info. 
func convertPoolTargetInfo(pbInfo *mgmtpb.PoolQueryTargetInfo) (*daos.PoolQueryTargetInfo, error) { pqti := new(daos.PoolQueryTargetInfo) - pqti.Type = daos.PoolQueryTargetType(pbInfo.Type) pqti.State = daos.PoolQueryTargetState(pbInfo.State) pqti.Space = []*daos.StorageUsageStats{ { diff --git a/src/control/lib/daos/api/libdaos_pool_stubs.go b/src/control/lib/daos/api/libdaos_pool_stubs.go index 675ac215ab2..0c24029839a 100644 --- a/src/control/lib/daos/api/libdaos_pool_stubs.go +++ b/src/control/lib/daos/api/libdaos_pool_stubs.go @@ -184,7 +184,6 @@ var ( } daos_default_PoolQueryTargetInfo daos.PoolQueryTargetInfo = daos.PoolQueryTargetInfo{ - Type: daos.PoolQueryTargetType(1), State: daos.PoolTargetStateUp, Space: func() []*daos.StorageUsageStats { tiStats := make([]*daos.StorageUsageStats, len(daos_default_PoolInfo.TierStats)) @@ -365,7 +364,6 @@ func daos_pool_query_target(poolHdl C.daos_handle_t, tgt C.uint32_t, rank C.uint daos_pool_query_target_SetTgt = tgt daos_pool_query_target_SetRank = rank - info.ta_type = C.daos_target_type_t(daos_pool_query_target_Info.Type) info.ta_state = C.daos_target_state_t(daos_pool_query_target_Info.State) info.ta_space = daos_gds2cds(daos_pool_query_target_Info.Space) diff --git a/src/control/lib/daos/api/pool.go b/src/control/lib/daos/api/pool.go index 36ccc7e1b4d..57e5f03a096 100644 --- a/src/control/lib/daos/api/pool.go +++ b/src/control/lib/daos/api/pool.go @@ -412,7 +412,6 @@ func PoolQuery(ctx context.Context, sysName, poolID string, queryMask daos.PoolQ func newPoolTargetInfo(ptinfo *C.daos_target_info_t) *daos.PoolQueryTargetInfo { return &daos.PoolQueryTargetInfo{ - Type: daos.PoolQueryTargetType(ptinfo.ta_type), State: daos.PoolQueryTargetState(ptinfo.ta_state), Space: []*daos.StorageUsageStats{ { diff --git a/src/control/lib/daos/pool.go b/src/control/lib/daos/pool.go index 90e669bd559..d51b6312cf6 100644 --- a/src/control/lib/daos/pool.go +++ b/src/control/lib/daos/pool.go @@ -87,12 +87,11 @@ type ( SelfHealPolicy 
string `json:"self_heal_policy"` } - PoolQueryTargetType int32 + // PoolQueryTargetState represents the current state of the pool target. PoolQueryTargetState int32 // PoolQueryTargetInfo contains information about a single target PoolQueryTargetInfo struct { - Type PoolQueryTargetType `json:"target_type"` State PoolQueryTargetState `json:"target_state"` Space []*StorageUsageStats `json:"space"` MemFileBytes uint64 `json:"mem_file_bytes"` @@ -455,18 +454,6 @@ func (prs *PoolRebuildState) UnmarshalJSON(data []byte) error { return nil } -func (ptt PoolQueryTargetType) String() string { - ptts, ok := mgmtpb.PoolQueryTargetInfo_TargetType_name[int32(ptt)] - if !ok { - return "invalid" - } - return strings.ToLower(ptts) -} - -func (pqtt PoolQueryTargetType) MarshalJSON() ([]byte, error) { - return []byte(`"` + pqtt.String() + `"`), nil -} - const ( PoolTargetStateUnknown = PoolQueryTargetState(mgmtpb.PoolQueryTargetInfo_STATE_UNKNOWN) // PoolTargetStateDownOut indicates the target is not available diff --git a/src/include/daos_pool.h b/src/include/daos_pool.h index ed732686299..0752278c458 100644 --- a/src/include/daos_pool.h +++ b/src/include/daos_pool.h @@ -21,19 +21,6 @@ extern "C" { #include -/** Type of storage target */ -typedef enum { - DAOS_TP_UNKNOWN, - /** Rotating disk */ - DAOS_TP_HDD, - /** Flash-based */ - DAOS_TP_SSD, - /** Persistent memory */ - DAOS_TP_PM, - /** Volatile memory */ - DAOS_TP_VM, -} daos_target_type_t; - /** Current state of the storage target */ typedef enum { DAOS_TS_UNKNOWN, @@ -75,8 +62,8 @@ struct daos_space { /** Target information */ typedef struct { - /** Target type */ - daos_target_type_t ta_type; + /** padding - not used */ + uint32_t ta_padding; /** Target state */ daos_target_state_t ta_state; /** Target performance */ diff --git a/src/mgmt/pool.pb-c.c b/src/mgmt/pool.pb-c.c index aecd13dd3f1..3c59fbd0523 100644 --- a/src/mgmt/pool.pb-c.c +++ b/src/mgmt/pool.pb-c.c @@ -4297,40 +4297,6 @@ const ProtobufCMessageDescriptor 
mgmt__storage_target_usage__descriptor = (ProtobufCMessageInit) mgmt__storage_target_usage__init, NULL,NULL,NULL /* reserved[123] */ }; -static const ProtobufCEnumValue mgmt__pool_query_target_info__target_type__enum_values_by_number[5] = -{ - { "UNKNOWN", "MGMT__POOL_QUERY_TARGET_INFO__TARGET_TYPE__UNKNOWN", 0 }, - { "HDD", "MGMT__POOL_QUERY_TARGET_INFO__TARGET_TYPE__HDD", 1 }, - { "SSD", "MGMT__POOL_QUERY_TARGET_INFO__TARGET_TYPE__SSD", 2 }, - { "PM", "MGMT__POOL_QUERY_TARGET_INFO__TARGET_TYPE__PM", 3 }, - { "VM", "MGMT__POOL_QUERY_TARGET_INFO__TARGET_TYPE__VM", 4 }, -}; -static const ProtobufCIntRange mgmt__pool_query_target_info__target_type__value_ranges[] = { -{0, 0},{0, 5} -}; -static const ProtobufCEnumValueIndex mgmt__pool_query_target_info__target_type__enum_values_by_name[5] = -{ - { "HDD", 1 }, - { "PM", 3 }, - { "SSD", 2 }, - { "UNKNOWN", 0 }, - { "VM", 4 }, -}; -const ProtobufCEnumDescriptor mgmt__pool_query_target_info__target_type__descriptor = -{ - PROTOBUF_C__ENUM_DESCRIPTOR_MAGIC, - "mgmt.PoolQueryTargetInfo.TargetType", - "TargetType", - "Mgmt__PoolQueryTargetInfo__TargetType", - "mgmt", - 5, - mgmt__pool_query_target_info__target_type__enum_values_by_number, - 5, - mgmt__pool_query_target_info__target_type__enum_values_by_name, - 1, - mgmt__pool_query_target_info__target_type__value_ranges, - NULL,NULL,NULL,NULL /* reserved[1234] */ -}; static const ProtobufCEnumValue mgmt__pool_query_target_info__target_state__enum_values_by_number[7] = { { "STATE_UNKNOWN", "MGMT__POOL_QUERY_TARGET_INFO__TARGET_STATE__STATE_UNKNOWN", 0 }, @@ -4369,20 +4335,8 @@ const ProtobufCEnumDescriptor mgmt__pool_query_target_info__target_state__descri mgmt__pool_query_target_info__target_state__value_ranges, NULL,NULL,NULL,NULL /* reserved[1234] */ }; -static const ProtobufCFieldDescriptor mgmt__pool_query_target_info__field_descriptors[5] = +static const ProtobufCFieldDescriptor mgmt__pool_query_target_info__field_descriptors[4] = { - { - "type", - 1, - 
PROTOBUF_C_LABEL_NONE, - PROTOBUF_C_TYPE_ENUM, - 0, /* quantifier_offset */ - offsetof(Mgmt__PoolQueryTargetInfo, type), - &mgmt__pool_query_target_info__target_type__descriptor, - NULL, - 0, /* flags */ - 0,NULL,NULL /* reserved1,reserved2, etc */ - }, { "state", 2, @@ -4433,16 +4387,15 @@ static const ProtobufCFieldDescriptor mgmt__pool_query_target_info__field_descri }, }; static const unsigned mgmt__pool_query_target_info__field_indices_by_name[] = { - 4, /* field[4] = md_on_ssd_active */ - 3, /* field[3] = mem_file_bytes */ - 2, /* field[2] = space */ - 1, /* field[1] = state */ - 0, /* field[0] = type */ + 3, /* field[3] = md_on_ssd_active */ + 2, /* field[2] = mem_file_bytes */ + 1, /* field[1] = space */ + 0, /* field[0] = state */ }; static const ProtobufCIntRange mgmt__pool_query_target_info__number_ranges[1 + 1] = { - { 1, 0 }, - { 0, 5 } + { 2, 0 }, + { 0, 4 } }; const ProtobufCMessageDescriptor mgmt__pool_query_target_info__descriptor = { @@ -4452,7 +4405,7 @@ const ProtobufCMessageDescriptor mgmt__pool_query_target_info__descriptor = "Mgmt__PoolQueryTargetInfo", "mgmt", sizeof(Mgmt__PoolQueryTargetInfo), - 5, + 4, mgmt__pool_query_target_info__field_descriptors, mgmt__pool_query_target_info__field_indices_by_name, 1, mgmt__pool_query_target_info__number_ranges, diff --git a/src/mgmt/pool.pb-c.h b/src/mgmt/pool.pb-c.h index fbfd62b33ae..d357bc1f33b 100644 --- a/src/mgmt/pool.pb-c.h +++ b/src/mgmt/pool.pb-c.h @@ -71,26 +71,6 @@ typedef enum _Mgmt__PoolRebuildStatus__State { MGMT__POOL_REBUILD_STATUS__STATE__DONE = 2 PROTOBUF_C__FORCE_ENUM_TO_BE_INT_SIZE(MGMT__POOL_REBUILD_STATUS__STATE) } Mgmt__PoolRebuildStatus__State; -typedef enum _Mgmt__PoolQueryTargetInfo__TargetType { - MGMT__POOL_QUERY_TARGET_INFO__TARGET_TYPE__UNKNOWN = 0, - /* - * Rotating disk - */ - MGMT__POOL_QUERY_TARGET_INFO__TARGET_TYPE__HDD = 1, - /* - * Flash-based - */ - MGMT__POOL_QUERY_TARGET_INFO__TARGET_TYPE__SSD = 2, - /* - * Persistent memory - */ - 
MGMT__POOL_QUERY_TARGET_INFO__TARGET_TYPE__PM = 3, - /* - * Volatile memory - */ - MGMT__POOL_QUERY_TARGET_INFO__TARGET_TYPE__VM = 4 - PROTOBUF_C__FORCE_ENUM_TO_BE_INT_SIZE(MGMT__POOL_QUERY_TARGET_INFO__TARGET_TYPE) -} Mgmt__PoolQueryTargetInfo__TargetType; typedef enum _Mgmt__PoolQueryTargetInfo__TargetState { MGMT__POOL_QUERY_TARGET_INFO__TARGET_STATE__STATE_UNKNOWN = 0, /* @@ -1122,10 +1102,6 @@ struct _Mgmt__StorageTargetUsage struct _Mgmt__PoolQueryTargetInfo { ProtobufCMessage base; - /* - * Target type jsee enum daos_target_type_t - */ - Mgmt__PoolQueryTargetInfo__TargetType type; /* * target state see enum daos_target_state_t */ @@ -1149,7 +1125,7 @@ struct _Mgmt__PoolQueryTargetInfo }; #define MGMT__POOL_QUERY_TARGET_INFO__INIT \ { PROTOBUF_C_MESSAGE_INIT (&mgmt__pool_query_target_info__descriptor) \ - , MGMT__POOL_QUERY_TARGET_INFO__TARGET_TYPE__UNKNOWN, MGMT__POOL_QUERY_TARGET_INFO__TARGET_STATE__STATE_UNKNOWN, 0,NULL, 0, 0 } + , MGMT__POOL_QUERY_TARGET_INFO__TARGET_STATE__STATE_UNKNOWN, 0,NULL, 0, 0 } /* @@ -2085,7 +2061,6 @@ extern const ProtobufCMessageDescriptor mgmt__pool_upgrade_req__descriptor; extern const ProtobufCMessageDescriptor mgmt__pool_query_target_req__descriptor; extern const ProtobufCMessageDescriptor mgmt__storage_target_usage__descriptor; extern const ProtobufCMessageDescriptor mgmt__pool_query_target_info__descriptor; -extern const ProtobufCEnumDescriptor mgmt__pool_query_target_info__target_type__descriptor; extern const ProtobufCEnumDescriptor mgmt__pool_query_target_info__target_state__descriptor; extern const ProtobufCMessageDescriptor mgmt__pool_query_target_resp__descriptor; extern const ProtobufCMessageDescriptor mgmt__pool_rebuild_start_req__descriptor; diff --git a/src/mgmt/srv_drpc.c b/src/mgmt/srv_drpc.c index d71f44d1c66..47202fce0a2 100644 --- a/src/mgmt/srv_drpc.c +++ b/src/mgmt/srv_drpc.c @@ -1984,7 +1984,6 @@ ds_mgmt_drpc_pool_query_targets(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) resp.infos[i] = 
&resp_infos[i]; mgmt__pool_query_target_info__init(resp.infos[i]); - resp.infos[i]->type = (Mgmt__PoolQueryTargetInfo__TargetType) infos[i].ta_type; resp.infos[i]->state = (Mgmt__PoolQueryTargetInfo__TargetState) infos[i].ta_state; D_ALLOC_ARRAY(resp.infos[i]->space, DAOS_MEDIA_MAX); if (resp.infos[i]->space == NULL) diff --git a/src/mgmt/tests/mocks.c b/src/mgmt/tests/mocks.c index 5b30c71f7c1..9d93e8697e7 100644 --- a/src/mgmt/tests/mocks.c +++ b/src/mgmt/tests/mocks.c @@ -374,7 +374,6 @@ mock_ds_mgmt_pool_query_targets_gen_infos(uint32_t n_infos) D_ALLOC_ARRAY(infos, n_infos); for (i = 0; i < n_infos; i++) { - infos[i].ta_type = DAOS_TP_UNKNOWN; infos[i].ta_state = (i == 0) ? DAOS_TS_DOWN_OUT : DAOS_TS_UP_IN; infos[i].ta_space.s_total[DAOS_MEDIA_SCM] = 1000000000; infos[i].ta_space.s_free[DAOS_MEDIA_SCM] = 800000000 + i; diff --git a/src/mgmt/tests/srv_drpc_tests.c b/src/mgmt/tests/srv_drpc_tests.c index 821f9ab5765..5c79cab0734 100644 --- a/src/mgmt/tests/srv_drpc_tests.c +++ b/src/mgmt/tests/srv_drpc_tests.c @@ -1609,7 +1609,6 @@ expect_drpc_pool_query_targets_resp_with_targets(Drpc__Response *resp, for (i = 0; i < exp_infos_len; i++) { uint32_t j; - assert_int_equal(pqt_resp->infos[i]->type, infos[i].ta_type); assert_int_equal(pqt_resp->infos[i]->state, infos[i].ta_state); assert_int_equal(pqt_resp->infos[i]->n_space, DAOS_MEDIA_MAX); assert_int_equal(pqt_resp->infos[i]->mem_file_bytes, mem_file_bytes); diff --git a/src/pool/srv_cli.c b/src/pool/srv_cli.c index 857b03bb8c7..0391b8bbc49 100644 --- a/src/pool/srv_cli.c +++ b/src/pool/srv_cli.c @@ -644,7 +644,6 @@ pool_query_target_consume(uuid_t pool_uuid, crt_rpc_t *rpc, void *varg) D_DEBUG(DB_MGMT, DF_UUID ": Successfully queried pool rank %u target %u\n", DP_UUID(pool_uuid), arg->pqta_rank, arg->pqta_tgt_idx); - arg->pqta_info->ta_type = DAOS_TP_UNKNOWN; arg->pqta_info->ta_state = out->pqio_state; for (i = 0; i < DAOS_MEDIA_MAX; i++) { arg->pqta_info->ta_space.s_total[i] = out->pqio_space.s_total[i]; diff 
--git a/src/proto/mgmt/pool.proto b/src/proto/mgmt/pool.proto index bc6d041aa12..faaaf3b1f85 100644 --- a/src/proto/mgmt/pool.proto +++ b/src/proto/mgmt/pool.proto @@ -315,14 +315,7 @@ message StorageTargetUsage { // PoolQueryTargetInfo represents pool target query info for a single target. // The RPC response type (PoolQueryTargetResponse) contains a sequence of these. message PoolQueryTargetInfo { - enum TargetType { // See enum daos_target_type_t - UNKNOWN = 0; - HDD = 1; // Rotating disk - SSD = 2; // Flash-based - PM = 3; // Persistent memory - VM = 4; // Volatile memory - } - TargetType type = 1; // Target type jsee enum daos_target_type_t + reserved 1; enum TargetState { // See enum daos_target_state_t STATE_UNKNOWN = 0; From 8778eeae4b21cc60dd44398bd313f488a64c90f3 Mon Sep 17 00:00:00 2001 From: Dalton Bohning Date: Mon, 22 Dec 2025 07:51:33 -0800 Subject: [PATCH 096/253] DAOS-18377 build: Create 2.7.103-tb (#17297) Test build 2 for DAOS 2.8. Signed-off-by: Dalton Bohning --- TAG | 2 +- VERSION | 2 +- utils/rpms/daos.changelog | 3 +++ utils/rpms/daos.spec | 4 ++-- 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/TAG b/TAG index 3dac9223c86..0a41151c039 100644 --- a/TAG +++ b/TAG @@ -1 +1 @@ -2.7.102-tb +2.7.103-tb diff --git a/VERSION b/VERSION index aab94590421..da75c3b7334 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.7.102 +2.7.103 diff --git a/utils/rpms/daos.changelog b/utils/rpms/daos.changelog index d255dac085d..40e3d18fde6 100644 --- a/utils/rpms/daos.changelog +++ b/utils/rpms/daos.changelog @@ -1,4 +1,7 @@ %changelog +* Fri Dec 19 2025 Dalton Bohning 2.7.103-1 +- Bump version to 2.7.103 + * Tue Nov 25 2025 Jeff Olivier 2.7.102-3 - Upgrade spdk - Add some missing dependencies numactl and pcituils diff --git a/utils/rpms/daos.spec b/utils/rpms/daos.spec index 3bb3d4a7a02..a8f5da38d1f 100644 --- a/utils/rpms/daos.spec +++ b/utils/rpms/daos.spec @@ -24,8 +24,8 @@ %endif Name: daos -Version: 2.7.102 -Release: 3%{?relval}%{?dist} 
+Version: 2.7.103 +Release: 1%{?relval}%{?dist} Summary: DAOS Storage Engine License: BSD-2-Clause-Patent From 4acf5a310a78a1780aa36585e0e6f21a1724d5bb Mon Sep 17 00:00:00 2001 From: Phil Henderson Date: Mon, 22 Dec 2025 13:49:57 -0500 Subject: [PATCH 097/253] DAOS-17639 test: Detect all server fabric_ifaces (#16913) Launch.py will detect all of the fastest interfaces common to all the specified server hosts and use them to populate the engine fabric_iface entries if no overrides are provided in the test yaml. Signed-off-by: Phil Henderson --- .../ftest/aggregation/continuous_write.yaml | 2 - .../ftest/aggregation/multiple_pool_cont.yaml | 11 +++-- src/tests/ftest/aggregation/space_rb.yaml | 2 - src/tests/ftest/container/boundary.yaml | 4 -- .../ftest/container/multiple_delete.yaml | 4 -- .../container/per_server_fault_domain.yaml | 4 -- .../ftest/container/snapshot_aggregation.yaml | 9 ++-- .../daos_agent_support_collect_log.yaml | 6 +-- .../daos_server_support_collect_log.yaml | 6 +-- .../ftest/control/daos_system_query.yaml | 2 - src/tests/ftest/control/dmg_scale.yaml | 4 -- .../control/dmg_support_collect_log.yaml | 6 +-- src/tests/ftest/control/dmg_system_start.yaml | 2 - .../ftest/control/dmg_telemetry_io_basic.yaml | 11 +++-- src/tests/ftest/daos_perf/large.yaml | 19 +++++---- src/tests/ftest/daos_perf/small.yaml | 12 ++++-- src/tests/ftest/daos_racer/multi.yaml | 7 ++-- src/tests/ftest/daos_racer/parallel.yaml | 8 ++-- src/tests/ftest/daos_racer/simple.yaml | 7 ++-- src/tests/ftest/daos_test/dfs.yaml | 15 ++++--- src/tests/ftest/daos_test/nvme_recovery.yaml | 17 ++++---- src/tests/ftest/daos_test/rebuild.yaml | 9 ++-- src/tests/ftest/daos_test/suite.yaml | 18 ++++---- src/tests/ftest/daos_vol/bigio.yaml | 9 ++-- src/tests/ftest/datamover/large_dir.yaml | 11 +++-- src/tests/ftest/datamover/large_file.yaml | 11 +++-- .../ftest/datamover/obj_large_posix.yaml | 10 +++-- .../ftest/datamover/serial_large_posix.yaml | 12 ++++-- 
src/tests/ftest/deployment/agent_failure.yaml | 4 -- .../ftest/deployment/basic_checkout.yaml | 16 ++++++-- .../deployment/critical_integration.yaml | 7 ++-- src/tests/ftest/deployment/disk_failure.yaml | 10 +++-- src/tests/ftest/deployment/io_sys_admin.yaml | 18 ++++++-- src/tests/ftest/deployment/ior_per_rank.yaml | 9 ++-- .../ftest/deployment/network_failure.yaml | 4 -- .../ftest/deployment/server_rank_failure.yaml | 4 -- .../ftest/deployment/target_failure.yaml | 4 -- src/tests/ftest/dfuse/fio_pil4dfs_small.yaml | 12 ++++-- src/tests/ftest/dfuse/fio_small.yaml | 12 ++++-- src/tests/ftest/dfuse/pil4dfs_dcache.yaml | 4 -- src/tests/ftest/dfuse/pil4dfs_fio.yaml | 4 -- src/tests/ftest/erasurecode/aggregation.yaml | 10 +++-- src/tests/ftest/erasurecode/cell_size.yaml | 9 ++-- .../ftest/erasurecode/cell_size_property.yaml | 9 ++-- src/tests/ftest/erasurecode/ior_smoke.yaml | 10 +++-- src/tests/ftest/erasurecode/mdtest_smoke.yaml | 9 ++-- .../erasurecode/multiple_rank_failure.yaml | 4 -- .../erasurecode/multiple_target_failure.yaml | 4 -- .../ftest/erasurecode/offline_rebuild.yaml | 10 +++-- .../offline_rebuild_aggregation.yaml | 10 +++-- .../erasurecode/offline_rebuild_single.yaml | 11 +++-- .../ftest/erasurecode/online_rebuild.yaml | 12 ++++-- .../erasurecode/online_rebuild_mdtest.yaml | 11 +++-- .../erasurecode/online_rebuild_single.yaml | 10 +++-- src/tests/ftest/erasurecode/rank_failure.yaml | 9 ++-- .../ftest/erasurecode/rebuild_disabled.yaml | 10 +++-- .../erasurecode/rebuild_disabled_single.yaml | 10 +++-- src/tests/ftest/erasurecode/rebuild_fio.yaml | 11 +++-- src/tests/ftest/erasurecode/restart.yaml | 11 +++-- src/tests/ftest/erasurecode/space_usage.yaml | 12 ++++-- src/tests/ftest/erasurecode/truncate.yaml | 11 +++-- src/tests/ftest/fault_injection/ec.yaml | 13 ++++-- src/tests/ftest/fault_injection/pool.yaml | 4 -- src/tests/ftest/io/io_consistency.yaml | 10 +++-- src/tests/ftest/io/large_file_count.yaml | 11 +++-- src/tests/ftest/io/macsio_test.yaml | 10 
+++-- src/tests/ftest/io/seg_count.yaml | 9 ++-- src/tests/ftest/io/small_file_count.yaml | 11 +++-- src/tests/ftest/io/unaligned_io.yaml | 8 ++-- src/tests/ftest/ior/crash.yaml | 9 ++-- src/tests/ftest/ior/hard.yaml | 9 ++-- src/tests/ftest/ior/hard_rebuild.yaml | 10 +++-- .../ftest/ior/intercept_multi_client.yaml | 4 -- src/tests/ftest/ior/small.yaml | 4 -- src/tests/ftest/mdtest/small.yaml | 4 -- src/tests/ftest/nvme/enospace.yaml | 10 +++-- src/tests/ftest/nvme/fragmentation.yaml | 9 ++-- src/tests/ftest/nvme/health.yaml | 9 ++-- src/tests/ftest/nvme/io_verification.yaml | 13 ++++-- src/tests/ftest/nvme/object.yaml | 10 +++-- src/tests/ftest/nvme/pool_exclude.yaml | 15 +++++-- src/tests/ftest/nvme/pool_extend.yaml | 19 +++++++-- src/tests/ftest/osa/dmg_negative_test.yaml | 11 +++-- src/tests/ftest/osa/offline_drain.yaml | 21 ++++++++-- src/tests/ftest/osa/offline_extend.yaml | 20 +++++++-- .../ftest/osa/offline_parallel_test.yaml | 19 +++++++-- .../ftest/osa/offline_reintegration.yaml | 24 +++++++++-- src/tests/ftest/osa/online_drain.yaml | 18 ++++++-- src/tests/ftest/osa/online_extend.yaml | 21 ++++++++-- src/tests/ftest/osa/online_parallel_test.yaml | 12 ++++-- src/tests/ftest/osa/online_reintegration.yaml | 19 +++++++-- src/tests/ftest/performance/ior_easy.yaml | 4 -- src/tests/ftest/performance/ior_hard.yaml | 4 -- src/tests/ftest/performance/mdtest_easy.yaml | 4 -- src/tests/ftest/performance/mdtest_hard.yaml | 4 -- src/tests/ftest/pool/create.yaml | 8 ++-- src/tests/ftest/pool/create_all_hw.yaml | 4 -- src/tests/ftest/pool/create_capacity.yaml | 4 -- src/tests/ftest/pool/target_query.yaml | 9 ++-- .../ftest/rebuild/container_create_race.yaml | 4 -- .../ftest/rebuild/continues_after_stop.yaml | 4 -- src/tests/ftest/rebuild/mdtest.yaml | 4 -- src/tests/ftest/rebuild/no_cap.yaml | 9 ++-- .../ftest/rebuild/pool_destroy_race.yaml | 4 -- src/tests/ftest/rebuild/with_ior.yaml | 6 +-- src/tests/ftest/recovery/cat_recov_core.yaml | 4 -- 
src/tests/ftest/recovery/check_policy.yaml | 4 -- src/tests/ftest/recovery/check_repair.yaml | 4 -- .../recovery/check_start_corner_case.yaml | 4 -- .../ftest/recovery/check_start_options.yaml | 2 - src/tests/ftest/recovery/check_stop.yaml | 4 -- src/tests/ftest/recovery/ms_membership.yaml | 4 -- .../recovery/pool_list_consolidation.yaml | 4 -- src/tests/ftest/recovery/pool_membership.yaml | 4 -- src/tests/ftest/scrubber/aggregation.yaml | 13 ++++-- src/tests/ftest/scrubber/basic.yaml | 12 ++++-- .../scrubber/check_csum_metrics_mdtest.yaml | 9 ++-- src/tests/ftest/scrubber/csum_fault.yaml | 13 ++++-- src/tests/ftest/scrubber/frequency.yaml | 10 +++-- src/tests/ftest/scrubber/rebuild.yaml | 11 +++-- src/tests/ftest/scrubber/snapshot.yaml | 11 +++-- .../ftest/scrubber/target_auto_eviction.yaml | 13 ++++-- src/tests/ftest/server/cpu_usage.yaml | 7 +++- .../ftest/server/daos_server_restart.yaml | 4 -- src/tests/ftest/server/metadata.yaml | 4 -- src/tests/ftest/server/storage_tiers.yaml | 4 -- src/tests/ftest/soak/faults.yaml | 2 - src/tests/ftest/soak/harassers.yaml | 2 - src/tests/ftest/soak/smoke.yaml | 2 - src/tests/ftest/soak/stress.yaml | 2 - src/tests/ftest/telemetry/engine_events.yaml | 4 -- .../ftest/telemetry/pool_space_metrics.yaml | 11 +++-- src/tests/ftest/util/environment_utils.py | 39 ++++++++++-------- src/tests/ftest/util/network_utils.py | 41 +++++++++---------- src/tests/ftest/util/server_utils_params.py | 16 ++++++-- src/tests/ftest/vmd/fault_reintegration.yaml | 12 ++++-- src/tests/ftest/vmd/led.yaml | 9 ++-- 137 files changed, 688 insertions(+), 574 deletions(-) diff --git a/src/tests/ftest/aggregation/continuous_write.yaml b/src/tests/ftest/aggregation/continuous_write.yaml index 4e8e76ff4f9..713359c460f 100644 --- a/src/tests/ftest/aggregation/continuous_write.yaml +++ b/src/tests/ftest/aggregation/continuous_write.yaml @@ -11,8 +11,6 @@ server_config: 0: targets: 1 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: 
daos_server_0.log storage: auto diff --git a/src/tests/ftest/aggregation/multiple_pool_cont.yaml b/src/tests/ftest/aggregation/multiple_pool_cont.yaml index 3ced0823351..9a4d5d0b540 100644 --- a/src/tests/ftest/aggregation/multiple_pool_cont.yaml +++ b/src/tests/ftest/aggregation/multiple_pool_cont.yaml @@ -1,9 +1,12 @@ hosts: test_servers: 5 test_clients: 3 + timeout: 1300 + setup: start_servers_once: false + server_config: name: daos_server engines_per_host: 2 @@ -12,8 +15,6 @@ server_config: targets: 8 pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31416 log_file: daos_server0.log log_mask: DEBUG,MEM=ERR env_vars: @@ -23,19 +24,20 @@ server_config: targets: 8 pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31516 log_file: daos_server1.log log_mask: DEBUG,MEM=ERR env_vars: - DD_MASK=mgmt,md,rebuild storage: auto + pool: size: 40% svcn: 1 + container: type: POSIX control_method: daos + ior: client_processes: np: 12 @@ -47,5 +49,6 @@ ior: block_size: '512M' dfs_oclass: "EC_2P1G1" dfs_dir_oclass: "EC_2P1G1" + runtime: total_runtime: 800 # total seconds to run diff --git a/src/tests/ftest/aggregation/space_rb.yaml b/src/tests/ftest/aggregation/space_rb.yaml index c415cfc2418..dc3ae51c592 100644 --- a/src/tests/ftest/aggregation/space_rb.yaml +++ b/src/tests/ftest/aggregation/space_rb.yaml @@ -11,8 +11,6 @@ server_config: 0: targets: 4 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server_0.log storage: auto diff --git a/src/tests/ftest/container/boundary.yaml b/src/tests/ftest/container/boundary.yaml index 5d1b8ad52a3..2e3e7257aa9 100644 --- a/src/tests/ftest/container/boundary.yaml +++ b/src/tests/ftest/container/boundary.yaml @@ -10,8 +10,6 @@ server_config: engines: 0: targets: 4 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log env_vars: - DD_MASK=group_metadata_only @@ -19,8 +17,6 @@ server_config: storage: auto 1: targets: 4 - fabric_iface: ib1 - 
fabric_iface_port: 31417 log_file: daos_server1.log env_vars: - DD_MASK=group_metadata_only diff --git a/src/tests/ftest/container/multiple_delete.yaml b/src/tests/ftest/container/multiple_delete.yaml index aa3447201d5..49843934f20 100644 --- a/src/tests/ftest/container/multiple_delete.yaml +++ b/src/tests/ftest/container/multiple_delete.yaml @@ -11,16 +11,12 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log log_mask: INFO storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log log_mask: INFO storage: auto diff --git a/src/tests/ftest/container/per_server_fault_domain.yaml b/src/tests/ftest/container/per_server_fault_domain.yaml index 2a741319f8e..3efa3128538 100644 --- a/src/tests/ftest/container/per_server_fault_domain.yaml +++ b/src/tests/ftest/container/per_server_fault_domain.yaml @@ -12,15 +12,11 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31416 log_file: daos_server_0.log storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31516 log_file: daos_server_1.log storage: auto diff --git a/src/tests/ftest/container/snapshot_aggregation.yaml b/src/tests/ftest/container/snapshot_aggregation.yaml index ca18d0b9894..b55d173060f 100644 --- a/src/tests/ftest/container/snapshot_aggregation.yaml +++ b/src/tests/ftest/container/snapshot_aggregation.yaml @@ -1,7 +1,9 @@ timeout: 360 + hosts: test_servers: 1 test_clients: 3 + server_config: name: daos_server engines_per_host: 2 @@ -10,25 +12,24 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log storage: auto system_ram_reserved: 8 + pool: scm_size: 80G nvme_size: 100G 
target_list: [0, 1] + container: control_method: daos type: POSIX + ior: flags: "-w -k" ppn: 2 diff --git a/src/tests/ftest/control/daos_agent_support_collect_log.yaml b/src/tests/ftest/control/daos_agent_support_collect_log.yaml index 1f6b4134142..e5fe5acce8c 100644 --- a/src/tests/ftest/control/daos_agent_support_collect_log.yaml +++ b/src/tests/ftest/control/daos_agent_support_collect_log.yaml @@ -1,20 +1,18 @@ hosts: test_servers: 3 test_clients: 1 + timeout: 120 + server_config: name: daos_server engines_per_host: 2 engines: 0: pinned_numa_node: 0 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log storage: auto 1: pinned_numa_node: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log storage: auto diff --git a/src/tests/ftest/control/daos_server_support_collect_log.yaml b/src/tests/ftest/control/daos_server_support_collect_log.yaml index 4fc6f224400..9ac8518685c 100644 --- a/src/tests/ftest/control/daos_server_support_collect_log.yaml +++ b/src/tests/ftest/control/daos_server_support_collect_log.yaml @@ -1,19 +1,17 @@ hosts: test_servers: 3 + timeout: 120 + server_config: name: daos_server engines_per_host: 2 engines: 0: pinned_numa_node: 0 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log storage: auto 1: pinned_numa_node: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log storage: auto diff --git a/src/tests/ftest/control/daos_system_query.yaml b/src/tests/ftest/control/daos_system_query.yaml index 97f7ea867ee..8b7ccf4e1bc 100644 --- a/src/tests/ftest/control/daos_system_query.yaml +++ b/src/tests/ftest/control/daos_system_query.yaml @@ -11,7 +11,6 @@ server_config: 0: targets: 4 nr_xs_helpers: 0 - fabric_iface_port: 31416 log_file: daos_server0.log log_mask: DEBUG,MEM=ERR env_vars: @@ -23,7 +22,6 @@ server_config: 1: targets: 4 nr_xs_helpers: 0 - fabric_iface_port: 31516 log_file: daos_server1.log log_mask: DEBUG,MEM=ERR env_vars: diff --git 
a/src/tests/ftest/control/dmg_scale.yaml b/src/tests/ftest/control/dmg_scale.yaml index 84f4e35bc4d..58ee5e85d20 100644 --- a/src/tests/ftest/control/dmg_scale.yaml +++ b/src/tests/ftest/control/dmg_scale.yaml @@ -15,16 +15,12 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log storage: auto targets: 8 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log storage: auto targets: 8 diff --git a/src/tests/ftest/control/dmg_support_collect_log.yaml b/src/tests/ftest/control/dmg_support_collect_log.yaml index 794ca16cacc..913693122b7 100644 --- a/src/tests/ftest/control/dmg_support_collect_log.yaml +++ b/src/tests/ftest/control/dmg_support_collect_log.yaml @@ -1,19 +1,17 @@ hosts: test_servers: 3 + timeout: 200 + server_config: name: daos_server engines_per_host: 2 engines: 0: pinned_numa_node: 0 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log storage: auto 1: pinned_numa_node: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log storage: auto diff --git a/src/tests/ftest/control/dmg_system_start.yaml b/src/tests/ftest/control/dmg_system_start.yaml index a741830e835..5acb169f123 100644 --- a/src/tests/ftest/control/dmg_system_start.yaml +++ b/src/tests/ftest/control/dmg_system_start.yaml @@ -5,7 +5,6 @@ server_config: engines_per_host: 2 engines: 0: - fabric_iface_port: 31416 log_file: daos_server0.log targets: 4 nr_xs_helpers: 0 @@ -14,7 +13,6 @@ server_config: class: ram scm_mount: /mnt/daos0 1: - fabric_iface_port: 31516 log_file: daos_server1.log targets: 4 nr_xs_helpers: 0 diff --git a/src/tests/ftest/control/dmg_telemetry_io_basic.yaml b/src/tests/ftest/control/dmg_telemetry_io_basic.yaml index ac1529b01ee..a11d9fef932 100644 --- a/src/tests/ftest/control/dmg_telemetry_io_basic.yaml +++ b/src/tests/ftest/control/dmg_telemetry_io_basic.yaml @@ -1,7 +1,9 @@ hosts: test_servers: 1 
test_clients: 1 + timeout: 150 + server_config: name: daos_server engines_per_host: 2 @@ -9,26 +11,27 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log storage: auto + pool: scm_size: 2G + container: type: POSIX control_method: daos + block_sizes: [10M, 500M] transfer_sizes: [256K, 1M] + telemetry_metrics: io_test_metrics_valid: [0, 18446744073709552000] + ior: api: POSIX flags: "-v -w -k" diff --git a/src/tests/ftest/daos_perf/large.yaml b/src/tests/ftest/daos_perf/large.yaml index 55131bac852..9f07ece7b9f 100644 --- a/src/tests/ftest/daos_perf/large.yaml +++ b/src/tests/ftest/daos_perf/large.yaml @@ -1,18 +1,16 @@ hosts: test_servers: 2 test_clients: 2 + # some run can take long to run, but needs to be verified # by running consecutively for accurate time. timeout: 3600 + job_manager: class_name: Orterun mpi_type: openmpi manager_timeout: 3600 -pool: - size: 1TB -container: - type: POSIX - control_method: daos + server_config: name: daos_server engines_per_host: 2 @@ -20,17 +18,20 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log storage: auto + +pool: + size: 1TB + +container: + type: POSIX + daos_perf: test_command: 'U;p F;p V O;p' test_type: daos diff --git a/src/tests/ftest/daos_perf/small.yaml b/src/tests/ftest/daos_perf/small.yaml index 8006d5d9647..6298aea44f9 100644 --- a/src/tests/ftest/daos_perf/small.yaml +++ b/src/tests/ftest/daos_perf/small.yaml @@ -1,16 +1,21 @@ hosts: test_servers: 2 test_clients: 2 + timeout: 540 + job_manager: class_name: Orterun mpi_type: openmpi manager_timeout: 480 + pool: size: 500GB + container: type: POSIX 
control_method: daos + server_config: name: daos_server engines_per_host: 2 @@ -18,25 +23,24 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log storage: auto transport_config: allow_insecure: true + agent_config: transport_config: allow_insecure: true + dmg: transport_config: allow_insecure: true + daos_perf: test_command: 'U;p F;p V O;p' test_type: daos diff --git a/src/tests/ftest/daos_racer/multi.yaml b/src/tests/ftest/daos_racer/multi.yaml index f013e664ca2..85b1fc983b9 100644 --- a/src/tests/ftest/daos_racer/multi.yaml +++ b/src/tests/ftest/daos_racer/multi.yaml @@ -1,7 +1,9 @@ hosts: test_servers: 3 test_clients: 1 + timeout: 10800 + server_config: name: daos_server engines_per_host: 2 @@ -9,19 +11,16 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log log_mask: ERR storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log log_mask: ERR storage: auto + daos_racer: runtime: 7200 clush_timeout: 10080 diff --git a/src/tests/ftest/daos_racer/parallel.yaml b/src/tests/ftest/daos_racer/parallel.yaml index 2c0d9f67847..9c79b82efb4 100644 --- a/src/tests/ftest/daos_racer/parallel.yaml +++ b/src/tests/ftest/daos_racer/parallel.yaml @@ -1,7 +1,9 @@ hosts: test_servers: 4 test_clients: 4 + timeout: 1800 + server_config: name: daos_server engines_per_host: 2 @@ -9,23 +11,21 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log log_mask: "ERR" storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log log_mask: "ERR" storage: auto + job_manager: class_name: Orterun mpi_type: 
openmpi manager_timeout: 630 + daos_racer: runtime: 600 clush_timeout: 900 diff --git a/src/tests/ftest/daos_racer/simple.yaml b/src/tests/ftest/daos_racer/simple.yaml index 518a011f0db..6eda8bb011b 100644 --- a/src/tests/ftest/daos_racer/simple.yaml +++ b/src/tests/ftest/daos_racer/simple.yaml @@ -1,7 +1,9 @@ hosts: test_servers: 3 test_clients: 1 + timeout: 1800 + server_config: name: daos_server engines_per_host: 2 @@ -9,19 +11,16 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log log_mask: "ERR" storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log log_mask: "ERR" storage: auto + daos_racer: runtime: 600 clush_timeout: 900 diff --git a/src/tests/ftest/daos_test/dfs.yaml b/src/tests/ftest/daos_test/dfs.yaml index 982611e9fc0..e27476b60bb 100644 --- a/src/tests/ftest/daos_test/dfs.yaml +++ b/src/tests/ftest/daos_test/dfs.yaml @@ -3,13 +3,14 @@ hosts: test_servers: 4 test_clients: 4 + timeout: 4000 + timeouts: test_daos_dfs_unit: 2030 test_daos_dfs_parallel: 2060 test_daos_dfs_sys: 90 -pool: - scm_size: 8G + server_config: name: daos_server engines_per_host: 2 @@ -18,8 +19,6 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log env_vars: - D_LOG_FILE_APPEND_PID=1 @@ -31,8 +30,6 @@ server_config: 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log env_vars: - D_LOG_FILE_APPEND_PID=1 @@ -43,12 +40,18 @@ server_config: scm_mount: /mnt/daos1 transport_config: allow_insecure: True + agent_config: transport_config: allow_insecure: True + dmg: transport_config: allow_insecure: True + +pool: + scm_size: 8G + daos_tests: test_name: test_daos_dfs_unit: DAOS_DFS_Unit diff --git a/src/tests/ftest/daos_test/nvme_recovery.yaml b/src/tests/ftest/daos_test/nvme_recovery.yaml index 
66b6c3581d5..0788f0d5c23 100644 --- a/src/tests/ftest/daos_test/nvme_recovery.yaml +++ b/src/tests/ftest/daos_test/nvme_recovery.yaml @@ -2,13 +2,13 @@ # required quantity is indicated by the placeholders hosts: test_servers: 2 + timeout: 600 + # Remove this once DAOS-5134 is resolved setup: start_servers_once: false -pool: - scm_size: 8G - nvme_size: 16G + server_config: name: daos_server engines_per_host: 2 @@ -16,8 +16,6 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log log_mask: DEBUG env_vars: @@ -26,8 +24,6 @@ server_config: 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log log_mask: DEBUG env_vars: @@ -35,12 +31,19 @@ server_config: storage: auto transport_config: allow_insecure: true + agent_config: transport_config: allow_insecure: true + dmg: transport_config: allow_insecure: true + +pool: + scm_size: 8G + nvme_size: 16G + daos_tests: num_clients: 1 test_name: diff --git a/src/tests/ftest/daos_test/rebuild.yaml b/src/tests/ftest/daos_test/rebuild.yaml index a76c093565a..2befad907b8 100644 --- a/src/tests/ftest/daos_test/rebuild.yaml +++ b/src/tests/ftest/daos_test/rebuild.yaml @@ -15,8 +15,6 @@ timeouts: test_rebuild_35: 180 test_rebuild_36: 200 test_rebuild_37: 250 -pool: - nvme_size: 0G server_config: name: daos_server @@ -25,8 +23,6 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log log_mask: DEBUG,MEM=ERR env_vars: @@ -37,8 +33,6 @@ server_config: 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log log_mask: DEBUG,MEM=ERR env_vars: @@ -57,6 +51,9 @@ dmg: transport_config: allow_insecure: false +pool: + nvme_size: 0G + daos_tests: num_clients: 1 num_replicas: 1 diff --git a/src/tests/ftest/daos_test/suite.yaml b/src/tests/ftest/daos_test/suite.yaml index 
7fb851e4e56..c9810280ecc 100644 --- a/src/tests/ftest/daos_test/suite.yaml +++ b/src/tests/ftest/daos_test/suite.yaml @@ -2,6 +2,7 @@ # required quantity is indicated by the placeholders hosts: test_servers: 4 + # Note that subtests below can set their own timeout so this # should be a general average of all tests not including outliers # (I'm looking at you "rebuild tests") @@ -34,10 +35,7 @@ timeouts: test_daos_dedup: 220 test_daos_upgrade: 300 test_daos_pipeline: 60 -pool: - # This will create 8G of SCM and 16G of NVMe size of pool. - scm_size: 8G - nvme_size: 16G + server_config: name: daos_server engines_per_host: 2 @@ -45,8 +43,6 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log log_mask: DEBUG,MEM=ERR env_vars: @@ -60,8 +56,6 @@ server_config: 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log log_mask: DEBUG,MEM=ERR env_vars: @@ -75,12 +69,20 @@ server_config: transport_config: allow_insecure: true system_ram_reserved: 64 + agent_config: transport_config: allow_insecure: true + dmg: transport_config: allow_insecure: true + +pool: + # This will create 8G of SCM and 16G of NVMe size of pool. 
+ scm_size: 8G + nvme_size: 16G + daos_tests: num_clients: test_daos_degraded_mode: 1 diff --git a/src/tests/ftest/daos_vol/bigio.yaml b/src/tests/ftest/daos_vol/bigio.yaml index 3103218ff79..668e834b39d 100644 --- a/src/tests/ftest/daos_vol/bigio.yaml +++ b/src/tests/ftest/daos_vol/bigio.yaml @@ -1,7 +1,9 @@ hosts: test_servers: 1 test_clients: 1 + timeout: 1000 + server_config: name: daos_server crt_timeout: 60 @@ -10,8 +12,6 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 4 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log env_vars: - D_LOG_FILE_APPEND_PID=1 @@ -21,19 +21,20 @@ server_config: 1: pinned_numa_node: 1 nr_xs_helpers: 4 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log env_vars: - D_LOG_FILE_APPEND_PID=1 - FI_LOG_LEVEL=warn - D_LOG_STDERR_IN_LOG=1 storage: auto + pool: size: 50% + container: type: POSIX control_method: daos + daos_vol_tests: testname: h5_partest_t_bigio client_processes: 6 diff --git a/src/tests/ftest/datamover/large_dir.yaml b/src/tests/ftest/datamover/large_dir.yaml index 887540271f7..dcad40f8ad8 100644 --- a/src/tests/ftest/datamover/large_dir.yaml +++ b/src/tests/ftest/datamover/large_dir.yaml @@ -1,7 +1,9 @@ hosts: test_servers: 3 test_clients: 1 + timeout: 420 + server_config: name: daos_server engines_per_host: 2 @@ -9,24 +11,23 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log log_mask: ERR storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log log_mask: ERR storage: auto + pool: size: 95% + container: type: POSIX control_method: daos + mdtest: client_ppn: dcp: 32 @@ -43,10 +44,12 @@ mdtest: depth: 4 branching_factor: 4 bytes: 4096 + dcp: bufsize: 4M chunksize: 128M client_processes: ppn: 32 + datamover: posix_root: "self.workdir" diff --git a/src/tests/ftest/datamover/large_file.yaml 
b/src/tests/ftest/datamover/large_file.yaml index de060d01125..9a273ef9783 100644 --- a/src/tests/ftest/datamover/large_file.yaml +++ b/src/tests/ftest/datamover/large_file.yaml @@ -1,7 +1,9 @@ hosts: test_servers: 3 test_clients: 1 + timeout: 420 + server_config: name: daos_server engines_per_host: 2 @@ -9,25 +11,24 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log log_mask: ERR storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log log_mask: ERR storage: auto + pool: size: 95% svcn: 1 + container: type: POSIX control_method: daos + ior: client_ppn: dcp: 20 @@ -40,10 +41,12 @@ ior: transfer_size: 4M block_size: '1G' # aggregate of 20G for dcp and 10G for fs_copy dfs_oclass: EC_4P2GX + dcp: bufsize: 4M chunksize: 128M client_processes: ppn: 32 + datamover: posix_root: "self.workdir" diff --git a/src/tests/ftest/datamover/obj_large_posix.yaml b/src/tests/ftest/datamover/obj_large_posix.yaml index 781aed81517..b15370b6e65 100644 --- a/src/tests/ftest/datamover/obj_large_posix.yaml +++ b/src/tests/ftest/datamover/obj_large_posix.yaml @@ -1,7 +1,9 @@ hosts: test_servers: 3 test_clients: 1 + timeout: 360 + server_config: name: daos_server engines_per_host: 2 @@ -9,24 +11,23 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log log_mask: ERR storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log log_mask: ERR storage: auto + pool: size: 60G + container: type: POSIX control_method: daos + mdtest: client_processes: np: 30 @@ -41,6 +42,7 @@ mdtest: depth: 2 branching_factor: 2 bytes: 4096 + dcp: client_processes: np: 30 diff --git a/src/tests/ftest/datamover/serial_large_posix.yaml b/src/tests/ftest/datamover/serial_large_posix.yaml index 0081c0faa80..854317d19c0 100644 --- 
a/src/tests/ftest/datamover/serial_large_posix.yaml +++ b/src/tests/ftest/datamover/serial_large_posix.yaml @@ -1,7 +1,9 @@ hosts: test_servers: 3 test_clients: 1 + timeout: 480 + server_config: name: daos_server engines_per_host: 2 @@ -9,24 +11,23 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log log_mask: ERR storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log log_mask: ERR storage: auto + pool: size: 60G + container: type: POSIX control_method: daos + mdtest: client_processes: np: 30 @@ -41,12 +42,15 @@ mdtest: depth: 2 branching_factor: 2 bytes: 4096 + dserialize: client_processes: np: 16 + ddeserialize: client_processes: np: 16 + dfuse: disable_caching: true enable_local_flock: true diff --git a/src/tests/ftest/deployment/agent_failure.yaml b/src/tests/ftest/deployment/agent_failure.yaml index 9944e6624f5..474b0d16a7b 100644 --- a/src/tests/ftest/deployment/agent_failure.yaml +++ b/src/tests/ftest/deployment/agent_failure.yaml @@ -17,15 +17,11 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log storage: auto diff --git a/src/tests/ftest/deployment/basic_checkout.yaml b/src/tests/ftest/deployment/basic_checkout.yaml index 79b9d105442..7fc7b5a63c2 100644 --- a/src/tests/ftest/deployment/basic_checkout.yaml +++ b/src/tests/ftest/deployment/basic_checkout.yaml @@ -1,7 +1,9 @@ hosts: test_servers: 3 test_clients: 1 + timeout: 2700 + server_config: name: daos_server engines_per_host: 2 @@ -9,20 +11,18 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - 
fabric_iface_port: 31417 log_file: daos_server1.log storage: auto + pool: size: 50% properties: ec_cell_sz:128KiB + container: type: POSIX properties: cksum:crc16,cksum_size:16384,srv_cksum:on @@ -38,18 +38,21 @@ ior_easy: &ior_easy_base sw_deadline: 30 sw_wearout: 1 sw_status_file: stoneWallingStatusFile + ior_dfs_sx: <<: *ior_easy_base api: DFS dfs_oclass: SX dfs_chunk: 1MiB transfer_size: 1MiB + ior_dfs_ec_8p2gx: <<: *ior_easy_base api: DFS dfs_oclass: EC_8P2GX dfs_chunk: 8MiB transfer_size: 8MiB + ior_dfs_ec_16p2gx: <<: *ior_easy_base api: DFS @@ -71,16 +74,19 @@ mdtest_easy: &mdtest_easy_base stonewall_timer: 30 stonewall_statusfile: stoneWallingStatusFile dfs_destroy: false + mdtest_dfs_s1: <<: *mdtest_easy_base dfs_oclass: S1 dfs_dir_oclass: SX dfs_chunk: 1MiB + mdtest_dfs_ec_8p2g1: <<: *mdtest_easy_base dfs_oclass: EC_8P2G1 dfs_dir_oclass: RP_3GX dfs_chunk: 8MiB + mdtest_dfs_ec_16p2g1: <<: *mdtest_easy_base dfs_oclass: EC_16P2G1 @@ -156,7 +162,9 @@ mdtest: - [POSIX, 4096, 4096, 2, 10, 5, ' '] - [DFS, 4096, 4096, 1, 25, 20, '-u'] - [POSIX, 0, 0, 2, 10, 5, '-u -C -T -r'] + dfuse: disable_caching: true + hdf5_vol: plugin_path: /usr/lib64/mpich/lib diff --git a/src/tests/ftest/deployment/critical_integration.yaml b/src/tests/ftest/deployment/critical_integration.yaml index 764c564eba0..212d121a6e8 100644 --- a/src/tests/ftest/deployment/critical_integration.yaml +++ b/src/tests/ftest/deployment/critical_integration.yaml @@ -1,7 +1,9 @@ hosts: test_servers: 3 test_clients: 1 + timeout: 300 + server_config: name: daos_server engines_per_host: 2 @@ -9,17 +11,14 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log storage: auto + check_remote_root_access: false # this is needed as on aurora cluster we use libfabric provided # by HPE, which does not reside in 
regular location. diff --git a/src/tests/ftest/deployment/disk_failure.yaml b/src/tests/ftest/deployment/disk_failure.yaml index 8f1f5826e9e..f318f3e2c99 100644 --- a/src/tests/ftest/deployment/disk_failure.yaml +++ b/src/tests/ftest/deployment/disk_failure.yaml @@ -1,9 +1,12 @@ hosts: test_servers: 3 test_clients: 1 + timeout: 900 + daos_server: pattern_timeout: 60 + server_config: name: daos_server engines_per_host: 2 @@ -11,27 +14,26 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log storage: auto targets: 16 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log storage: auto targets: 16 + pool: size: 80% svcn: 3 + container: type: POSIX control_method: daos oclass: RP_2GX properties: "cksum:crc16,rf:1" + ior: client_processes: np: 2 diff --git a/src/tests/ftest/deployment/io_sys_admin.yaml b/src/tests/ftest/deployment/io_sys_admin.yaml index 727a0bfa794..003b72bfe40 100644 --- a/src/tests/ftest/deployment/io_sys_admin.yaml +++ b/src/tests/ftest/deployment/io_sys_admin.yaml @@ -1,7 +1,9 @@ hosts: test_servers: 3 test_clients: 1 + timeout: 700 + server_config: name: daos_server engines_per_host: 2 @@ -9,37 +11,40 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log storage: auto + dmg: dmg_sub_command: storage storage: storage_sub_command: scan + pool_1: scm_size: 256MiB nvme_size: 16GiB + pool_2: scm_size: 10GiB nvme_size: 50GiB + pool_3: size: 90% + container_1: properties: cksum:crc16,cksum_size:16384,srv_cksum:on control_method: daos + container_2: type: POSIX properties: cksum:crc16,cksum_size:16384,srv_cksum:on control_method: daos oclass: RP_2GX + container_3: type: POSIX properties: 
cksum:crc16,cksum_size:16384,srv_cksum:on @@ -48,6 +53,7 @@ container_3: pool: size: 40% + container: type: POSIX properties: cksum:crc16,cksum_size:16384,srv_cksum:on @@ -72,6 +78,7 @@ largefilecount: mdtest_oclass: # Run once with S1 and then with EC_16P2G1 - S1 - EC_2P1G1 + ior: client_processes: ppn: 30 @@ -82,6 +89,7 @@ ior: signature: 123 transfer_size: '1Mib' block_size: '1Mib' + dfuse: disable_caching: True enable_local_flock: true @@ -98,11 +106,13 @@ mdtest: write_bytes: 4096 read_bytes: 4096 depth: 0 + dcp: bufsize: "64MB" chunksize: "128MB" client_processes: np: 16 + hdf5_vol: plugin_path: /usr/lib64/mpich/lib diff --git a/src/tests/ftest/deployment/ior_per_rank.yaml b/src/tests/ftest/deployment/ior_per_rank.yaml index 3475c594484..03918af29d0 100644 --- a/src/tests/ftest/deployment/ior_per_rank.yaml +++ b/src/tests/ftest/deployment/ior_per_rank.yaml @@ -1,7 +1,9 @@ hosts: test_servers: 2 test_clients: 2 + timeout: 1500 + server_config: name: daos_server engines_per_host: 2 @@ -9,26 +11,25 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log storage: auto + pool: mode: 146 size: 350G # Cannot use percentage, as it does not work when using pool create for per rank. 
properties: ec_cell_sz:128KiB + container: type: POSIX properties: cksum:crc16,cksum_size:16384,srv_cksum:on control_method: daos oclass: SX + ior: client_processes: ppn: 32 diff --git a/src/tests/ftest/deployment/network_failure.yaml b/src/tests/ftest/deployment/network_failure.yaml index 37536cdd9bd..12101be4d98 100644 --- a/src/tests/ftest/deployment/network_failure.yaml +++ b/src/tests/ftest/deployment/network_failure.yaml @@ -13,16 +13,12 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log targets: 8 storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log targets: 8 storage: auto diff --git a/src/tests/ftest/deployment/server_rank_failure.yaml b/src/tests/ftest/deployment/server_rank_failure.yaml index 3f4374cc011..3877aebca29 100644 --- a/src/tests/ftest/deployment/server_rank_failure.yaml +++ b/src/tests/ftest/deployment/server_rank_failure.yaml @@ -14,8 +14,6 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log log_mask: INFO storage: auto @@ -25,8 +23,6 @@ server_config: 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log log_mask: INFO storage: auto diff --git a/src/tests/ftest/deployment/target_failure.yaml b/src/tests/ftest/deployment/target_failure.yaml index e2053cb0972..99c9af40fec 100644 --- a/src/tests/ftest/deployment/target_failure.yaml +++ b/src/tests/ftest/deployment/target_failure.yaml @@ -11,15 +11,11 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log storage: auto diff --git a/src/tests/ftest/dfuse/fio_pil4dfs_small.yaml 
b/src/tests/ftest/dfuse/fio_pil4dfs_small.yaml index 66491601a06..7269a12acff 100644 --- a/src/tests/ftest/dfuse/fio_pil4dfs_small.yaml +++ b/src/tests/ftest/dfuse/fio_pil4dfs_small.yaml @@ -1,7 +1,9 @@ hosts: test_servers: 2 test_clients: 1 + timeout: 1000 + server_config: name: daos_server engines_per_host: 2 @@ -9,28 +11,28 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log storage: auto transport_config: allow_insecure: true + agent_config: transport_config: allow_insecure: true + dmg: transport_config: allow_insecure: true + pool: scm_size: 1600000000 nvme_size: 20000000000 + container: type: POSIX control_method: daos @@ -41,6 +43,7 @@ container: properties: cksum:crc16,cksum_size:16384,srv_cksum:on,rd_fac:1 rf2: properties: cksum:crc16,cksum_size:16384,srv_cksum:on,rd_fac:2 + fio: names: - global @@ -70,6 +73,7 @@ fio: rw: 'randrw' test: numjobs: 1 + dfuse: mount_dir: "/tmp/daos_dfuse" thread_count: 8 diff --git a/src/tests/ftest/dfuse/fio_small.yaml b/src/tests/ftest/dfuse/fio_small.yaml index ed343aa4875..089e5e57ccd 100644 --- a/src/tests/ftest/dfuse/fio_small.yaml +++ b/src/tests/ftest/dfuse/fio_small.yaml @@ -1,7 +1,9 @@ hosts: test_servers: 2 test_clients: 1 + timeout: 1000 + server_config: name: daos_server engines_per_host: 2 @@ -9,28 +11,28 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log storage: auto transport_config: allow_insecure: true + agent_config: transport_config: allow_insecure: true + dmg: transport_config: allow_insecure: true + pool: scm_size: 1600000000 nvme_size: 20000000000 + container: type: POSIX control_method: daos 
@@ -41,6 +43,7 @@ container: properties: cksum:crc16,cksum_size:16384,srv_cksum:on,rd_fac:1 rf2: properties: cksum:crc16,cksum_size:16384,srv_cksum:on,rd_fac:2 + fio: names: - global @@ -70,6 +73,7 @@ fio: rw: 'randrw' test: numjobs: 1 + dfuse: mount_dir: "/tmp/daos_dfuse" thread_count: 8 diff --git a/src/tests/ftest/dfuse/pil4dfs_dcache.yaml b/src/tests/ftest/dfuse/pil4dfs_dcache.yaml index 719451ba5ba..34b301878c6 100644 --- a/src/tests/ftest/dfuse/pil4dfs_dcache.yaml +++ b/src/tests/ftest/dfuse/pil4dfs_dcache.yaml @@ -10,14 +10,10 @@ server_config: engines: 0: pinned_numa_node: 0 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log storage: auto 1: pinned_numa_node: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log storage: auto diff --git a/src/tests/ftest/dfuse/pil4dfs_fio.yaml b/src/tests/ftest/dfuse/pil4dfs_fio.yaml index e5b62446fbf..24101b84412 100644 --- a/src/tests/ftest/dfuse/pil4dfs_fio.yaml +++ b/src/tests/ftest/dfuse/pil4dfs_fio.yaml @@ -10,15 +10,11 @@ server_config: engines: 0: pinned_numa_node: 0 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log log_mask: INFO storage: auto 1: pinned_numa_node: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log log_mask: INFO storage: auto diff --git a/src/tests/ftest/erasurecode/aggregation.yaml b/src/tests/ftest/erasurecode/aggregation.yaml index 5b289b7eaa5..a7addf82078 100644 --- a/src/tests/ftest/erasurecode/aggregation.yaml +++ b/src/tests/ftest/erasurecode/aggregation.yaml @@ -1,10 +1,13 @@ hosts: test_servers: 5 test_clients: 3 + timeout: 2500 + setup: start_agents_once: false start_servers_once: false + server_config: name: daos_server engines_per_host: 2 @@ -12,25 +15,24 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log log_mask: ERR storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 
31417 log_file: daos_server1.log log_mask: ERR storage: auto + pool: size: 90% pool_query_timeout: 30 + container: type: POSIX control_method: daos + ior: api: "DFS" client_processes: diff --git a/src/tests/ftest/erasurecode/cell_size.yaml b/src/tests/ftest/erasurecode/cell_size.yaml index 5f22ee37a5f..bdcca24d7b5 100644 --- a/src/tests/ftest/erasurecode/cell_size.yaml +++ b/src/tests/ftest/erasurecode/cell_size.yaml @@ -1,7 +1,9 @@ hosts: test_servers: 5 test_clients: 3 + timeout: 1600 + server_config: name: daos_server engines_per_host: 2 @@ -9,19 +11,16 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log log_mask: ERR storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log log_mask: ERR storage: auto + pool: size: 93% cell_sizes: @@ -29,8 +28,10 @@ pool: - 64KiB - 128KiB - 1MiB + container: type: POSIX + ior: api: "DFS" client_processes: diff --git a/src/tests/ftest/erasurecode/cell_size_property.yaml b/src/tests/ftest/erasurecode/cell_size_property.yaml index 0710442abdc..d2aaa8fc024 100644 --- a/src/tests/ftest/erasurecode/cell_size_property.yaml +++ b/src/tests/ftest/erasurecode/cell_size_property.yaml @@ -1,7 +1,9 @@ hosts: test_servers: 5 test_clients: 3 + timeout: 900 + server_config: name: daos_server engines_per_host: 2 @@ -10,25 +12,23 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log log_mask: ERR storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log log_mask: ERR storage: auto + pool: size: 93% cell_sizes: - 4096 - 65536 - 131072 + container: type: POSIX cell_sizes: @@ -36,6 +36,7 @@ container: - 65536 - 131072 - 1048576 + ior: api: "DFS" client_processes: diff --git a/src/tests/ftest/erasurecode/ior_smoke.yaml 
b/src/tests/ftest/erasurecode/ior_smoke.yaml index 17b8ef5b43b..f10a60fd3ce 100644 --- a/src/tests/ftest/erasurecode/ior_smoke.yaml +++ b/src/tests/ftest/erasurecode/ior_smoke.yaml @@ -1,10 +1,13 @@ hosts: test_servers: 5 test_clients: 3 + timeout: 900 + setup: start_agents_once: False start_servers_once: False + server_config: name: daos_server engines_per_host: 2 @@ -13,24 +16,23 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log log_mask: ERR storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log log_mask: ERR storage: auto + pool: size: 93% + container: type: POSIX control_method: daos + ior: api: "DFS" client_processes: diff --git a/src/tests/ftest/erasurecode/mdtest_smoke.yaml b/src/tests/ftest/erasurecode/mdtest_smoke.yaml index 4e565f3a83b..4a5b0543027 100644 --- a/src/tests/ftest/erasurecode/mdtest_smoke.yaml +++ b/src/tests/ftest/erasurecode/mdtest_smoke.yaml @@ -1,7 +1,9 @@ hosts: test_servers: 5 test_clients: 3 + timeout: 600 + server_config: name: daos_server engines_per_host: 2 @@ -10,24 +12,23 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log log_mask: ERR storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log log_mask: ERR storage: auto + pool: size: 53% + container: type: POSIX control_method: daos + mdtest: client_processes: np_48: diff --git a/src/tests/ftest/erasurecode/multiple_rank_failure.yaml b/src/tests/ftest/erasurecode/multiple_rank_failure.yaml index cd0d2e77bb5..88806efa2f3 100644 --- a/src/tests/ftest/erasurecode/multiple_rank_failure.yaml +++ b/src/tests/ftest/erasurecode/multiple_rank_failure.yaml @@ -15,15 +15,11 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: 
daos_server0.log storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log storage: auto diff --git a/src/tests/ftest/erasurecode/multiple_target_failure.yaml b/src/tests/ftest/erasurecode/multiple_target_failure.yaml index cd0d2e77bb5..88806efa2f3 100644 --- a/src/tests/ftest/erasurecode/multiple_target_failure.yaml +++ b/src/tests/ftest/erasurecode/multiple_target_failure.yaml @@ -15,15 +15,11 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log storage: auto diff --git a/src/tests/ftest/erasurecode/offline_rebuild.yaml b/src/tests/ftest/erasurecode/offline_rebuild.yaml index d583887c24f..b84689f101b 100644 --- a/src/tests/ftest/erasurecode/offline_rebuild.yaml +++ b/src/tests/ftest/erasurecode/offline_rebuild.yaml @@ -7,11 +7,14 @@ hosts: 12_server: test_servers: server-[1-6] test_clients: 2 + timeout: 1200 + setup: # Test variants use different server counts, so ensure servers are stopped after each run start_agents_once: False start_servers_once: False + server_config: name: daos_server engines_per_host: 2 @@ -20,22 +23,21 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31416 log_file: daos_server0.log storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31517 log_file: daos_server1.log storage: auto + pool: size: 93% + container: type: POSIX control_method: daos + ior: api: "DFS" client_processes: diff --git a/src/tests/ftest/erasurecode/offline_rebuild_aggregation.yaml b/src/tests/ftest/erasurecode/offline_rebuild_aggregation.yaml index f199835e4d2..8f66eaf8f73 100644 --- a/src/tests/ftest/erasurecode/offline_rebuild_aggregation.yaml +++ 
b/src/tests/ftest/erasurecode/offline_rebuild_aggregation.yaml @@ -1,10 +1,13 @@ hosts: test_servers: 5 test_clients: 3 + timeout: 6000 + setup: start_agents_once: False start_servers_once: False + server_config: name: daos_server engines_per_host: 2 @@ -12,23 +15,22 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31416 log_file: daos_server0.log storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31517 log_file: daos_server1.log storage: auto + pool: size: 90% pool_query_timeout: 30 + container: type: POSIX control_method: daos + ior: api: "DFS" client_processes: diff --git a/src/tests/ftest/erasurecode/offline_rebuild_single.yaml b/src/tests/ftest/erasurecode/offline_rebuild_single.yaml index 210dd4589f6..0c94ac46f82 100644 --- a/src/tests/ftest/erasurecode/offline_rebuild_single.yaml +++ b/src/tests/ftest/erasurecode/offline_rebuild_single.yaml @@ -7,12 +7,16 @@ hosts: 12_server: test_servers: server-[1-6] test_clients: 1 + setup: start_servers_once: False + timeout: 900 + agent_config: #cache_expiration: 1 disable_caching: true + server_config: name: daos_server engines_per_host: 2 @@ -20,25 +24,24 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31416 log_file: daos_server0.log storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31517 log_file: daos_server1.log storage: auto + pool: size: 93% pool_query_timeout: 30 + container: type: POSIX single_data_set: # [object_qty, record_qty, dkey, akey, data_size] - [1, 1, 1, 1, 4194304] + objectclass: dfs_oclass_list: #- [EC_Object_Class, Minimum number of servers] diff --git a/src/tests/ftest/erasurecode/online_rebuild.yaml b/src/tests/ftest/erasurecode/online_rebuild.yaml index 74a6eb29ba5..20708ea4d2c 100644 --- a/src/tests/ftest/erasurecode/online_rebuild.yaml +++ b/src/tests/ftest/erasurecode/online_rebuild.yaml @@ -7,13 +7,17 @@ 
hosts: 12_server: test_servers: server-[1-6] test_clients: 2 + timeout: 1000 + setup: start_agents_once: False start_servers_once: False + agent_config: #cache_expiration: 1 disable_caching: true + server_config: name: daos_server engines_per_host: 2 @@ -21,27 +25,27 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31416 log_file: daos_server0.log storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31517 log_file: daos_server1.log storage: auto + pool: size: 93% + container: type: POSIX control_method: daos + daos: container: destroy: env_vars: - CRT_TIMEOUT=10 + ior: api: "DFS" client_processes: diff --git a/src/tests/ftest/erasurecode/online_rebuild_mdtest.yaml b/src/tests/ftest/erasurecode/online_rebuild_mdtest.yaml index a6e82be7837..39459319e7f 100644 --- a/src/tests/ftest/erasurecode/online_rebuild_mdtest.yaml +++ b/src/tests/ftest/erasurecode/online_rebuild_mdtest.yaml @@ -7,10 +7,13 @@ hosts: 12_server: test_servers: server-[1-6] test_clients: 2 + timeout: 1500 + setup: start_agents_once: False start_servers_once: False + server_config: name: daos_server engines_per_host: 2 @@ -18,8 +21,6 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31416 log_file: daos_server0.log log_mask: INFO storage: auto @@ -29,22 +30,24 @@ server_config: 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31517 log_file: daos_server1.log log_mask: INFO storage: auto env_vars: - NA_OFI_UNEXPECTED_TAG_MSG=0 + client: env_vars: - NA_OFI_UNEXPECTED_TAG_MSG=0 + pool: size: 93% + container: type: POSIX control_method: daos properties: rd_fac:2 + mdtest: client_processes: np: 4 diff --git a/src/tests/ftest/erasurecode/online_rebuild_single.yaml b/src/tests/ftest/erasurecode/online_rebuild_single.yaml index 781605d841b..e053eb98671 100644 --- a/src/tests/ftest/erasurecode/online_rebuild_single.yaml +++ 
b/src/tests/ftest/erasurecode/online_rebuild_single.yaml @@ -7,9 +7,12 @@ hosts: 12_server: test_servers: server-[1-6] test_clients: 1 + timeout: 1200 + setup: start_servers_once: False + server_config: name: daos_server engines_per_host: 2 @@ -17,26 +20,25 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31416 log_file: daos_server0.log storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31517 log_file: daos_server1.log storage: auto + pool: size: 93% pool_query_timeout: 30 properties: rd_fac:2 + container: type: POSIX single_data_set: # [object_qty, record_qty, dkey, akey, data_size] - [1, 1, 1, 1, 4194304] + objectclass: dfs_oclass_list: #- [EC_Object_Class, Minimum number of servers] diff --git a/src/tests/ftest/erasurecode/rank_failure.yaml b/src/tests/ftest/erasurecode/rank_failure.yaml index 51efe637095..f7f29738dc8 100644 --- a/src/tests/ftest/erasurecode/rank_failure.yaml +++ b/src/tests/ftest/erasurecode/rank_failure.yaml @@ -1,10 +1,13 @@ hosts: test_servers: 6 test_clients: 1 + timeout: 1800 + setup: start_agents_once: false start_servers_once: false + server_config: name: daos_server engines_per_host: 2 @@ -12,21 +15,19 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log log_mask: ERR storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log log_mask: ERR storage: auto + pool: scm_size: 8G + gen_io_conf: ranks: "11" targets: "4" diff --git a/src/tests/ftest/erasurecode/rebuild_disabled.yaml b/src/tests/ftest/erasurecode/rebuild_disabled.yaml index e970e043972..3cf796c63bf 100644 --- a/src/tests/ftest/erasurecode/rebuild_disabled.yaml +++ b/src/tests/ftest/erasurecode/rebuild_disabled.yaml @@ -7,11 +7,14 @@ hosts: 10_server: test_servers: server-[1-5] test_clients: 3 + timeout: 3500 + setup: # Test variants use 
different server counts, so ensure servers are stopped after each run start_agents_once: False start_servers_once: False + server_config: name: daos_server engines_per_host: 2 @@ -19,23 +22,22 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31416 log_file: daos_server0.log storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31517 log_file: daos_server1.log storage: auto + pool: size: 93% pool_query_timeout: 30 + container: type: POSIX control_method: daos + ior: api: "DFS" client_processes: diff --git a/src/tests/ftest/erasurecode/rebuild_disabled_single.yaml b/src/tests/ftest/erasurecode/rebuild_disabled_single.yaml index 006e75079fb..a91040c59e8 100644 --- a/src/tests/ftest/erasurecode/rebuild_disabled_single.yaml +++ b/src/tests/ftest/erasurecode/rebuild_disabled_single.yaml @@ -7,11 +7,14 @@ hosts: 10_server: test_servers: server-[1-5] test_clients: 1 + timeout: 400 + setup: # Test variants use different server counts, so ensure servers are stopped after each run start_agents_once: False start_servers_once: False + server_config: name: daos_server engines_per_host: 2 @@ -20,26 +23,25 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31416 log_file: daos_server0.log storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31517 log_file: daos_server1.log storage: auto + pool: size: 93% pool_query_timeout: 30 + container: type: POSIX control_method: daos single_data_set: # [object_qty, record_qty, dkey, akey, data_size] - [1, 1, 1, 1, 4194304] + objectclass: dfs_oclass_list: #- [EC_Object_Class, Minimum number of servers] diff --git a/src/tests/ftest/erasurecode/rebuild_fio.yaml b/src/tests/ftest/erasurecode/rebuild_fio.yaml index 6ec1a98faff..677e4f9a961 100644 --- a/src/tests/ftest/erasurecode/rebuild_fio.yaml +++ b/src/tests/ftest/erasurecode/rebuild_fio.yaml @@ -7,10 +7,13 @@ 
hosts: 12_server: test_servers: server-[1-6] test_clients: 1 + timeout: 1500 + setup: start_agents_once: False start_servers_once: False + server_config: name: daos_server engines_per_host: 2 @@ -18,8 +21,6 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31416 log_file: daos_server0.log log_mask: ERR targets: 2 @@ -27,16 +28,16 @@ server_config: 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31517 log_file: daos_server1.log log_mask: ERR targets: 2 storage: auto + pool: size: 60% aggregation_timeout: 180 set_logmasks: False + container: type: POSIX control_method: daos @@ -45,6 +46,7 @@ container: properties: rd_fac:1 rf2: properties: rd_fac:2 + fio: names: - test @@ -64,6 +66,7 @@ fio: randrw: rw: 'randrw' rw_read: 'randrw' + dfuse: mount_dir: "/tmp/daos_dfuse" disable_caching: True diff --git a/src/tests/ftest/erasurecode/restart.yaml b/src/tests/ftest/erasurecode/restart.yaml index 0adb0c937b4..d8cc0c1a844 100644 --- a/src/tests/ftest/erasurecode/restart.yaml +++ b/src/tests/ftest/erasurecode/restart.yaml @@ -1,10 +1,13 @@ hosts: test_servers: 5 test_clients: 3 + timeout: 1500 + setup: start_agents_once: False start_servers_once: False + server_config: name: daos_server engines_per_host: 2 @@ -12,26 +15,25 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log log_mask: ERR storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log log_mask: ERR storage: auto + pool: size: 40% svcn: 3 pool_query_timeout: 30 + container: type: POSIX control_method: daos + ior: api: "DFS" client_processes: @@ -53,5 +55,6 @@ ior: - ["EC_4P1GX", 6] - ["EC_4P2GX", 6] - ["EC_8P2GX", 10] + aggregation: threshold: "70%" diff --git a/src/tests/ftest/erasurecode/space_usage.yaml b/src/tests/ftest/erasurecode/space_usage.yaml index dc33af3aaec..e0ff34387eb 100644 
--- a/src/tests/ftest/erasurecode/space_usage.yaml +++ b/src/tests/ftest/erasurecode/space_usage.yaml @@ -1,7 +1,9 @@ hosts: test_servers: 3 test_clients: 1 + timeout: 600 + server_config: name: daos_server engines_per_host: 2 @@ -9,25 +11,24 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log log_mask: ERR storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log log_mask: ERR storage: auto + pool: size: 95% properties: ec_cell_sz:128KiB,reclaim:disabled + container: type: POSIX control_method: daos + ior: &ior_base client_processes: ppn: 32 @@ -36,11 +37,14 @@ ior: &ior_base transfer_size: 32KiB # Partial update block_size: 16GiB # Aggregate 500 GiB flags: "-w -C -e -g -G 27 -k -Q 1 -v" + ior_ec_4p2gx: <<: *ior_base dfs_oclass: EC_4P2GX + ior_ec_4p1gx: <<: *ior_base dfs_oclass: EC_4P1GX + space_usage: max_diff_percent: 0.05 diff --git a/src/tests/ftest/erasurecode/truncate.yaml b/src/tests/ftest/erasurecode/truncate.yaml index 4cfe9433251..64a28c0ae5b 100644 --- a/src/tests/ftest/erasurecode/truncate.yaml +++ b/src/tests/ftest/erasurecode/truncate.yaml @@ -7,10 +7,13 @@ hosts: 10_server: test_servers: server-[1-5] test_clients: 1 + timeout: 300 + setup: start_agents_once: false start_servers_once: false + server_config: name: daos_server engines_per_host: 2 @@ -18,21 +21,19 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log log_mask: ERR storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log log_mask: ERR storage: auto + pool: size: 50% + container: type: POSIX control_method: daos @@ -41,6 +42,7 @@ container: properties: rd_fac:1 rf2: properties: rd_fac:2 + fio: names: - test @@ -54,6 +56,7 @@ fio: read_write: rw: 'write' truncate_size: '26214400' # 25Mb + dfuse: 
mount_dir: "/tmp/daos_dfuse" caching: !mux diff --git a/src/tests/ftest/fault_injection/ec.yaml b/src/tests/ftest/fault_injection/ec.yaml index ee607de4ae2..2a9b80c1b9c 100644 --- a/src/tests/ftest/fault_injection/ec.yaml +++ b/src/tests/ftest/fault_injection/ec.yaml @@ -1,10 +1,13 @@ hosts: test_servers: 7 test_clients: 1 + timeout: 300 + setup: start_agents_once: false start_servers_once: false + server_config: name: daos_server engines_per_host: 2 @@ -12,24 +15,23 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log storage: auto + pool: size: 93% properties: ec_cell_sz:64KiB + container: type: POSIX control_method: daos properties: cksum:crc16,cksum_size:16384,srv_cksum:on,rd_fac:2 + ior: api: "DFS" client_processes: @@ -46,6 +48,7 @@ ior: - "EC_2P2GX" - "EC_4P2GX" - "EC_8P2GX" + fio: names: - global @@ -62,8 +65,10 @@ fio: rw: 'rw' test: numjobs: 1 + dfuse: mount_dir: "/tmp/daos_dfuse" + faults: fault_list: - DAOS_CSUM_CORRUPT_FETCH diff --git a/src/tests/ftest/fault_injection/pool.yaml b/src/tests/ftest/fault_injection/pool.yaml index 5f1315f134b..a9a04d63ad7 100644 --- a/src/tests/ftest/fault_injection/pool.yaml +++ b/src/tests/ftest/fault_injection/pool.yaml @@ -12,16 +12,12 @@ server_config: pinned_numa_node: 0 targets: 4 nr_xs_helpers: 0 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log storage: auto 1: pinned_numa_node: 1 targets: 4 nr_xs_helpers: 0 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log storage: auto diff --git a/src/tests/ftest/io/io_consistency.yaml b/src/tests/ftest/io/io_consistency.yaml index d2fa53d73d2..43b783bacb2 100644 --- a/src/tests/ftest/io/io_consistency.yaml +++ b/src/tests/ftest/io/io_consistency.yaml @@ -1,7 +1,9 @@ hosts: test_servers: 2 test_clients: 2 + timeout: 210 + 
server_config: name: daos_server engines_per_host: 2 @@ -9,25 +11,24 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log log_mask: ERR storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log log_mask: ERR storage: auto + pool: scm_size: 5000000000 nvme_size: 20000000000 + container: type: POSIX control_method: daos + ior: client_processes: np: 10 @@ -51,5 +52,6 @@ ior: objectclass: SX: dfs_oclass: "SX" + dfuse: disable_caching: true diff --git a/src/tests/ftest/io/large_file_count.yaml b/src/tests/ftest/io/large_file_count.yaml index 6ff375cf3a9..3c7fe6499e0 100644 --- a/src/tests/ftest/io/large_file_count.yaml +++ b/src/tests/ftest/io/large_file_count.yaml @@ -1,7 +1,9 @@ hosts: test_servers: 5 test_clients: 3 + timeout: 5000 + server_config: name: daos_server engines_per_host: 2 @@ -9,22 +11,21 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log storage: auto + pool: size: 95% + container: type: POSIX control_method: daos + largefilecount: api: - DFS @@ -36,6 +37,7 @@ largefilecount: mdtest_oclass: # Run once with S1 and then with EC_16P2G1 - S1 - EC_2P1G1 + ior: np: 30 dfs_destroy: false @@ -46,6 +48,7 @@ ior: block_size: '7G' env_vars: - D_IL_REPORT=1 + dfuse: disable_caching: true diff --git a/src/tests/ftest/io/macsio_test.yaml b/src/tests/ftest/io/macsio_test.yaml index 663e4efc67c..ea3f5605577 100644 --- a/src/tests/ftest/io/macsio_test.yaml +++ b/src/tests/ftest/io/macsio_test.yaml @@ -1,7 +1,9 @@ hosts: test_servers: 1 test_clients: 3 + timeout: 120 + server_config: name: daos_server engines_per_host: 2 @@ -9,23 +11,22 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - 
fabric_iface_port: 31317 log_file: daos_server0.log storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log storage: auto + pool: scm_size: 5G nvme_size: 10G + container: control_method: daos type: POSIX + macsio: interface: hdf5 parallel_file_mode: SIF 1 @@ -36,6 +37,7 @@ macsio: num_dumps: 2 debug_level: 1 processes: 6 + job_manager: !mux mpich: class_name: Mpirun diff --git a/src/tests/ftest/io/seg_count.yaml b/src/tests/ftest/io/seg_count.yaml index 8014a3c6711..7a2d5bd5c8e 100644 --- a/src/tests/ftest/io/seg_count.yaml +++ b/src/tests/ftest/io/seg_count.yaml @@ -1,7 +1,9 @@ hosts: test_servers: 4 test_clients: 4 + timeout: 2000 + server_config: name: daos_server engines_per_host: 2 @@ -9,22 +11,21 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log storage: auto + pool: size: 95% + container: type: POSIX control_method: daos + ior: client_processes: !mux slots_16: diff --git a/src/tests/ftest/io/small_file_count.yaml b/src/tests/ftest/io/small_file_count.yaml index 79e02c3d787..2a33a3ef934 100644 --- a/src/tests/ftest/io/small_file_count.yaml +++ b/src/tests/ftest/io/small_file_count.yaml @@ -1,7 +1,9 @@ hosts: test_servers: 3 test_clients: 1 + timeout: 750 + server_config: name: daos_server engines_per_host: 2 @@ -9,23 +11,22 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log storage: auto + pool: scm_size: 40G nvme_size: 300G + container: type: POSIX control_method: daos + largefilecount: api: - DFS @@ -37,6 +38,7 @@ largefilecount: mdtest_oclass: # Run once with S1 and then 
with EC_16P2G1 - S1 - EC_2P1G1 + ior: np: 30 dfs_destroy: false @@ -47,6 +49,7 @@ ior: block_size: '2G' env_vars: - D_IL_REPORT=1 + dfuse: disable_caching: true diff --git a/src/tests/ftest/io/unaligned_io.yaml b/src/tests/ftest/io/unaligned_io.yaml index bf1a6c19a85..3bcc450c8c5 100644 --- a/src/tests/ftest/io/unaligned_io.yaml +++ b/src/tests/ftest/io/unaligned_io.yaml @@ -1,6 +1,8 @@ hosts: test_servers: 4 + timeout: 900 + server_config: name: daos_server engines_per_host: 2 @@ -9,8 +11,6 @@ server_config: targets: 8 pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log log_mask: DEBUG,MEM=ERR env_vars: @@ -25,8 +25,6 @@ server_config: targets: 8 pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log log_mask: DEBUG,MEM=ERR env_vars: @@ -37,8 +35,10 @@ server_config: - D_LOG_FILE_APPEND_PID=1 - COVFILE=/tmp/test.cov storage: auto + pool: scm_size: 12G + datasize: sizes: - 20 diff --git a/src/tests/ftest/ior/crash.yaml b/src/tests/ftest/ior/crash.yaml index ba9579894d6..62c9c23f1f6 100644 --- a/src/tests/ftest/ior/crash.yaml +++ b/src/tests/ftest/ior/crash.yaml @@ -1,7 +1,9 @@ hosts: test_servers: 1 test_clients: 3 + timeout: 800 + server_config: name: daos_server engines_per_host: 2 @@ -9,24 +11,23 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log log_mask: ERR storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log log_mask: ERR storage: auto + pool: size: 90% + container: type: POSIX control_method: daos + ior: api: "DFS" client_processes: diff --git a/src/tests/ftest/ior/hard.yaml b/src/tests/ftest/ior/hard.yaml index e04de4c0414..71394243cff 100644 --- a/src/tests/ftest/ior/hard.yaml +++ b/src/tests/ftest/ior/hard.yaml @@ -1,7 +1,9 @@ hosts: test_servers: 5 test_clients: 3 + timeout: 1000 + 
server_config: name: daos_server engines_per_host: 2 @@ -9,8 +11,6 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log log_mask: ERR storage: @@ -21,8 +21,6 @@ server_config: 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log log_mask: ERR storage: @@ -30,12 +28,15 @@ server_config: class: dcpm scm_list: ["/dev/pmem1"] scm_mount: /mnt/daos1 + pool: scm_size: 500G + container: type: POSIX control_method: daos properties: dedup:memcmp + ior: client_processes: np: 48 diff --git a/src/tests/ftest/ior/hard_rebuild.yaml b/src/tests/ftest/ior/hard_rebuild.yaml index 54a89fc237f..32665ab18ad 100644 --- a/src/tests/ftest/ior/hard_rebuild.yaml +++ b/src/tests/ftest/ior/hard_rebuild.yaml @@ -7,10 +7,13 @@ hosts: 12_server: test_servers: server-[1-6] test_clients: 2 + timeout: 1000 + setup: start_agents_once: false start_servers_once: false + server_config: name: daos_server engines_per_host: 2 @@ -18,24 +21,23 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log log_mask: ERR storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log log_mask: ERR storage: auto + pool: size: 90% + container: type: POSIX control_method: daos + ior: api: "DFS" client_processes: diff --git a/src/tests/ftest/ior/intercept_multi_client.yaml b/src/tests/ftest/ior/intercept_multi_client.yaml index 4831c58ebb8..2fcf2a0454a 100644 --- a/src/tests/ftest/ior/intercept_multi_client.yaml +++ b/src/tests/ftest/ior/intercept_multi_client.yaml @@ -11,16 +11,12 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log log_mask: WARN storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: 
daos_server1.log log_mask: WARN storage: auto diff --git a/src/tests/ftest/ior/small.yaml b/src/tests/ftest/ior/small.yaml index c9bc6d90ef8..9f5145debc8 100644 --- a/src/tests/ftest/ior/small.yaml +++ b/src/tests/ftest/ior/small.yaml @@ -15,15 +15,11 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log storage: auto transport_config: diff --git a/src/tests/ftest/mdtest/small.yaml b/src/tests/ftest/mdtest/small.yaml index 8fd080664f4..afa3efd2e3d 100644 --- a/src/tests/ftest/mdtest/small.yaml +++ b/src/tests/ftest/mdtest/small.yaml @@ -15,15 +15,11 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log storage: auto transport_config: diff --git a/src/tests/ftest/nvme/enospace.yaml b/src/tests/ftest/nvme/enospace.yaml index 43bdd6b787b..396ddee7cfc 100644 --- a/src/tests/ftest/nvme/enospace.yaml +++ b/src/tests/ftest/nvme/enospace.yaml @@ -1,9 +1,11 @@ hosts: test_servers: 2 test_clients: 2 + # Few tests are in loop for ~10 times and single loop # is taking ~600 seconds to finish so larger timeout to run tests. 
timeout: 7500 + server_config: name: daos_server engines_per_host: 2 @@ -11,25 +13,24 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 targets: 1 storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 targets: 1 storage: auto transport_config: allow_insecure: true + agent_config: transport_config: allow_insecure: true + dmg: transport_config: allow_insecure: true + pool: scm_size: 5G nvme_size: 5G @@ -43,6 +44,7 @@ container: control_method: daos register_cleanup: False # Skip teardown destroy. Test manually destroys containers. type: POSIX + ior: api: "DFS" client_processes: diff --git a/src/tests/ftest/nvme/fragmentation.yaml b/src/tests/ftest/nvme/fragmentation.yaml index 835a0d70ac3..279f2c50498 100644 --- a/src/tests/ftest/nvme/fragmentation.yaml +++ b/src/tests/ftest/nvme/fragmentation.yaml @@ -1,7 +1,9 @@ hosts: test_servers: 2 test_clients: 2 + timeout: 6000 + server_config: name: daos_server engines_per_host: 2 @@ -9,22 +11,21 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log storage: auto + pool: size: 95% + container: type: POSIX control_method: daos + ior: num_repeat: 30 num_parallel_job: 10 diff --git a/src/tests/ftest/nvme/health.yaml b/src/tests/ftest/nvme/health.yaml index 5eb9ea7f3e3..e8827d15070 100644 --- a/src/tests/ftest/nvme/health.yaml +++ b/src/tests/ftest/nvme/health.yaml @@ -1,7 +1,9 @@ hosts: test_servers: 2 test_clients: 1 + timeout: 900 + server_config: name: daos_server engines_per_host: 2 @@ -10,26 +12,25 @@ server_config: targets: 8 pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log storage: auto 1: targets: 8 pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - 
fabric_iface_port: 31417 log_file: daos_server1.log storage: auto transport_config: allow_insecure: True + agent_config: transport_config: allow_insecure: True + dmg: transport_config: allow_insecure: True + pool: max_num_pools: 40 total_pool_percentage: 95 diff --git a/src/tests/ftest/nvme/io_verification.yaml b/src/tests/ftest/nvme/io_verification.yaml index a183f319cd2..48fbb787825 100644 --- a/src/tests/ftest/nvme/io_verification.yaml +++ b/src/tests/ftest/nvme/io_verification.yaml @@ -1,7 +1,9 @@ hosts: test_servers: 3 test_clients: 1 + timeout: 4000 + server_config: name: daos_server engines_per_host: 2 @@ -9,30 +11,33 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log storage: auto + pool: num_pools: 4 + pool_0: size: 20% + pool_1: size: 30% + pool_2: size: 50% + pool_3: size: 60% + container: type: POSIX control_method: daos + ior: client_processes: np: 16 diff --git a/src/tests/ftest/nvme/object.yaml b/src/tests/ftest/nvme/object.yaml index 9df02748652..6fdac0a0a41 100644 --- a/src/tests/ftest/nvme/object.yaml +++ b/src/tests/ftest/nvme/object.yaml @@ -1,9 +1,11 @@ hosts: test_servers: 3 test_clients: 1 + timeouts: test_nvme_object_single_pool: 270 test_nvme_object_multiple_pools: 16000 + server_config: name: daos_server engines_per_host: 2 @@ -11,26 +13,26 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log storage: auto + pool_1: scm_size: 4GB nvme_size: 20G + pool_2: scm_size: 4GB nvme_size: 100GB + pool_3: scm_size: 4GB nvme_size: 350GB + container: object_qty: 10 record_size: diff --git a/src/tests/ftest/nvme/pool_exclude.yaml 
b/src/tests/ftest/nvme/pool_exclude.yaml index 1b576c018d7..f08fd7f24d4 100644 --- a/src/tests/ftest/nvme/pool_exclude.yaml +++ b/src/tests/ftest/nvme/pool_exclude.yaml @@ -1,13 +1,16 @@ hosts: test_servers: 5 test_clients: 3 + # If we define the server under test_servers, launch.py will convert it to the # actual server name passed into --test_servers. If above test_servers is hosts, # it'll be used as one of the servers at test startup time, so use something # other than hosts. timeout: 1000 + setup: start_servers_once: false + server_config: name: daos_server engines_per_host: 2 @@ -15,29 +18,28 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31416 log_file: daos_server0.log log_mask: ERR storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31516 log_file: daos_server1.log log_mask: ERR storage: auto + pool: scm_size: 50000000000 nvme_size: 300000000000 svcn: 4 rebuild_timeout: 180 pool_query_timeout: 30 + container: type: POSIX control_method: daos properties: cksum:crc16,cksum_size:16384,srv_cksum:on,rd_fac:2 + ior_flags_common: &ior_flags_common write_flags: "-w -F -k -G 1" read_flags: "-F -r -R -k -G 1" @@ -49,6 +51,7 @@ ior_flags_common: &ior_flags_common - ["NA", "NA", 4000, 50000000] # [4K, 51M] - ["NA", "NA", 1000000, 500000000] # [1M, 512M] - ["NA", "NA", 1000000000, 8000000000] # [1G, 8G] + ior: client_processes: np: 48 @@ -68,11 +71,15 @@ ior: <<: *ior_flags_common dfs_oclass: EC_8P2GX dfs_dir_oclass: EC_8P2GX + loop_test: iterations: 2 + aggregation: test_with_aggregation: true + rebuild: test_with_rebuild: true + checksum: test_with_checksum: false diff --git a/src/tests/ftest/nvme/pool_extend.yaml b/src/tests/ftest/nvme/pool_extend.yaml index 5664bb725d2..62744c5d5e5 100644 --- a/src/tests/ftest/nvme/pool_extend.yaml +++ b/src/tests/ftest/nvme/pool_extend.yaml @@ -1,15 +1,19 @@ hosts: test_servers: server-[1-2] test_clients: 2 + # If we define the server 
under test_servers, launch.py will convert it to the # actual server name passed into --test_servers. If above test_servers is hosts, # it'll be used as one of the servers at test startup time, so use something # other than hosts. extra_servers: test_servers: server-[3-5] + timeout: 1000 + setup: start_servers_once: false + server_config: name: daos_server engines_per_host: 2 @@ -17,31 +21,33 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31416 log_file: daos_server0.log storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31516 log_file: daos_server1.log storage: auto + pool: svcn: 4 rebuild_timeout: 120 pool_query_timeout: 30 + pool_qty_1: size: "50%" + pool_qty_2: size: "25%" + pool_qty_3: size: "16%" + container: type: POSIX control_method: daos properties: cksum:crc16,cksum_size:16384,srv_cksum:on,rd_fac:1 + ior_flags_common: &ior_flags_common write_flags: "-w -F -k -G 1" read_flags: "-F -r -R -k -G 1" @@ -53,6 +59,7 @@ ior_flags_common: &ior_flags_common - ["NA", "NA", 4000, 5000000] # [4K, 5.1M] - ["NA", "NA", 1000000, 500000000] # [1M, 512M] - ["NA", "NA", 1000000000, 8000000000] # [1G, 8G] + ior: client_processes: np: 48 @@ -68,11 +75,15 @@ ior: <<: *ior_flags_common dfs_oclass: EC_2P1GX dfs_dir_oclass: EC_2P1GX + loop_test: iterations: 3 + aggregation: test_with_aggregation: true + rebuild: test_with_rebuild: true + checksum: test_with_checksum: false diff --git a/src/tests/ftest/osa/dmg_negative_test.yaml b/src/tests/ftest/osa/dmg_negative_test.yaml index b2adbe6584f..f9e943defd6 100644 --- a/src/tests/ftest/osa/dmg_negative_test.yaml +++ b/src/tests/ftest/osa/dmg_negative_test.yaml @@ -1,14 +1,18 @@ hosts: test_servers: server-[1-2] test_clients: 1 + # If we define the server under test_servers, launch.py will convert it to the # actual server name passed into --test_servers. 
If above test_servers is hosts, # it'll be used as one of the servers at test startup time, so use something # other than hosts. extra_servers: test_servers: server-3 + timeout: 1800 + skip_add_log_msg: true + server_config: name: daos_server engines_per_host: 2 @@ -16,8 +20,6 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31416 log_file: daos_server0.log log_mask: DEBUG,MEM=ERR env_vars: @@ -26,19 +28,20 @@ server_config: 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31516 log_file: daos_server1.log log_mask: DEBUG,MEM=ERR env_vars: - DD_MASK=mgmt,md,rebuild storage: auto + pool: scm_size: 6000000000 nvme_size: 54000000000 svcn: 4 + container: properties: cksum:crc16,cksum_size:16384 + test_sequence: dmg_cmd_test: # Rank list, target_list, Pass/Fail Condition diff --git a/src/tests/ftest/osa/offline_drain.yaml b/src/tests/ftest/osa/offline_drain.yaml index a9fccca5aee..79500c0abac 100644 --- a/src/tests/ftest/osa/offline_drain.yaml +++ b/src/tests/ftest/osa/offline_drain.yaml @@ -1,10 +1,14 @@ hosts: test_servers: 3 test_clients: 1 + timeout: 2400 + setup: start_servers_once: false + skip_add_log_msg: true + server_config: name: daos_server engines_per_host: 2 @@ -12,8 +16,6 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31416 log_file: daos_server0.log log_mask: INFO,MEM=ERR env_vars: @@ -22,36 +24,40 @@ server_config: 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31516 log_file: daos_server1.log log_mask: INFO,MEM=ERR env_vars: - DD_MASK=mgmt,md storage: auto + pool: scm_size: 12000000000 nvme_size: 108000000000 svcn: 4 rebuild_timeout: 240 properties: scrub:timed + container: type: POSIX control_method: daos oclass: RP_3G6 properties: cksum:crc64,cksum_size:16384,srv_cksum:on,rd_fac:2 + dkeys: single: no_of_dkeys: - 50 + akeys: single: no_of_akeys: - 10 + record: 1KB: length: - 1024 + ior: 
clientslots: slots: 48 @@ -69,6 +75,7 @@ ior: # The values are set to be in the multiples of 10. # Values are appx GB. - [12000000000, 108000000000, 500000, 500000000] + mdtest: api: DFS client_processes: @@ -87,19 +94,25 @@ mdtest: read_bytes: 32768 verbosity_value: 1 depth: 0 + test_obj_class: oclass: - RP_2G8 - RP_4G1 - EC_2P1G1 + aggregation: test_with_aggregation: true + rebuild: test_with_rebuild: true + checksum: test_with_checksum: false + snapshot: test_with_snapshot: true + pool_capacity: pool_fillup: 10 pool_test_oclass: RP_2GX diff --git a/src/tests/ftest/osa/offline_extend.yaml b/src/tests/ftest/osa/offline_extend.yaml index 599d514db37..90b78c7a962 100644 --- a/src/tests/ftest/osa/offline_extend.yaml +++ b/src/tests/ftest/osa/offline_extend.yaml @@ -1,12 +1,17 @@ hosts: test_servers: server-[1-2] test_clients: 1 + extra_servers: test_servers: server-3 + timeout: 1100 + setup: start_servers_once: false + skip_add_log_msg: true + server_config: name: daos_server engines_per_host: 2 @@ -14,8 +19,6 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31416 log_file: daos_server0.log log_mask: DEBUG,MEM=ERR env_vars: @@ -24,23 +27,24 @@ server_config: 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31516 log_file: daos_server1.log log_mask: DEBUG,MEM=ERR env_vars: - DD_MASK=mgmt,md,rebuild storage: auto + pool: scm_size: 6000000000 nvme_size: 54000000000 svcn: 2 properties: scrub:lazy + container: type: POSIX control_method: daos oclass: RP_2G1 properties: cksum:crc64,cksum_size:16384,srv_cksum:on,rd_fac:1 + ior: clientslots: slots: 48 @@ -58,6 +62,7 @@ ior: # The values are set to be in the multiples of 10. # Values are appx GB. 
- [6000000000, 54000000000, 500000, 500000000] + mdtest: api: DFS client_processes: @@ -76,19 +81,26 @@ mdtest: read_bytes: 32768 verbosity_value: 1 depth: 0 + test_obj_class: oclass: - RP_2G8 - EC_2P1G1 + loop_test: iterations: 3 + aggregation: test_with_aggregation: true + rebuild: test_with_rebuild: true + checksum: test_with_checksum: false + snapshot: test_with_snapshot: true + test_ranks: rank_list: ["4", "5"] diff --git a/src/tests/ftest/osa/offline_parallel_test.yaml b/src/tests/ftest/osa/offline_parallel_test.yaml index e8dd4563339..b17bf53705f 100644 --- a/src/tests/ftest/osa/offline_parallel_test.yaml +++ b/src/tests/ftest/osa/offline_parallel_test.yaml @@ -1,12 +1,17 @@ hosts: test_servers: server-[1-2] test_clients: 1 + extra_servers: test_servers: server-3 + timeout: 700 + setup: start_servers_once: false + skip_add_log_msg: true + server_config: name: daos_server engines_per_host: 2 @@ -14,8 +19,6 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31416 log_file: daos_server0.log log_mask: DEBUG,MEM=ERR env_vars: @@ -24,22 +27,23 @@ server_config: 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31516 log_file: daos_server1.log log_mask: DEBUG,MEM=ERR env_vars: - DD_MASK=mgmt,md,rebuild storage: auto + pool: scm_size: 6000000000 nvme_size: 54000000000 svcn: 4 + container: type: POSIX control_method: daos oclass: RP_2G8 properties: cksum:crc64,cksum_size:16384,srv_cksum:on,rd_fac:1 + ior: clientslots: slots: 48 @@ -57,6 +61,7 @@ ior: # The values are set to be in the multiples of 10. # Values are appx GB. 
- [6000000000, 54000000000, 500000, 500000000] + mdtest: api: DFS client_processes: @@ -75,16 +80,22 @@ mdtest: read_bytes: 32768 verbosity_value: 1 depth: 0 + test_obj_class: oclass: - RP_2G8 + loop_test: iterations: 3 + aggregation: test_with_aggregation: true + rebuild: test_with_rebuild: true + checksum: test_with_checksum: false + system_stop_start: flags: true diff --git a/src/tests/ftest/osa/offline_reintegration.yaml b/src/tests/ftest/osa/offline_reintegration.yaml index 359c7bab9b6..ae8874d37d8 100644 --- a/src/tests/ftest/osa/offline_reintegration.yaml +++ b/src/tests/ftest/osa/offline_reintegration.yaml @@ -1,12 +1,17 @@ hosts: test_servers: 3 test_clients: 1 + timeout: 1300 + timeouts: test_osa_offline_reintegrate_with_less_pool_space: 1800 + setup: start_servers_once: false + skip_add_log_msg: true + server_config: name: daos_server engines_per_host: 2 @@ -14,8 +19,6 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31416 log_file: daos_server0.log log_mask: DEBUG,MEM=ERR env_vars: @@ -24,8 +27,6 @@ server_config: 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31516 log_file: daos_server1.log log_mask: DEBUG,MEM=ERR env_vars: @@ -33,12 +34,15 @@ server_config: storage: auto transport_config: allow_insecure: true + agent_config: transport_config: allow_insecure: true + dmg: transport_config: allow_insecure: true + pool: scm_size: 6000000000 nvme_size: 54000000000 @@ -46,11 +50,13 @@ pool: rebuild_timeout: 240 pool_query_timeout: 30 properties: scrub:timed,scrub_freq:1 + container: type: POSIX control_method: daos oclass: RP_3G6 properties: cksum:crc64,cksum_size:16384,srv_cksum:on,rd_fac:2 + ior: clientslots: slots: 48 @@ -68,6 +74,7 @@ ior: # The values are set to be in the multiples of 10. # Values are appx GB. 
- [6000000000, 54000000000, 500000, 500000000] + mdtest: api: DFS client_processes: @@ -86,24 +93,33 @@ mdtest: read_bytes: 32768 verbosity_value: 1 depth: 0 + test_obj_class: oclass: - RP_4G1 - EC_2P2G1 + loop_test: iterations: 3 + aggregation: test_with_aggregation: true + rebuild: test_with_rebuild: true + checksum: test_with_checksum: false + test_rf: test_with_rf: true + blank_node: test_with_blank_node: true + snapshot: test_with_snapshot: true + pool_capacity: pool_fillup: 10 pool_test_oclass: RP_2GX diff --git a/src/tests/ftest/osa/online_drain.yaml b/src/tests/ftest/osa/online_drain.yaml index 738683694b1..9738816b799 100644 --- a/src/tests/ftest/osa/online_drain.yaml +++ b/src/tests/ftest/osa/online_drain.yaml @@ -1,12 +1,17 @@ hosts: test_servers: 3 test_clients: 1 + timeout: 1000 + job_manager: manager_timeout: 300 + setup: start_servers_once: false + skip_add_log_msg: true + server_config: name: daos_server engines_per_host: 2 @@ -14,8 +19,6 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31416 log_file: daos_server0.log log_mask: DEBUG,MEM=ERR env_vars: @@ -24,24 +27,25 @@ server_config: 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31516 log_file: daos_server1.log log_mask: DEBUG,MEM=ERR env_vars: - DD_MASK=mgmt,md,rebuild storage: auto + pool: scm_size: 12G nvme_size: 108G rebuild_timeout: 120 pool_query_timeout: 30 properties: scrub:timed,scrub_freq:1 + container: type: POSIX control_method: daos properties: cksum:crc16,cksum_size:16384,srv_cksum:on,rd_fac:1 oclass: RP_2G4 + ior: client_processes: np: 2 @@ -59,6 +63,7 @@ ior: # The values are set to be in the multiples of 10. # Values are appx GB. 
- [4000000000, 18000000000, 40000, 500000000, PASS] # [4G, 18G, 40K, 510M, PASS] + mdtest: api: DFS client_processes: @@ -77,16 +82,21 @@ mdtest: read_bytes: 32768 verbosity_value: 1 depth: 0 + test_obj_class: oclass: - RP_3G6 - RP_4G1 - S1 + loop_test: iterations: 3 + aggregation: test_with_aggregation: true + rebuild: test_with_rebuild: true + checksum: test_with_checksum: false diff --git a/src/tests/ftest/osa/online_extend.yaml b/src/tests/ftest/osa/online_extend.yaml index 0d2db08dcbd..1522c357db9 100644 --- a/src/tests/ftest/osa/online_extend.yaml +++ b/src/tests/ftest/osa/online_extend.yaml @@ -1,18 +1,24 @@ hosts: test_servers: server-[1-2] test_clients: 1 + # If we define the server under test_servers, launch.py will convert it to the # actual server name passed into --test_servers. If above test_servers is hosts, # it'll be used as one of the servers at test startup time, so use something # other than hosts. extra_servers: test_servers: server-3 + timeout: 1000 + job_manager: manager_timeout: 330 + setup: start_servers_once: false + skip_add_log_msg: true + server_config: name: daos_server engines_per_host: 2 @@ -20,8 +26,6 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31416 log_file: daos_server0.log log_mask: DEBUG,MEM=ERR env_vars: @@ -30,13 +34,12 @@ server_config: 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31516 log_file: daos_server1.log log_mask: DEBUG,MEM=ERR env_vars: - DD_MASK=mgmt,md,rebuild storage: auto + pool: scm_size: 12000000000 nvme_size: 108000000000 @@ -44,11 +47,13 @@ pool: rebuild_timeout: 120 pool_query_timeout: 30 properties: scrub:lazy + container: type: POSIX control_method: daos properties: cksum:crc16,cksum_size:16384,srv_cksum:on,rd_fac:1 oclass: RP_2G1 + ior: client_processes: np: 2 @@ -66,6 +71,7 @@ ior: # The values are set to be in the multiples of 10. # Values are appx GB. 
- [4000000000, 18000000000, 40000, 500000000, PASS] # [4G, 18G, 40K, 510M, PASS] + mdtest: api: DFS client_processes: @@ -84,19 +90,26 @@ mdtest: read_bytes: 32768 verbosity_value: 1 depth: 0 + daos_racer: runtime: 480 clush_timeout: 1000 + test_obj_class: oclass: - S1 + loop_test: iterations: 3 + aggregation: test_with_aggregation: true + rebuild: test_with_rebuild: true + checksum: test_with_checksum: false + test_ranks: rank_list: ["4,5"] diff --git a/src/tests/ftest/osa/online_parallel_test.yaml b/src/tests/ftest/osa/online_parallel_test.yaml index 63668dfca69..4eedc63838f 100644 --- a/src/tests/ftest/osa/online_parallel_test.yaml +++ b/src/tests/ftest/osa/online_parallel_test.yaml @@ -1,10 +1,14 @@ hosts: test_servers: 3 test_clients: 1 + timeout: 1110 + job_manager: manager_timeout: 400 + skip_add_log_msg: true + server_config: name: daos_server engines_per_host: 2 @@ -12,8 +16,6 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31416 log_file: daos_server0.log log_mask: DEBUG,MEM=ERR env_vars: @@ -22,22 +24,23 @@ server_config: 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31516 log_file: daos_server1.log log_mask: DEBUG,MEM=ERR env_vars: - DD_MASK=mgmt,md,rebuild storage: auto + pool: scm_size: 12000000000 nvme_size: 108000000000 svcn: 4 + container: type: POSIX control_method: daos properties: cksum:crc16,cksum_size:16384,srv_cksum:on oclass: RP_2G1 + ior: no_parallel_job: 2 client_processes: @@ -57,6 +60,7 @@ ior: # The values are set to be in the multiples of 10. # Values are appx GB. 
- [4000000000, 18000000000, 4000, 50000000, PASS] # [4G, 18G, 4K, 51M, PASS] + daos_racer: runtime: 480 clush_timeout: 1000 diff --git a/src/tests/ftest/osa/online_reintegration.yaml b/src/tests/ftest/osa/online_reintegration.yaml index 79b313604be..b5e6f7f7309 100644 --- a/src/tests/ftest/osa/online_reintegration.yaml +++ b/src/tests/ftest/osa/online_reintegration.yaml @@ -1,12 +1,17 @@ hosts: test_servers: 3 test_clients: 1 + timeout: 1110 + job_manager: manager_timeout: 300 + setup: start_servers_once: false + skip_add_log_msg: true + server_config: name: daos_server engines_per_host: 2 @@ -14,8 +19,6 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31416 log_file: daos_server0.log log_mask: DEBUG,MEM=ERR env_vars: @@ -24,13 +27,12 @@ server_config: 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31516 log_file: daos_server1.log log_mask: DEBUG,MEM=ERR env_vars: - DD_MASK=mgmt,md,rebuild storage: auto + pool: scm_size: 12000000000 nvme_size: 108000000000 @@ -38,11 +40,13 @@ pool: rebuild_timeout: 120 pool_query_timeout: 30 properties: scrub:timed + container: type: POSIX control_method: daos properties: cksum:crc16,cksum_size:16384,srv_cksum:on,rd_fac:1 oclass: RP_2G1 + ior: client_processes: np: 2 @@ -60,9 +64,11 @@ ior: # The values are set to be in the multiples of 10. # Values are appx GB. 
- [4000000000, 18000000000, 40000, 500000000, PASS] # [4G, 18G, 40K, 510M, PASS] + daos_racer: runtime: 480 clush_timeout: 1000 + mdtest: api: DFS client_processes: @@ -81,15 +87,20 @@ mdtest: read_bytes: 32768 verbosity_value: 1 depth: 0 + test_obj_class: oclass: - RP_3G6 - RP_4G1 + loop_test: iterations: 3 + aggregation: test_with_aggregation: true + rebuild: test_with_rebuild: true + checksum: test_with_checksum: false diff --git a/src/tests/ftest/performance/ior_easy.yaml b/src/tests/ftest/performance/ior_easy.yaml index b846b179277..0b515361fa2 100644 --- a/src/tests/ftest/performance/ior_easy.yaml +++ b/src/tests/ftest/performance/ior_easy.yaml @@ -12,16 +12,12 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log log_mask: ERR storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log log_mask: ERR storage: auto diff --git a/src/tests/ftest/performance/ior_hard.yaml b/src/tests/ftest/performance/ior_hard.yaml index 91b897dbb0b..fb09a7514a5 100644 --- a/src/tests/ftest/performance/ior_hard.yaml +++ b/src/tests/ftest/performance/ior_hard.yaml @@ -12,16 +12,12 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log log_mask: ERR storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log log_mask: ERR storage: auto diff --git a/src/tests/ftest/performance/mdtest_easy.yaml b/src/tests/ftest/performance/mdtest_easy.yaml index d2925536b79..02070bbcbaa 100644 --- a/src/tests/ftest/performance/mdtest_easy.yaml +++ b/src/tests/ftest/performance/mdtest_easy.yaml @@ -12,16 +12,12 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log log_mask: ERR storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - 
fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log log_mask: ERR storage: auto diff --git a/src/tests/ftest/performance/mdtest_hard.yaml b/src/tests/ftest/performance/mdtest_hard.yaml index 0599ea61319..949cfb93111 100644 --- a/src/tests/ftest/performance/mdtest_hard.yaml +++ b/src/tests/ftest/performance/mdtest_hard.yaml @@ -12,16 +12,12 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log log_mask: ERR storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log log_mask: ERR storage: auto diff --git a/src/tests/ftest/pool/create.yaml b/src/tests/ftest/pool/create.yaml index a1e7f0d30c9..c3e06b86211 100644 --- a/src/tests/ftest/pool/create.yaml +++ b/src/tests/ftest/pool/create.yaml @@ -1,11 +1,13 @@ hosts: test_servers: 3 test_clients: 1 + timeouts: test_create_max_pool_scm_only: 180 test_create_max_pool: 300 test_create_no_space: 300 test_create_no_space_loop: 3500 + server_config: name: daos_server engines_per_host: 2 @@ -13,21 +15,19 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log storage: auto + pool_1: scm_size: 1 svcn: 1 quantity: 1 + pool_2: size: 90% svcn: 1 diff --git a/src/tests/ftest/pool/create_all_hw.yaml b/src/tests/ftest/pool/create_all_hw.yaml index b47802955d6..c4fcf5cb770 100644 --- a/src/tests/ftest/pool/create_all_hw.yaml +++ b/src/tests/ftest/pool/create_all_hw.yaml @@ -44,8 +44,6 @@ server_config: engines: 0: pinned_numa_node: 0 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log storage: 0: @@ -57,8 +55,6 @@ server_config: bdev_list: ["aaaa:aa:aa.a"] 1: pinned_numa_node: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log 
storage: 0: diff --git a/src/tests/ftest/pool/create_capacity.yaml b/src/tests/ftest/pool/create_capacity.yaml index 6d8e9b359b6..06fa03b82c8 100644 --- a/src/tests/ftest/pool/create_capacity.yaml +++ b/src/tests/ftest/pool/create_capacity.yaml @@ -14,8 +14,6 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log log_mask: DEBUG targets: 1 @@ -28,8 +26,6 @@ server_config: 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log log_mask: DEBUG targets: 1 diff --git a/src/tests/ftest/pool/target_query.yaml b/src/tests/ftest/pool/target_query.yaml index e9e4fc53a7d..d146f50c38f 100644 --- a/src/tests/ftest/pool/target_query.yaml +++ b/src/tests/ftest/pool/target_query.yaml @@ -1,7 +1,9 @@ hosts: test_servers: 5 test_clients: 1 + timeout: 300 + server_config: name: daos_server engines_per_host: 2 @@ -9,26 +11,25 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31416 log_file: daos_server0.log targets: 2 storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31517 log_file: daos_server1.log targets: 2 storage: auto + pool: scm_size: 2G nvme_size: 16G pool_query_timeout: 30 + container: type: POSIX control_method: daos + ior: api: "DFS" client_processes: diff --git a/src/tests/ftest/rebuild/container_create_race.yaml b/src/tests/ftest/rebuild/container_create_race.yaml index 19876bb6cdb..45887acdc7c 100644 --- a/src/tests/ftest/rebuild/container_create_race.yaml +++ b/src/tests/ftest/rebuild/container_create_race.yaml @@ -13,16 +13,12 @@ server_config: targets: 2 pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log storage: auto 1: targets: 2 pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log storage: auto diff --git 
a/src/tests/ftest/rebuild/continues_after_stop.yaml b/src/tests/ftest/rebuild/continues_after_stop.yaml index 5f332674687..85861c95d1a 100644 --- a/src/tests/ftest/rebuild/continues_after_stop.yaml +++ b/src/tests/ftest/rebuild/continues_after_stop.yaml @@ -11,15 +11,11 @@ server_config: 0: targets: 4 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server_0.log storage: auto 1: targets: 4 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server_1.log storage: auto diff --git a/src/tests/ftest/rebuild/mdtest.yaml b/src/tests/ftest/rebuild/mdtest.yaml index 37022e4e44f..07d11c38d12 100644 --- a/src/tests/ftest/rebuild/mdtest.yaml +++ b/src/tests/ftest/rebuild/mdtest.yaml @@ -12,8 +12,6 @@ server_config: targets: 8 pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log log_mask: INFO storage: auto @@ -21,8 +19,6 @@ server_config: targets: 8 pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log log_mask: INFO storage: auto diff --git a/src/tests/ftest/rebuild/no_cap.yaml b/src/tests/ftest/rebuild/no_cap.yaml index 5ab0b089a8a..2f4f287a5a8 100644 --- a/src/tests/ftest/rebuild/no_cap.yaml +++ b/src/tests/ftest/rebuild/no_cap.yaml @@ -3,7 +3,9 @@ hosts: test_servers: 2 test_clients: 1 + timeout: 360 + server_config: name: daos_server # reduce cart timeout to make IV update return timeout @@ -15,26 +17,25 @@ server_config: targets: 1 pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log storage: auto 1: targets: 1 pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log storage: auto + container: control_method: daos properties: "rd_fac:1" + pool: scm_size: 1G pool_query_timeout: 30 pool_query_interval: 1 test_data_list: [1048576] oclass: "OC_RP_4G1" + rebuild: rank_to_kill: 1 diff --git 
a/src/tests/ftest/rebuild/pool_destroy_race.yaml b/src/tests/ftest/rebuild/pool_destroy_race.yaml index f1a43623dbf..987332712e8 100644 --- a/src/tests/ftest/rebuild/pool_destroy_race.yaml +++ b/src/tests/ftest/rebuild/pool_destroy_race.yaml @@ -14,16 +14,12 @@ server_config: targets: 4 pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log storage: auto 1: targets: 4 pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log storage: auto diff --git a/src/tests/ftest/rebuild/with_ior.yaml b/src/tests/ftest/rebuild/with_ior.yaml index 963180310ea..34c6f2d9eda 100644 --- a/src/tests/ftest/rebuild/with_ior.yaml +++ b/src/tests/ftest/rebuild/with_ior.yaml @@ -3,9 +3,11 @@ hosts: test_clients: 1 timeout: 360 + agent_config: #cache_expiration: 1 disable_caching: true + server_config: name: daos_server engines_per_host: 2 @@ -14,8 +16,6 @@ server_config: targets: 2 pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log log_mask: INFO storage: auto @@ -23,8 +23,6 @@ server_config: targets: 2 pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log log_mask: INFO storage: auto diff --git a/src/tests/ftest/recovery/cat_recov_core.yaml b/src/tests/ftest/recovery/cat_recov_core.yaml index 0e148ae1f2c..039ab7b130a 100644 --- a/src/tests/ftest/recovery/cat_recov_core.yaml +++ b/src/tests/ftest/recovery/cat_recov_core.yaml @@ -10,8 +10,6 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 0 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log log_mask: DEBUG,MEM=ERR env_vars: @@ -26,8 +24,6 @@ server_config: 1: pinned_numa_node: 1 nr_xs_helpers: 0 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log log_mask: DEBUG,MEM=ERR env_vars: diff --git a/src/tests/ftest/recovery/check_policy.yaml 
b/src/tests/ftest/recovery/check_policy.yaml index 12e26ea6686..55d0111c106 100644 --- a/src/tests/ftest/recovery/check_policy.yaml +++ b/src/tests/ftest/recovery/check_policy.yaml @@ -8,14 +8,10 @@ server_config: engines_per_host: 2 engines: 0: - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log nr_xs_helpers: 1 storage: auto 1: - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log nr_xs_helpers: 1 storage: auto diff --git a/src/tests/ftest/recovery/check_repair.yaml b/src/tests/ftest/recovery/check_repair.yaml index 65141559ced..f162fc0a68c 100644 --- a/src/tests/ftest/recovery/check_repair.yaml +++ b/src/tests/ftest/recovery/check_repair.yaml @@ -8,14 +8,10 @@ server_config: engines_per_host: 2 engines: 0: - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log nr_xs_helpers: 1 storage: auto 1: - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log nr_xs_helpers: 1 storage: auto diff --git a/src/tests/ftest/recovery/check_start_corner_case.yaml b/src/tests/ftest/recovery/check_start_corner_case.yaml index 2863ca4e4f0..d4ba8437916 100644 --- a/src/tests/ftest/recovery/check_start_corner_case.yaml +++ b/src/tests/ftest/recovery/check_start_corner_case.yaml @@ -9,14 +9,10 @@ server_config: engines_per_host: 2 engines: 0: - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log nr_xs_helpers: 1 storage: auto 1: - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log nr_xs_helpers: 1 storage: auto diff --git a/src/tests/ftest/recovery/check_start_options.yaml b/src/tests/ftest/recovery/check_start_options.yaml index 3d1825a81ed..857c79dd527 100644 --- a/src/tests/ftest/recovery/check_start_options.yaml +++ b/src/tests/ftest/recovery/check_start_options.yaml @@ -12,8 +12,6 @@ server_config: engines_per_host: 1 engines: 0: - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log nr_xs_helpers: 1 storage: auto diff --git 
a/src/tests/ftest/recovery/check_stop.yaml b/src/tests/ftest/recovery/check_stop.yaml index 2863ca4e4f0..d4ba8437916 100644 --- a/src/tests/ftest/recovery/check_stop.yaml +++ b/src/tests/ftest/recovery/check_stop.yaml @@ -9,14 +9,10 @@ server_config: engines_per_host: 2 engines: 0: - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log nr_xs_helpers: 1 storage: auto 1: - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log nr_xs_helpers: 1 storage: auto diff --git a/src/tests/ftest/recovery/ms_membership.yaml b/src/tests/ftest/recovery/ms_membership.yaml index db567fddca8..04665b2b39c 100644 --- a/src/tests/ftest/recovery/ms_membership.yaml +++ b/src/tests/ftest/recovery/ms_membership.yaml @@ -9,14 +9,10 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log storage: auto diff --git a/src/tests/ftest/recovery/pool_list_consolidation.yaml b/src/tests/ftest/recovery/pool_list_consolidation.yaml index 0e37358b66c..3abd589e99f 100644 --- a/src/tests/ftest/recovery/pool_list_consolidation.yaml +++ b/src/tests/ftest/recovery/pool_list_consolidation.yaml @@ -9,14 +9,10 @@ server_config: engines_per_host: 2 engines: 0: - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log nr_xs_helpers: 1 storage: auto 1: - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log nr_xs_helpers: 1 storage: auto diff --git a/src/tests/ftest/recovery/pool_membership.yaml b/src/tests/ftest/recovery/pool_membership.yaml index e51e12f24b1..39a990dddb3 100644 --- a/src/tests/ftest/recovery/pool_membership.yaml +++ b/src/tests/ftest/recovery/pool_membership.yaml @@ -9,17 +9,13 @@ server_config: engines: 0: storage: auto - fabric_iface_port: 31416 log_file: daos_server_0.log pinned_numa_node: 0 - fabric_iface: ib0 targets: 8 
1: storage: auto - fabric_iface_port: 31516 log_file: daos_server_1.log pinned_numa_node: 1 - fabric_iface: ib1 targets: 8 pool: diff --git a/src/tests/ftest/scrubber/aggregation.yaml b/src/tests/ftest/scrubber/aggregation.yaml index b4d061357a2..cfee3de68f8 100644 --- a/src/tests/ftest/scrubber/aggregation.yaml +++ b/src/tests/ftest/scrubber/aggregation.yaml @@ -1,9 +1,12 @@ hosts: test_servers: 3 test_clients: 1 + timeout: 660 + setup: start_servers_once: false + server_config: name: daos_server engines_per_host: 2 @@ -12,8 +15,6 @@ server_config: targets: 8 pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31416 log_file: daos_server0.log log_mask: DEBUG,MEM=ERR env_vars: @@ -23,22 +24,23 @@ server_config: targets: 8 pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31516 log_file: daos_server1.log log_mask: DEBUG,MEM=ERR env_vars: - DD_MASK=mgmt,md,rebuild storage: auto + pool: size: 80% svcn: 4 rebuild_timeout: 120 pool_query_timeout: 30 + container: type: POSIX control_method: daos properties: "cksum:sha512,rd_fac:1" + ior: &ior_base ior_timeout: 60 test_file: daos:testFile @@ -46,16 +48,19 @@ ior: &ior_base flags: "-v -W -w -r -R -k" api: DFS dfs_oclass: RP_2GX + ior_small_block_size: <<: *ior_base transfer_size: 5M block_size: 250M + ior_large_block_size: <<: *ior_base transfer_size: 1M block_size: 20G client_processes: np: 6 + faults: fault_list: - DAOS_DELAYED_CSUM_CORRUPT_DISK diff --git a/src/tests/ftest/scrubber/basic.yaml b/src/tests/ftest/scrubber/basic.yaml index 35ceff204b4..e8ad2cdc111 100644 --- a/src/tests/ftest/scrubber/basic.yaml +++ b/src/tests/ftest/scrubber/basic.yaml @@ -1,9 +1,12 @@ hosts: test_servers: 2 test_clients: 1 + timeout: 150 + setup: start_servers_once: false + server_config: name: daos_server engines_per_host: 2 @@ -11,8 +14,6 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31416 log_file: daos_server0.log log_mask: 
DEBUG,MEM=ERR env_vars: @@ -21,8 +22,6 @@ server_config: 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31516 log_file: daos_server1.log log_mask: DEBUG,MEM=ERR env_vars: @@ -30,12 +29,15 @@ server_config: storage: auto transport_config: allow_insecure: true + agent_config: transport_config: allow_insecure: true + dmg: transport_config: allow_insecure: true + pool: scm_size: 6000000000 nvme_size: 54000000000 @@ -43,10 +45,12 @@ pool: rebuild_timeout: 120 pool_query_timeout: 30 properties: "scrub:timed,scrub_freq:1" + container: type: POSIX control_method: daos oclass: RP_2G1 + ior: ior_timeout: 60 client_processes: diff --git a/src/tests/ftest/scrubber/check_csum_metrics_mdtest.yaml b/src/tests/ftest/scrubber/check_csum_metrics_mdtest.yaml index 25e87b233a5..962a5fb73e9 100644 --- a/src/tests/ftest/scrubber/check_csum_metrics_mdtest.yaml +++ b/src/tests/ftest/scrubber/check_csum_metrics_mdtest.yaml @@ -1,7 +1,9 @@ hosts: test_servers: 2 test_clients: 2 + timeout: 360 + server_config: name: daos_server engines_per_host: 2 @@ -10,8 +12,6 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log log_mask: DEBUG,MEM=ERR env_vars: @@ -20,23 +20,24 @@ server_config: 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log log_mask: DEBUG,MEM=ERR env_vars: - DD_MASK=csum,mgmt,md,rebuild storage: auto + pool: size: 50% svcn: 4 pool_query_timeout: 30 properties: "scrub:timed,scrub_freq:1" + container: type: POSIX control_method: daos oclass: RP_2G1 properties: cksum:crc16,cksum_size:16384,srv_cksum:on + mdtest: client_processes: ppn: 4 diff --git a/src/tests/ftest/scrubber/csum_fault.yaml b/src/tests/ftest/scrubber/csum_fault.yaml index 4f2c294fa95..2ba60342792 100644 --- a/src/tests/ftest/scrubber/csum_fault.yaml +++ b/src/tests/ftest/scrubber/csum_fault.yaml @@ -1,9 +1,12 @@ hosts: test_servers: 2 
test_clients: 1 + timeout: 150 + setup: start_servers_once: false + server_config: name: daos_server engines_per_host: 2 @@ -12,8 +15,6 @@ server_config: targets: 8 pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31416 log_file: daos_server0.log log_mask: DEBUG,MEM=ERR env_vars: @@ -23,8 +24,6 @@ server_config: targets: 8 pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31516 log_file: daos_server1.log log_mask: DEBUG,MEM=ERR env_vars: @@ -32,12 +31,15 @@ server_config: storage: auto transport_config: allow_insecure: true + agent_config: transport_config: allow_insecure: true + dmg: transport_config: allow_insecure: true + pool: scm_size: 6G nvme_size: 54G @@ -45,11 +47,13 @@ pool: rebuild_timeout: 120 pool_query_timeout: 30 properties: "scrub:timed,scrub_freq:1" + container: type: POSIX control_method: daos oclass: RP_2GX properties: "cksum:crc16" + ior: ior_timeout: 60 client_processes: @@ -68,6 +72,7 @@ ior: - [1M, 2G] obj_class: - RP_2GX + faults: fault_list: - DAOS_CSUM_CORRUPT_DISK diff --git a/src/tests/ftest/scrubber/frequency.yaml b/src/tests/ftest/scrubber/frequency.yaml index 14cc19321f0..f5b92ba96c9 100644 --- a/src/tests/ftest/scrubber/frequency.yaml +++ b/src/tests/ftest/scrubber/frequency.yaml @@ -1,9 +1,12 @@ hosts: test_servers: 2 test_clients: 1 + timeout: 1800 + setup: start_servers_once: false + server_config: name: daos_server engines_per_host: 2 @@ -11,8 +14,6 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31416 log_file: daos_server0.log log_mask: DEBUG,MEM=ERR env_vars: @@ -21,13 +22,12 @@ server_config: 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31516 log_file: daos_server1.log log_mask: DEBUG,MEM=ERR env_vars: - DD_MASK=mgmt,md,rebuild storage: auto + pool: scm_size: 6000000000 nvme_size: 54000000000 @@ -35,10 +35,12 @@ pool: rebuild_timeout: 120 pool_query_timeout: 30 properties: 
"scrub:timed" + container: type: POSIX control_method: daos oclass: RP_2G1 + ior: ior_timeout: 60 client_processes: diff --git a/src/tests/ftest/scrubber/rebuild.yaml b/src/tests/ftest/scrubber/rebuild.yaml index ab6cff21835..6b28ad4521e 100644 --- a/src/tests/ftest/scrubber/rebuild.yaml +++ b/src/tests/ftest/scrubber/rebuild.yaml @@ -1,9 +1,12 @@ hosts: test_servers: 3 test_clients: 1 + timeout: 400 + setup: start_servers_once: false + server_config: name: daos_server engines_per_host: 2 @@ -12,8 +15,6 @@ server_config: targets: 8 pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31416 log_file: daos_server0.log log_mask: DEBUG,MEM=ERR env_vars: @@ -23,13 +24,12 @@ server_config: targets: 8 pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31516 log_file: daos_server1.log log_mask: DEBUG,MEM=ERR env_vars: - DD_MASK=mgmt,md,rebuild storage: auto + pool: scm_size: 6G nvme_size: 54G @@ -37,11 +37,13 @@ pool: rebuild_timeout: 120 pool_query_timeout: 30 properties: "scrub:timed,scrub_freq:1,scrub_thresh:2" + container: type: POSIX control_method: daos oclass: RP_2GX properties: "cksum:sha256,rd_fac:1" + ior: ior_timeout: 60 client_processes: @@ -55,6 +57,7 @@ ior: block_size: 2G dfs_oclass: RP_2GX dfs_dir_oclass: RP_2GX + faults: fault_list: - DAOS_CSUM_CORRUPT_DISK diff --git a/src/tests/ftest/scrubber/snapshot.yaml b/src/tests/ftest/scrubber/snapshot.yaml index fe332df4be9..6c8791bcbfa 100644 --- a/src/tests/ftest/scrubber/snapshot.yaml +++ b/src/tests/ftest/scrubber/snapshot.yaml @@ -1,9 +1,12 @@ hosts: test_servers: 3 test_clients: 1 + timeout: 400 + setup: start_servers_once: false + server_config: name: daos_server engines_per_host: 2 @@ -12,8 +15,6 @@ server_config: targets: 8 pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31416 log_file: daos_server0.log log_mask: DEBUG,MEM=ERR env_vars: @@ -23,13 +24,12 @@ server_config: targets: 8 pinned_numa_node: 1 nr_xs_helpers: 1 - 
fabric_iface: ib1 - fabric_iface_port: 31516 log_file: daos_server1.log log_mask: DEBUG,MEM=ERR env_vars: - DD_MASK=mgmt,md,rebuild storage: auto + pool: scm_size: 6G nvme_size: 54G @@ -37,11 +37,13 @@ pool: rebuild_timeout: 120 pool_query_timeout: 30 properties: "scrub:timed,scrub_freq:2" + container: type: POSIX control_method: daos oclass: RP_2GX properties: "cksum:crc64,rd_fac:1" + ior: ior_timeout: 60 client_processes: @@ -55,6 +57,7 @@ ior: block_size: 2G dfs_oclass: RP_2GX dfs_dir_oclass: RP_2GX + faults: fault_list: - DAOS_CSUM_CORRUPT_DISK diff --git a/src/tests/ftest/scrubber/target_auto_eviction.yaml b/src/tests/ftest/scrubber/target_auto_eviction.yaml index 2638f410d07..60f21945562 100644 --- a/src/tests/ftest/scrubber/target_auto_eviction.yaml +++ b/src/tests/ftest/scrubber/target_auto_eviction.yaml @@ -1,9 +1,12 @@ hosts: test_servers: 2 test_clients: 1 + timeout: 400 + setup: start_servers_once: false + server_config: name: daos_server engines_per_host: 2 @@ -12,8 +15,6 @@ server_config: targets: 8 pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31416 log_file: daos_server0.log log_mask: DEBUG,MEM=ERR env_vars: @@ -23,8 +24,6 @@ server_config: targets: 8 pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31516 log_file: daos_server1.log log_mask: DEBUG,MEM=ERR env_vars: @@ -32,12 +31,15 @@ server_config: storage: auto transport_config: allow_insecure: true + agent_config: transport_config: allow_insecure: true + dmg: transport_config: allow_insecure: true + pool: scm_size: 6G nvme_size: 54G @@ -45,11 +47,13 @@ pool: rebuild_timeout: 120 pool_query_timeout: 30 properties: "scrub:timed,scrub_freq:1,scrub_thresh:2" + container: type: POSIX control_method: daos oclass: RP_2GX properties: "cksum:crc16" + ior: ior_timeout: 60 client_processes: @@ -63,6 +67,7 @@ ior: block_size: 2G dfs_oclass: RP_2GX dfs_dir_oclass: RP_2GX + faults: fault_list: - DAOS_CSUM_CORRUPT_DISK diff --git 
a/src/tests/ftest/server/cpu_usage.yaml b/src/tests/ftest/server/cpu_usage.yaml index 4bf4a7713ec..ec11f001e14 100644 --- a/src/tests/ftest/server/cpu_usage.yaml +++ b/src/tests/ftest/server/cpu_usage.yaml @@ -1,25 +1,30 @@ hosts: test_servers: 1 test_clients: 1 + timeout: 130 + server_config: engines_per_host: 1 engines: 0: targets: 8 nr_xs_helpers: 8 - fabric_iface: ib0 storage: auto + ior: client_processes: np: 1 flags: "-v -D 60 -w -r" transfer_size: 1M block_size: 1G + pool: scm_size: 10G nvme_size: 100G + container: type: POSIX control_method: daos + usage_limit: 200 diff --git a/src/tests/ftest/server/daos_server_restart.yaml b/src/tests/ftest/server/daos_server_restart.yaml index 3fa1cd0a742..99263715abf 100644 --- a/src/tests/ftest/server/daos_server_restart.yaml +++ b/src/tests/ftest/server/daos_server_restart.yaml @@ -15,15 +15,11 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log storage: auto diff --git a/src/tests/ftest/server/metadata.yaml b/src/tests/ftest/server/metadata.yaml index 48a6c84ef61..93ca27ab35b 100644 --- a/src/tests/ftest/server/metadata.yaml +++ b/src/tests/ftest/server/metadata.yaml @@ -17,8 +17,6 @@ server_config: nr_xs_helpers: 4 first_core: 0 pinned_numa_node: 0 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log log_mask: DEBUG,MEM=ERR env_vars: @@ -34,8 +32,6 @@ server_config: nr_xs_helpers: 4 first_core: 0 pinned_numa_node: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log log_mask: DEBUG,MEM=ERR env_vars: diff --git a/src/tests/ftest/server/storage_tiers.yaml b/src/tests/ftest/server/storage_tiers.yaml index 6edced47786..6529446c22d 100644 --- a/src/tests/ftest/server/storage_tiers.yaml +++ b/src/tests/ftest/server/storage_tiers.yaml @@ -14,15 +14,11 @@ timeout: 30 engine_0: 
&engine_0_base pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log engine_1: &engine_1_base pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log storage_0: &storage_dcpm diff --git a/src/tests/ftest/soak/faults.yaml b/src/tests/ftest/soak/faults.yaml index 165cb573031..4ce3c4880d6 100644 --- a/src/tests/ftest/soak/faults.yaml +++ b/src/tests/ftest/soak/faults.yaml @@ -22,7 +22,6 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 2 - fabric_iface_port: 31317 log_file: daos_server0.log log_mask: ERR env_vars: @@ -31,7 +30,6 @@ server_config: 1: pinned_numa_node: 1 nr_xs_helpers: 2 - fabric_iface_port: 31417 log_file: daos_server1.log log_mask: ERR env_vars: diff --git a/src/tests/ftest/soak/harassers.yaml b/src/tests/ftest/soak/harassers.yaml index af7065fb3d8..a2032e85d61 100644 --- a/src/tests/ftest/soak/harassers.yaml +++ b/src/tests/ftest/soak/harassers.yaml @@ -22,7 +22,6 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 2 - fabric_iface_port: 31317 log_file: daos_server0.log log_mask: INFO env_vars: @@ -32,7 +31,6 @@ server_config: 1: pinned_numa_node: 1 nr_xs_helpers: 2 - fabric_iface_port: 31417 log_file: daos_server1.log log_mask: INFO env_vars: diff --git a/src/tests/ftest/soak/smoke.yaml b/src/tests/ftest/soak/smoke.yaml index ca1d4fb7a4c..948fe1f8802 100644 --- a/src/tests/ftest/soak/smoke.yaml +++ b/src/tests/ftest/soak/smoke.yaml @@ -24,7 +24,6 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 2 - fabric_iface_port: 31317 log_file: daos_server0.log log_mask: ERR env_vars: @@ -33,7 +32,6 @@ server_config: 1: pinned_numa_node: 1 nr_xs_helpers: 2 - fabric_iface_port: 31417 log_file: daos_server1.log log_mask: ERR env_vars: diff --git a/src/tests/ftest/soak/stress.yaml b/src/tests/ftest/soak/stress.yaml index 15a6a3033a3..b3e9b87dd91 100644 --- a/src/tests/ftest/soak/stress.yaml +++ b/src/tests/ftest/soak/stress.yaml @@ -27,7 
+27,6 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 2 - fabric_iface_port: 31317 log_file: daos_server0.log log_mask: ERR env_vars: @@ -37,7 +36,6 @@ server_config: 1: pinned_numa_node: 1 nr_xs_helpers: 2 - fabric_iface_port: 31417 log_file: daos_server1.log log_mask: ERR env_vars: diff --git a/src/tests/ftest/telemetry/engine_events.yaml b/src/tests/ftest/telemetry/engine_events.yaml index 713eac94e9f..4fa3be16d3d 100644 --- a/src/tests/ftest/telemetry/engine_events.yaml +++ b/src/tests/ftest/telemetry/engine_events.yaml @@ -9,14 +9,10 @@ server_config: 0: targets: 4 nr_xs_helpers: 0 - fabric_iface: ib0 - fabric_iface_port: 31416 log_file: daos_server_0.log storage: auto 1: targets: 4 nr_xs_helpers: 0 - fabric_iface: ib1 - fabric_iface_port: 31516 log_file: daos_server_1.log storage: auto diff --git a/src/tests/ftest/telemetry/pool_space_metrics.yaml b/src/tests/ftest/telemetry/pool_space_metrics.yaml index 9a2e261f6b2..a7c2632cb3b 100644 --- a/src/tests/ftest/telemetry/pool_space_metrics.yaml +++ b/src/tests/ftest/telemetry/pool_space_metrics.yaml @@ -1,7 +1,9 @@ hosts: test_servers: 2 test_clients: 1 + timeout: 180 + server_config: name: daos_server engines_per_host: 2 @@ -10,28 +12,28 @@ server_config: targets: 4 pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_engine_0.log storage: auto 1: targets: 4 pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 32317 log_file: daos_engine_1.log storage: auto + pool_scm: scm_size: 1G nvme_size: 0 + pool_scm_nvme: size: 80% + container: type: POSIX control_method: daos properties: rd_fac:0 oclass: SX + ior: api: DFS transfer_size: 1048576 # 1MiB @@ -43,6 +45,7 @@ ior: dfs_destroy: false env_vars: - D_LOG_MASK=INFO + mpirun: args: "--bind-to socket" diff --git a/src/tests/ftest/util/environment_utils.py b/src/tests/ftest/util/environment_utils.py index bed802347f3..8835fd90eb6 100644 --- a/src/tests/ftest/util/environment_utils.py +++ 
b/src/tests/ftest/util/environment_utils.py @@ -10,9 +10,8 @@ from ClusterShell.NodeSet import NodeSet # pylint: disable=import-error,no-name-in-module -from util.host_utils import get_local_host from util.network_utils import (PROVIDER_ALIAS, SUPPORTED_PROVIDERS, NetworkException, - get_common_provider, get_fastest_interface) + get_common_provider, get_fastest_interfaces) from util.run_utils import run_remote @@ -172,7 +171,7 @@ def set_defaults(self, logger, servers=None, clients=None, provider=None, insecu if self.user_dir is None: self.user_dir = os.path.join(self.log_dir, "user") if self.interface is None: - self.interface = self._default_interface(logger, all_hosts) + self.interface = self._default_interface(logger, servers) if self.provider is None: self.provider = self._default_provider(logger, servers) if self.insecure_mode is None: @@ -327,15 +326,18 @@ def _default_interface(self, logger, hosts): Returns: str: the default interface; can be None """ - interface = os.environ.get("D_INTERFACE") - if interface is None and hosts: - # Find all the /sys/class/net interfaces on the launch node (excluding lo) - logger.debug("Detecting network devices - D_INTERFACE not set") - try: - interface = get_fastest_interface(logger, hosts | get_local_host()) - except NetworkException as error: - raise TestEnvironmentException("Error obtaining a default interface!") from error - return interface + if not hosts: + return None + + logger.debug( + "Detecting network devices on %s - %s not set", hosts, self.__ENV_VAR_MAP['interface']) + try: + interfaces = get_fastest_interfaces(logger, hosts) + except NetworkException as error: + raise TestEnvironmentException("Error obtaining a default interface!") from error + + logger.debug(" Found interface(s): %s", ",".join(interfaces)) + return ",".join(interfaces) @property def provider(self): @@ -373,12 +375,13 @@ def _default_provider(self, logger, hosts): Returns: str: the default provider; can be None """ - if not hosts: + if not 
hosts or self.interface is None: return None + first_interface = self.interface.split(",", maxsplit=1)[0] logger.debug( - "Detecting provider for %s - %s not set", - self.interface, self.__ENV_VAR_MAP['provider']) + "Detecting provider for %s on %s - %s not set", + first_interface, hosts, self.__ENV_VAR_MAP['provider']) provider = None supported = list(SUPPORTED_PROVIDERS) @@ -392,7 +395,7 @@ def _default_provider(self, logger, hosts): supported = list(filter(lambda x: 'verbs' not in x, supported)) # Detect all supported providers for this interface that are common to all of the hosts - common_providers = get_common_provider(logger, hosts, self.interface, supported) + common_providers = get_common_provider(logger, hosts, first_interface, supported) if common_providers: # Select the preferred found provider based upon SUPPORTED_PROVIDERS order logger.debug("Supported providers detected: %s", common_providers) @@ -404,9 +407,9 @@ def _default_provider(self, logger, hosts): # Report an error if a provider cannot be found if not provider: raise TestEnvironmentException( - f"Error obtaining a supported provider for {self.interface} from: {supported}") + f"Error obtaining a supported provider for {first_interface} from: {supported}") - logger.debug(" Found %s provider for %s", provider, self.interface) + logger.debug(" Found %s provider for %s", provider, first_interface) return provider @property diff --git a/src/tests/ftest/util/network_utils.py b/src/tests/ftest/util/network_utils.py index e3802364d8f..3f8757bdd3d 100644 --- a/src/tests/ftest/util/network_utils.py +++ b/src/tests/ftest/util/network_utils.py @@ -1,5 +1,6 @@ """ (C) Copyright 2022-2024 Intel Corporation. 
+ (C) Copyright 2025 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -384,8 +385,8 @@ def get_interface_providers(interface, provider_data): return providers -def get_fastest_interface(logger, hosts, verbose=True): - """Get the fastest active interface common to all hosts. +def get_fastest_interfaces(logger, hosts, verbose=True): + """Get the fastest active interfaces common to all hosts. Args: logger (Logger): logger for the messages produced by this method @@ -396,12 +397,12 @@ def get_fastest_interface(logger, hosts, verbose=True): NetworkException: if there is an error detecting the fastest active interface Returns: - str: the fastest active interface common to all hosts specified + list: the fastest active interfaces common to all hosts specified """ common_interfaces = get_common_interfaces(logger, hosts, verbose) # Find the speed of each common active interface in order to be able to choose the fastest - interface_speeds = {} + interfaces_at_speed = {} for interface in common_interfaces: detected_speeds = get_interface_speeds(logger, hosts, interface, verbose) speed_list = [] @@ -411,26 +412,22 @@ def get_fastest_interface(logger, hosts, verbose=True): speed_hosts.add(node_set) if speed_list and speed_hosts == hosts: # Only include interface speeds if a speed is detected on all the hosts - interface_speeds[interface] = min(speed_list) + min_speed = min(speed_list) + if min_speed not in interfaces_at_speed: + interfaces_at_speed[min_speed] = [] + interfaces_at_speed[min_speed].append(interface) + fastest_interfaces = None logger.info("Active network interface speeds on %s:", hosts) - available_interfaces = {} - for interface in sorted(interface_speeds): - logger.info(" - %-8s (speed: %6s)", interface, interface_speeds[interface]) - - # Only include the first active interface (as determined by alphabetic sort) for each speed - if interface_speeds[interface] not in available_interfaces: - 
available_interfaces[interface_speeds[interface]] = interface - - logger.info("Available interfaces on %s: %s", hosts, available_interfaces) - try: - # Select the fastest active interface available by sorting the speed - interface = available_interfaces[sorted(available_interfaces)[-1]] - except IndexError as error: - raise NetworkException("Error obtaining a default interface!") from error - - logger.info("Fastest interface detected on %s: %s", hosts, interface) - return interface + for speed in sorted(interfaces_at_speed): + fastest_interfaces = sorted(interfaces_at_speed[speed]) + logger.info(" - speed: %7s => %s", speed, fastest_interfaces) + + if fastest_interfaces is None: + raise NetworkException(f"Error obtaining default interfaces w/ equal speed on {hosts}!") + + logger.info("Fastest interfaces detected on %s: %s", hosts, fastest_interfaces) + return fastest_interfaces def get_common_provider(logger, hosts, interface, supported=None, verbose=True): diff --git a/src/tests/ftest/util/server_utils_params.py b/src/tests/ftest/util/server_utils_params.py index c6fba0bbf5d..4ce4a39bc71 100644 --- a/src/tests/ftest/util/server_utils_params.py +++ b/src/tests/ftest/util/server_utils_params.py @@ -486,9 +486,19 @@ def __init__(self, base_namespace, index, provider=None, max_storage_tiers=MAX_S self._max_storage_tiers = max_storage_tiers super().__init__(os.path.join(*namespace)) - # Use environment variables to get default parameters - default_interface = os.environ.get("DAOS_TEST_FABRIC_IFACE", "eth0") - default_port = int(os.environ.get("D_PORT", 31416)) + # Use environment variables to get default parameters. Supports lists to define values for + # multiple engines through comma-separated strings. If the index exceeds the list length + # then values are reused round-robin style. 
+ try: + _defaults = os.environ.get("DAOS_TEST_FABRIC_IFACE").split(",") + default_interface = list(filter(None, _defaults))[index % len(_defaults)] + except (AttributeError, IndexError): + default_interface = f"eth{index}" + try: + _defaults = [int(port) for port in os.environ.get("D_PORT").split(",")] + default_port = list(filter(None, _defaults))[index % len(_defaults)] + except (AttributeError, ValueError, IndexError): + default_port = 31317 + (100 * index) # All log files should be placed in the same directory on each host # to enable easy log file archiving by launch.py diff --git a/src/tests/ftest/vmd/fault_reintegration.yaml b/src/tests/ftest/vmd/fault_reintegration.yaml index 735e059937b..ed5bbd1a0c3 100644 --- a/src/tests/ftest/vmd/fault_reintegration.yaml +++ b/src/tests/ftest/vmd/fault_reintegration.yaml @@ -1,9 +1,12 @@ hosts: test_servers: 3 test_clients: 1 + timeout: 360 + setup: start_servers_once: false + server_config: name: daos_server engines_per_host: 2 @@ -11,28 +14,27 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log storage: auto 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log storage: auto + pool: size: 90% svcn: 3 rebuild_timeout: 120 pool_query_timeout: 30 + container: type: POSIX control_method: daos properties: cksum:crc16,cksum_size:16384,srv_cksum:on,rd_fac:2 file_oclass: RP_3G1 dir_oclass: RP_3G1 + ior: client_processes: np: 48 @@ -49,7 +51,9 @@ ior: # - [transfersize, blocksize] # The values are set to be in the multiples of 10. 
- [4000, 5000000] # [4K, 5M] + aggregation: test_with_aggregation: true + rebuild: test_with_rebuild: true diff --git a/src/tests/ftest/vmd/led.yaml b/src/tests/ftest/vmd/led.yaml index 1dec111e091..e77437e6a35 100644 --- a/src/tests/ftest/vmd/led.yaml +++ b/src/tests/ftest/vmd/led.yaml @@ -1,7 +1,9 @@ hosts: test_servers: 2 test_clients: 1 + timeout: 300 + server_config: name: daos_server engines_per_host: 2 @@ -9,8 +11,6 @@ server_config: 0: pinned_numa_node: 0 nr_xs_helpers: 1 - fabric_iface: ib0 - fabric_iface_port: 31317 log_file: daos_server0.log storage: 0: @@ -24,8 +24,6 @@ server_config: 1: pinned_numa_node: 1 nr_xs_helpers: 1 - fabric_iface: ib1 - fabric_iface_port: 31417 log_file: daos_server1.log storage: 0: @@ -36,14 +34,17 @@ server_config: class: nvme bdev_list: ["bbbb:bb:bb.b"] bdev_class: nvme + pool: mode: 146 name: daos_server size: 50% properties: ec_cell_sz:128KiB + container: type: POSIX properties: cksum:crc16,cksum_size:16384,srv_cksum:on control_method: daos + dfuse: disable_caching: True From f4eb910353391bfde5986acc7a48799327d7be0e Mon Sep 17 00:00:00 2001 From: Liang Zhen Date: Tue, 23 Dec 2025 12:38:44 +0800 Subject: [PATCH 098/253] DAOS-18192 rebuild: global resource control for rebuild (#17267) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * DAOS-18192 rebuild: global resource control for rebuild Previously, the resource controls within the rebuild system—such as the limits on ULT (user-level thread) counts and DMA buffer usage—were scoped per pool. This meant that when rebuild or migration operations occurred across multiple pools, each pool operated within its own local resource boundaries. As a result, simultaneous rebuilds in several pools could lead to excessive system-wide consumption of threads and memory, potentially impacting performance and system stability. 
This patch reworks the rebuild migration resource management to introduce global limits on the number of ULTs and DMA buffers the rebuild system can use across all pools. A centralized migration resource manager is established per target to coordinate these resources across all active pools, preventing overallocation and minimizing resource contention. This patch also bumps the inflight size limit to 50% of DMA buffer Signed-off-by: Liang Zhen --- src/object/srv_internal.h | 21 +- src/object/srv_mod.c | 8 + src/object/srv_obj_migrate.c | 471 ++++++++++++++++++++++++----------- 3 files changed, 342 insertions(+), 158 deletions(-) diff --git a/src/object/srv_internal.h b/src/object/srv_internal.h index 577f55ccf45..3d3fbc15447 100644 --- a/src/object/srv_internal.h +++ b/src/object/srv_internal.h @@ -26,6 +26,8 @@ extern struct dss_module_key obj_module_key; +struct migr_res_manager; + /* Per pool attached to the migrate tls(per xstream) */ struct migrate_pool_tls { /* POOL UUID and pool to be migrated */ @@ -75,18 +77,13 @@ struct migrate_pool_tls { */ uint32_t mpt_tgt_obj_ult_cnt; uint32_t mpt_tgt_dkey_ult_cnt; + /* The current in-flight data size */ + uint64_t mpt_inflight_size; - /* reference count for the structure */ - uint64_t mpt_refcount; + struct migr_res_manager *mpt_rmg; - /* The current in-flight iod, mainly used for controlling - * rebuild in-flight rate to avoid the DMA buffer overflow. 
- */ - uint64_t mpt_inflight_size; - uint64_t mpt_inflight_max_size; - ABT_cond mpt_inflight_cond; - ABT_mutex mpt_inflight_mutex; - uint32_t mpt_inflight_max_ult; + /* reference count for the structure */ + uint64_t mpt_refcount; uint32_t mpt_opc; /* The new layout version for upgrade job */ @@ -147,6 +144,10 @@ struct obj_tgt_punch_args { void migrate_pool_tls_destroy(struct migrate_pool_tls *tls); +int +obj_migrate_init(void); +void +obj_migrate_fini(void); struct obj_tls { d_sg_list_t ot_echo_sgl; diff --git a/src/object/srv_mod.c b/src/object/srv_mod.c index 822658e7c69..a668089a71d 100644 --- a/src/object/srv_mod.c +++ b/src/object/srv_mod.c @@ -40,9 +40,16 @@ obj_mod_init(void) D_ERROR("failed to obj_ec_codec_init\n"); goto out_class; } + rc = obj_migrate_init(); + if (rc) { + D_ERROR("failed to init migration resource managers\n"); + goto out_ec; + } return 0; +out_ec: + obj_ec_codec_fini(); out_class: obj_class_fini(); out_utils: @@ -55,6 +62,7 @@ obj_mod_init(void) static int obj_mod_fini(void) { + obj_migrate_fini(); obj_ec_codec_fini(); obj_class_fini(); obj_utils_fini(); diff --git a/src/object/srv_obj_migrate.c b/src/object/srv_obj_migrate.c index 4c6a801962b..73236fe6c43 100644 --- a/src/object/srv_obj_migrate.c +++ b/src/object/srv_obj_migrate.c @@ -31,15 +31,76 @@ #pragma GCC diagnostic ignored "-Wframe-larger-than=" #endif -/* Max in-flight data size per xstream */ -/* Set the total in-flight size to be 25% of MAX DMA size for +/* Max in-flight transfer size per xstream */ +/* Set the total in-flight size to be 50% of MAX DMA size for * the moment, will adjust it later if needed. */ -#define MIGRATE_MAX_SIZE (1 << 28) -/* Max migrate ULT number on the server */ -#define MIGRATE_DEFAULT_MAX_ULT 4096 +#define MIGR_TGT_INF_DATA (1 << 29) + +/* Threshold for very large transfers. + * This may exceed the MIGR_TGT_INF_DATA limit to prevent starvation. + * Only one such transfer is allowed at a time. 
+ */ +#define MIGR_INF_DATA_HULK (1 << 28) + +/* Low water mark for DMA buffer usage, hulk transfer is allowed in this case. + */ +#define MIGR_INF_DATA_LWM (1 << 28) + #define ENV_MIGRATE_ULT_CNT "D_MIGRATE_ULT_CNT" + +/* Number of migration ULTs per target */ +#define MIGR_TGT_ULTS_MIN 100 +#define MIGR_TGT_ULTS_DEF 500 +#define MIGR_TGT_ULTS_MAX 2000 + +/* 1/3 object ults, 2/3 key ULTs */ +#define MIGR_OBJ_ULT_PERCENT 33 + +#define MIGR_TGT_OBJ_ULTS(ults) ((ults * MIGR_OBJ_ULT_PERCENT) / 100) +#define MIGR_TGT_KEY_ULTS(ults) (ults - MIGR_TGT_OBJ_ULTS(ults)) + +enum { + MIGR_OBJ = 0, + MIGR_KEY, + MIGR_DATA, + MIGR_MAX, +}; + +/* resource consumed by migration */ +struct migr_resource { + const char *res_name; + /* upper limit of the resource */ + long res_limit; + /* resource amount in "unit" */ + long res_units; + /* number of waiters on this resource */ + int res_waiters; + /* Only used by MIGR_DATA, it always allows exactly one ULT to use unbounded + * buffer for super large value (rare). 
+ */ + int res_hulk; + /* ABT_cond for waiters */ + ABT_cond res_cond; +}; + +/* migration resources manager */ +struct migr_res_manager { + ABT_mutex rmg_mutex; + struct migr_resource rmg_resources[MIGR_MAX]; +}; + +struct migr_engine_res { + /* total ULTs per target, it a tunable which can be set by admin */ + unsigned int er_max_ults; + /* dss_tgt_nr resource managers */ + struct migr_res_manager *er_rmgs; +}; + +static struct migr_engine_res migr_eng_res; + struct migrate_one { + struct migrate_pool_tls *mo_tls; daos_key_t mo_dkey; uint64_t mo_dkey_hash; uuid_t mo_pool_uuid; @@ -114,6 +175,7 @@ struct iter_cont_arg { /* Argument for object iteration and migrate */ struct iter_obj_arg { + struct migrate_pool_tls *pool_tls; uuid_t pool_uuid; uuid_t cont_uuid; daos_unit_oid_t oid; @@ -391,10 +453,6 @@ migrate_pool_tls_destroy(struct migrate_pool_tls *tls) D_FREE(tls->mpt_svc_list.rl_ranks); if (tls->mpt_done_eventual) ABT_eventual_free(&tls->mpt_done_eventual); - if (tls->mpt_inflight_cond) - ABT_cond_free(&tls->mpt_inflight_cond); - if (tls->mpt_inflight_mutex) - ABT_mutex_free(&tls->mpt_inflight_mutex); if (daos_handle_is_valid(tls->mpt_root_hdl)) obj_tree_destroy(tls->mpt_root_hdl); if (daos_handle_is_valid(tls->mpt_migrated_root_hdl)) @@ -450,14 +508,12 @@ migrate_pool_tls_create(uuid_t pool_uuid, unsigned int version, unsigned int gen uint32_t new_layout_ver, uint32_t opc, struct migrate_pool_tls **p_tls, d_rank_list_t *svc_list, uint8_t tgt_status, uint32_t tgt_in_ver) { - uint32_t max_migrate_ult = MIGRATE_DEFAULT_MAX_ULT; struct obj_tls *obj_tls = obj_tls_get(); uint32_t tgt_id = dss_get_module_info()->dmi_tgt_id; struct migrate_pool_tls *pool_tls = NULL; struct ds_pool_child *pool_child = NULL; int rc = 0; - d_getenv_uint(ENV_MIGRATE_ULT_CNT, &max_migrate_ult); D_ASSERT(generation != (unsigned int)(-1)); pool_child = ds_pool_child_lookup(pool_uuid); @@ -485,14 +541,6 @@ migrate_pool_tls_create(uuid_t pool_uuid, unsigned int version, unsigned int gen if 
(rc != ABT_SUCCESS) D_GOTO(out, rc = dss_abterr2der(rc)); - rc = ABT_cond_create(&pool_tls->mpt_inflight_cond); - if (rc != ABT_SUCCESS) - D_GOTO(out, rc = dss_abterr2der(rc)); - - rc = ABT_mutex_create(&pool_tls->mpt_inflight_mutex); - if (rc != ABT_SUCCESS) - D_GOTO(out, rc = dss_abterr2der(rc)); - uuid_copy(pool_tls->mpt_pool_uuid, pool_uuid); uuid_copy(pool_tls->mpt_poh_uuid, pool_hdl_uuid); uuid_copy(pool_tls->mpt_coh_uuid, co_hdl_uuid); @@ -508,8 +556,6 @@ migrate_pool_tls_create(uuid_t pool_uuid, unsigned int version, unsigned int gen pool_tls->mpt_pool = ds_pool_child_lookup(pool_uuid); if (pool_tls->mpt_pool == NULL) D_GOTO(out, rc = -DER_NO_HDL); - pool_tls->mpt_inflight_max_size = MIGRATE_MAX_SIZE / dss_tgt_nr; - pool_tls->mpt_inflight_max_ult = max_migrate_ult / dss_tgt_nr; pool_tls->mpt_tgt_obj_ult_cnt = 0; pool_tls->mpt_tgt_dkey_ult_cnt = 0; @@ -656,12 +702,10 @@ mrone_obj_fetch(struct migrate_one *mrone, daos_handle_t oh, d_sg_list_t *sgls, daos_iod_t *iods, int iod_num, daos_epoch_t eph, uint32_t flags, d_iov_t *csum_iov_fetch) { - struct migrate_pool_tls *tls; + struct migrate_pool_tls *tls = mrone->mo_tls; int rc = 0; - tls = migrate_pool_tls_lookup(mrone->mo_pool_uuid, - mrone->mo_pool_tls_version, mrone->mo_generation); - if (tls == NULL || tls->mpt_fini) { + if (tls->mpt_fini) { D_WARN("someone aborted the rebuild " DF_UUID "\n", DP_UUID(mrone->mo_pool_uuid)); D_GOTO(out, rc = migrate_pool_tls_get_status(tls)); } @@ -694,7 +738,6 @@ mrone_obj_fetch(struct migrate_one *mrone, daos_handle_t oh, d_sg_list_t *sgls, } out: - migrate_pool_tls_put(tls); return rc; } @@ -1771,81 +1814,131 @@ migrate_one_destroy(struct migrate_one *mrone) if (mrone->mo_iods_csums) D_FREE(mrone->mo_iods_csums); + if (mrone->mo_tls) + migrate_pool_tls_put(mrone->mo_tls); D_FREE(mrone); } -enum { - OBJ_ULT = 1, - DKEY_ULT = 2, -}; - -static inline uint32_t -migrate_tgt_ult_cnt(struct migrate_pool_tls *tls, int ult_type) +static bool +migr_res_is_hulk(int res_type, long 
units) { - if (ult_type == OBJ_ULT) - return tls->mpt_tgt_obj_ult_cnt; - else - return tls->mpt_tgt_dkey_ult_cnt; + return res_type == MIGR_DATA && units >= MIGR_INF_DATA_HULK; } static int -migrate_tgt_enter(struct migrate_pool_tls *tls, int ult_type, bool *yielded) +migrate_res_hold(struct migrate_pool_tls *tls, int res_type, long units, bool *yielded) { - uint32_t ult_cnt = 0; - int rc = 0; + struct dss_module_info *dmi = dss_get_module_info(); + struct migr_res_manager *rmg; + struct migr_resource *res; + bool is_hulk; + bool waited = false; + int rc = 0; + + D_ASSERT(dmi->dmi_xs_id != 0); + + rmg = &migr_eng_res.er_rmgs[dmi->dmi_tgt_id]; + if (tls->mpt_rmg == NULL) { + tls->mpt_rmg = rmg; + } else { + D_ASSERTF(tls->mpt_rmg == rmg, "target=%d, rmg_off=%d\n", dmi->dmi_tgt_id, + (int)(tls->mpt_rmg - &migr_eng_res.er_rmgs[0])); + } - D_ASSERT(dss_get_module_info()->dmi_xs_id != 0); + res = &rmg->rmg_resources[res_type]; + is_hulk = migr_res_is_hulk(res_type, units); + while (1) { + if (tls->mpt_fini) { + rc = migrate_pool_tls_get_status(tls); + D_GOTO(out, rc); + } - ult_cnt = migrate_tgt_ult_cnt(tls, ult_type); - while (tls->mpt_inflight_max_ult / 2 <= ult_cnt) { - D_DEBUG(DB_REBUILD, "tgt %u max %u\n", ult_cnt, tls->mpt_inflight_max_ult); + if (is_hulk && res->res_hulk == 0 && res->res_units < MIGR_INF_DATA_LWM) { + /* skip the limit check and allow (only) one hulk transfer at a time */ + res->res_units += units; + res->res_hulk = 1; + break; - if (yielded) - *yielded = true; - ABT_mutex_lock(tls->mpt_inflight_mutex); - ABT_cond_wait(tls->mpt_inflight_cond, tls->mpt_inflight_mutex); - ABT_mutex_unlock(tls->mpt_inflight_mutex); - if (tls->mpt_fini) - D_GOTO(out, rc = migrate_pool_tls_get_status(tls)); + } else if (!is_hulk && res->res_units + units <= res->res_limit) { + res->res_units += units; + break; + } + + ABT_mutex_lock(rmg->rmg_mutex); + res->res_waiters++; + if (res->res_waiters >= 100 && res->res_waiters % 100 == 0) { + D_DEBUG(DB_REBUILD, + "%d 
waiters are waiting on res=%s (target=%d, unit=%lu)\n", + res->res_waiters, res->res_name, dmi->dmi_tgt_id, units); + } - ult_cnt = migrate_tgt_ult_cnt(tls, ult_type); + ABT_cond_wait(res->res_cond, rmg->rmg_mutex); + res->res_waiters--; + ABT_mutex_unlock(rmg->rmg_mutex); + waited = true; } + if (yielded) + *yielded = waited; - if (ult_type == OBJ_ULT) + /* per-pool counters for rebuild status tracking */ + if (res_type == MIGR_OBJ) tls->mpt_tgt_obj_ult_cnt++; - else + else if (res_type == MIGR_KEY) tls->mpt_tgt_dkey_ult_cnt++; + else + tls->mpt_inflight_size += units; + + D_DEBUG(DB_REBUILD, + "res=%s, hold=%lu, used=%lu, limit=%lu, waited=%d)\n" DF_RB + " obj_ults=%u, key_ults=%u, inf_data=" DF_U64 ")\n", + res->res_name, units, res->res_units, res->res_limit, waited, DP_RB_MPT(tls), + tls->mpt_tgt_obj_ult_cnt, tls->mpt_tgt_dkey_ult_cnt, tls->mpt_inflight_size); out: return rc; } static void -migrate_tgt_try_wakeup(struct migrate_pool_tls *tls, int ult_type) +migrate_res_release(struct migrate_pool_tls *tls, int res_type, long units) { - uint32_t ult_cnt = 0; + struct migr_res_manager *rmg; + struct migr_resource *res; - ult_cnt = migrate_tgt_ult_cnt(tls, ult_type); - D_ASSERT(dss_get_module_info()->dmi_xs_id != 0); - if (tls->mpt_inflight_max_ult / 2 > ult_cnt) { - ABT_mutex_lock(tls->mpt_inflight_mutex); - ABT_cond_signal(tls->mpt_inflight_cond); - ABT_mutex_unlock(tls->mpt_inflight_mutex); - } -} + rmg = tls->mpt_rmg; + D_ASSERT(rmg != NULL); -static void -migrate_tgt_exit(struct migrate_pool_tls *tls, int ult_type) -{ - D_ASSERT(dss_get_module_info()->dmi_xs_id != 0); - if (ult_type == OBJ_ULT) { + res = &rmg->rmg_resources[res_type]; + + D_DEBUG(DB_REBUILD, + "%s: release=%lu, used=%lu, limit=%lu\n" DF_RB + " obj_ults=%u, key_ults=%u, inf_data=" DF_U64 ")\n", + res->res_name, units, res->res_units, res->res_limit, DP_RB_MPT(tls), + tls->mpt_tgt_obj_ult_cnt, tls->mpt_tgt_dkey_ult_cnt, tls->mpt_inflight_size); + + if (res_type == MIGR_OBJ) { 
D_ASSERT(tls->mpt_tgt_obj_ult_cnt > 0); tls->mpt_tgt_obj_ult_cnt--; - } else { + } else if (res_type == MIGR_KEY) { D_ASSERT(tls->mpt_tgt_dkey_ult_cnt > 0); tls->mpt_tgt_dkey_ult_cnt--; + } else { + D_ASSERT(tls->mpt_inflight_size >= units); + tls->mpt_inflight_size -= units; + } + + D_ASSERT(res->res_units >= units); + res->res_units -= units; + + if (migr_res_is_hulk(res_type, units)) { + D_ASSERT(res->res_hulk == 1); + res->res_hulk = 0; + } + + if (res->res_waiters > 0) { + ABT_mutex_lock(rmg->rmg_mutex); + ABT_cond_signal(res->res_cond); + ABT_mutex_unlock(rmg->rmg_mutex); } - migrate_tgt_try_wakeup(tls, ult_type); } static void @@ -1859,9 +1952,8 @@ migrate_one_ult(void *arg) while (daos_fail_check(DAOS_REBUILD_TGT_REBUILD_HANG)) dss_sleep(0); - tls = migrate_pool_tls_lookup(mrone->mo_pool_uuid, - mrone->mo_pool_tls_version, mrone->mo_generation); - if (tls == NULL || tls->mpt_fini) { + tls = mrone->mo_tls; + if (tls->mpt_fini) { D_WARN("someone aborted the rebuild " DF_UUID "\n", DP_UUID(mrone->mo_pool_uuid)); goto out; } @@ -1874,26 +1966,14 @@ migrate_one_ult(void *arg) data_size, mrone->mo_iod_num, mrone->mo_iods_num_from_parity); D_ASSERT(data_size != (daos_size_t)-1); - D_DEBUG(DB_REBUILD, DF_RB ": mrone %p inflight_size " DF_U64 " max " DF_U64 "\n", - DP_RB_MPT(tls), mrone, tls->mpt_inflight_size, tls->mpt_inflight_max_size); - - while (tls->mpt_inflight_size + data_size >= tls->mpt_inflight_max_size && - tls->mpt_inflight_max_size != 0 && tls->mpt_inflight_size != 0 && - !tls->mpt_fini) { - D_DEBUG(DB_REBUILD, DF_RB ": mrone %p wait " DF_U64 "/" DF_U64 "/" DF_U64 "\n", - DP_RB_MPT(tls), mrone, tls->mpt_inflight_size, tls->mpt_inflight_max_size, - data_size); - ABT_mutex_lock(tls->mpt_inflight_mutex); - ABT_cond_wait(tls->mpt_inflight_cond, tls->mpt_inflight_mutex); - ABT_mutex_unlock(tls->mpt_inflight_mutex); - } - if (tls->mpt_fini) + rc = migrate_res_hold(tls, MIGR_DATA, data_size, NULL); + if (rc) D_GOTO(out, rc); - tls->mpt_inflight_size += 
data_size; rc = migrate_dkey(tls, mrone, data_size); - tls->mpt_inflight_size -= data_size; + + migrate_res_release(tls, MIGR_DATA, data_size); D_DEBUG(DB_REBUILD, DF_RB ": " DF_UOID " layout %u migrate dkey " DF_KEY " inflight_size " DF_U64 @@ -1917,11 +1997,8 @@ migrate_one_ult(void *arg) tls->mpt_fini = 1; } out: + migrate_res_release(tls, MIGR_KEY, 1); migrate_one_destroy(mrone); - if (tls != NULL) { - migrate_tgt_exit(tls, DKEY_ULT); - migrate_pool_tls_put(tls); - } } /* If src_iod is NULL, it will try to merge the recxs inside dst_iod */ @@ -2326,29 +2403,28 @@ migrate_one_create(struct enum_unpack_arg *arg, struct dc_obj_enum_unpack_io *io d_sg_list_t *sgls = io->ui_sgls; uint32_t version = io->ui_version; struct dc_object *obj = NULL; - struct migrate_pool_tls *tls; + struct migrate_pool_tls *tls = iter_arg->pool_tls; struct migrate_one *mrone = NULL; bool inline_copy = true; int i; int rc = 0; - tls = migrate_pool_tls_lookup(iter_arg->pool_uuid, iter_arg->version, iter_arg->generation); - if (tls == NULL || tls->mpt_fini) { + if (tls->mpt_fini) { D_WARN("someone aborted the rebuild " DF_UUID "dkey " DF_KEY "iod_nr %d\n", DP_UUID(iter_arg->pool_uuid), DP_KEY(dkey), iod_eph_total); - D_GOTO(put, rc = 0); + D_GOTO(out, rc = 0); } D_DEBUG(DB_REBUILD, DF_RB ": migrate dkey " DF_KEY " iod nr %d\n", DP_RB_MPT(tls), DP_KEY(dkey), iod_eph_total); if (iod_eph_total == 0 || tls->mpt_fini) { D_DEBUG(DB_REBUILD, DF_RB ": no need eph_total %d version %u fini %d\n", DP_RB_MPT(tls), iod_eph_total, version, tls->mpt_fini); - D_GOTO(put, rc = 0); + D_GOTO(out, rc = 0); } D_ALLOC_PTR(mrone); if (mrone == NULL) - D_GOTO(put, rc = -DER_NOMEM); + D_GOTO(out, rc = -DER_NOMEM); D_INIT_LIST_HEAD(&mrone->mo_list); D_ALLOC_ARRAY(mrone->mo_iods, iod_eph_total); @@ -2457,8 +2533,7 @@ migrate_one_create(struct enum_unpack_arg *arg, struct dc_obj_enum_unpack_io *io d_list_del_init(&mrone->mo_list); migrate_one_destroy(mrone); } -put: - migrate_pool_tls_put(tls); +out: return rc; } @@ 
-2466,14 +2541,14 @@ static int migrate_enum_unpack_cb(struct dc_obj_enum_unpack_io *io, void *data) { struct enum_unpack_arg *arg = data; + struct migrate_pool_tls *tls = arg->arg->pool_tls; uint32_t shard = arg->arg->shard; struct migrate_one *mo; uint32_t unpack_tgt_off; uint32_t migrate_tgt_off; bool merged = false; bool create_migrate_one = false; - int rc = 0; - struct migrate_pool_tls *tls; + int rc = 0; struct dc_object *obj = NULL; uint32_t parity_shard = -1; uint32_t layout_ver; @@ -2495,9 +2570,7 @@ migrate_enum_unpack_cb(struct dc_obj_enum_unpack_io *io, void *data) if (rc < 0) return rc; - tls = migrate_pool_tls_lookup(arg->arg->pool_uuid, arg->arg->version, - arg->arg->generation); - if (tls == NULL || tls->mpt_fini) { + if (tls->mpt_fini) { D_WARN("someone aborted the rebuild " DF_UUID "\n", DP_UUID(arg->arg->pool_uuid)); D_GOTO(put, rc = 0); } @@ -2614,7 +2687,6 @@ migrate_enum_unpack_cb(struct dc_obj_enum_unpack_io *io, void *data) put: if (obj) obj_decref(obj); - migrate_pool_tls_put(tls); return rc; } @@ -2626,10 +2698,10 @@ migrate_obj_punch_one(void *data) struct ds_cont_child *cont; int rc; - tls = migrate_pool_tls_lookup(arg->pool_uuid, arg->version, arg->generation); - if (tls == NULL || tls->mpt_fini) { + tls = arg->pool_tls; + if (tls->mpt_fini) { D_WARN("someone aborted the rebuild " DF_UUID "\n", DP_UUID(arg->pool_uuid)); - D_GOTO(put, rc = 0); + D_GOTO(out, rc = 0); } D_DEBUG(DB_REBUILD, DF_RB ": tls %p version %d punch " DF_U64 " " DF_UOID "\n", @@ -2637,22 +2709,20 @@ migrate_obj_punch_one(void *data) rc = migrate_get_cont_child(tls, arg->cont_uuid, &cont, true); if (rc != 0 || cont == NULL) - D_GOTO(put, rc); + D_GOTO(out, rc); D_ASSERT(arg->punched_epoch != 0); rc = vos_obj_punch(cont->sc_hdl, arg->oid, arg->punched_epoch, tls->mpt_version, VOS_OF_REPLAY_PC, NULL, 0, NULL, NULL); ds_cont_child_put(cont); -put: +out: if (rc) DL_ERROR(rc, DF_RB ": " DF_UOID " migrate punch failed", DP_RB_MPT(tls), DP_UOID(arg->oid)); - if (tls) { - if 
(tls->mpt_status == 0 && rc != 0) - tls->mpt_status = rc; - migrate_pool_tls_put(tls); - } + + if (tls->mpt_status == 0 && rc != 0) + tls->mpt_status = rc; return rc; } @@ -2660,16 +2730,15 @@ migrate_obj_punch_one(void *data) static int migrate_start_ult(struct enum_unpack_arg *unpack_arg) { - struct migrate_pool_tls *tls; struct iter_obj_arg *arg = unpack_arg->arg; + struct migrate_pool_tls *tls = arg->pool_tls; struct migrate_one *mrone; struct migrate_one *tmp; int rc = 0; - tls = migrate_pool_tls_lookup(arg->pool_uuid, arg->version, arg->generation); - if (tls == NULL || tls->mpt_fini) { + if (tls->mpt_fini) { D_WARN("someone aborted the rebuild " DF_UUID "\n", DP_UUID(arg->pool_uuid)); - D_GOTO(put, rc = 0); + D_GOTO(out, rc = 0); } d_list_for_each_entry_safe(mrone, tmp, &unpack_arg->merge_list, mo_list) { @@ -2687,21 +2756,24 @@ migrate_start_ult(struct enum_unpack_arg *unpack_arg) continue; } - rc = migrate_tgt_enter(tls, DKEY_ULT, NULL); + rc = migrate_res_hold(tls, MIGR_KEY, 1, NULL); if (rc) break; d_list_del_init(&mrone->mo_list); - rc = dss_ult_create(migrate_one_ult, mrone, DSS_XS_VOS, - arg->tgt_idx, MIGRATE_STACK_SIZE, NULL); + + migrate_pool_tls_get(tls); + mrone->mo_tls = tls; + + D_ASSERT(arg->tgt_idx == dss_get_module_info()->dmi_tgt_id); + rc = dss_ult_create(migrate_one_ult, mrone, DSS_XS_SELF, 0, MIGRATE_STACK_SIZE, + NULL); if (rc) { - migrate_tgt_exit(tls, DKEY_ULT); + migrate_res_release(tls, MIGR_KEY, 1); migrate_one_destroy(mrone); break; } } - -put: - migrate_pool_tls_put(tls); +out: return rc; } @@ -3011,9 +3083,16 @@ migrate_fini_one_ult(void *data) arg->stop_count++; ABT_mutex_unlock(arg->stop_lock); - ABT_mutex_lock(tls->mpt_inflight_mutex); - ABT_cond_broadcast(tls->mpt_inflight_cond); - ABT_mutex_unlock(tls->mpt_inflight_mutex); + if (tls->mpt_rmg) { + struct migr_res_manager *rmg = tls->mpt_rmg; + int i; + + /* NB: no big deal but ULTs of all pools will be waken up */ + ABT_mutex_lock(rmg->rmg_mutex); + for (i = 0; i < MIGR_MAX; 
i++) + ABT_cond_broadcast(rmg->rmg_resources[i].res_cond); + ABT_mutex_unlock(rmg->rmg_mutex); + } migrate_pool_tls_put(tls); /* lookup */ rc = ABT_eventual_wait(tls->mpt_done_eventual, NULL); @@ -3066,6 +3145,16 @@ migrate_obj_punch(struct iter_obj_arg *arg) arg->tgt_idx, MIGRATE_STACK_SIZE); } +static void +obj_iter_arg_free(struct iter_obj_arg *arg) +{ + if (arg->pool_tls) + migrate_pool_tls_put(arg->pool_tls); + if (arg->snaps) + D_FREE(arg->snaps); + D_FREE(arg); +} + /** * This ULT manages migration one object ID for one container. It does not do * the data migration itself - instead it iterates akeys/dkeys as a client and @@ -3089,8 +3178,8 @@ migrate_obj_ult(void *data) int i; int rc = 0; - tls = migrate_pool_tls_lookup(arg->pool_uuid, arg->version, arg->generation); - if (tls == NULL || tls->mpt_fini) { + tls = arg->pool_tls; + if (tls->mpt_fini) { D_WARN("someone aborted the rebuild " DF_UUID "\n", DP_UUID(arg->pool_uuid)); D_GOTO(free_notls, rc); } @@ -3209,17 +3298,13 @@ migrate_obj_ult(void *data) tls->mpt_status = rc; D_DEBUG(DB_REBUILD, - DF_RB ": stop migrate obj " DF_UOID "for shard %u ult %u/%u " DF_U64 " : " DF_RC + DF_RB ": stop migrate obj " DF_UOID "for shard %u ult %u/%u " DF_U64 " : " DF_RC "\n", DP_RB_MPT(tls), DP_UOID(arg->oid), arg->shard, tls->mpt_tgt_obj_ult_cnt, tls->mpt_tgt_dkey_ult_cnt, tls->mpt_obj_count, DP_RC(rc)); free_notls: - if (tls != NULL) - migrate_tgt_exit(tls, OBJ_ULT); - - D_FREE(arg->snaps); - D_FREE(arg); - migrate_pool_tls_put(tls); + migrate_res_release(tls, MIGR_OBJ, 1); + obj_iter_arg_free(arg); } struct migrate_obj_val { @@ -3248,6 +3333,8 @@ migrate_one_object(daos_unit_oid_t oid, daos_epoch_t eph, daos_epoch_t punched_e if (obj_arg == NULL) return -DER_NOMEM; + migrate_pool_tls_get(tls); + obj_arg->pool_tls = tls; obj_arg->oid = oid; obj_arg->epoch = eph; obj_arg->shard = shard; @@ -3268,8 +3355,8 @@ migrate_one_object(daos_unit_oid_t oid, daos_epoch_t eph, daos_epoch_t punched_e sizeof(*obj_arg->snaps) * 
cont_arg->snap_cnt); } - rc = dss_ult_create(migrate_obj_ult, obj_arg, DSS_XS_VOS, - tgt_idx, MIGRATE_STACK_SIZE, NULL); + D_ASSERT(tgt_idx == dss_get_module_info()->dmi_tgt_id); + rc = dss_ult_create(migrate_obj_ult, obj_arg, DSS_XS_SELF, 0, MIGRATE_STACK_SIZE, NULL); if (rc) goto free; @@ -3285,8 +3372,7 @@ migrate_one_object(daos_unit_oid_t oid, daos_epoch_t eph, daos_epoch_t punched_e return 0; free: - D_FREE(obj_arg->snaps); - D_FREE(obj_arg); + obj_iter_arg_free(obj_arg); return rc; } @@ -3313,7 +3399,7 @@ migrate_obj_iter_cb(daos_handle_t ih, d_iov_t *key_iov, d_iov_t *val_iov, void * DF_RB ": obj migrate " DF_UUID "/" DF_UOID " %" PRIx64 " eph " DF_U64 " start\n", DP_RB_MPT(arg->pool_tls), DP_UUID(arg->cont_uuid), DP_UOID(*oid), ih.cookie, epoch); - rc = migrate_tgt_enter(arg->pool_tls, OBJ_ULT, &yielded); + rc = migrate_res_hold(arg->pool_tls, MIGR_OBJ, 1, &yielded); if (rc) { DL_ERROR(rc, DF_RB ": " DF_UUID " enter migrate failed.", DP_RB_MPT(arg->pool_tls), DP_UUID(arg->cont_uuid)); @@ -3324,11 +3410,11 @@ migrate_obj_iter_cb(daos_handle_t ih, d_iov_t *key_iov, d_iov_t *val_iov, void * if (rc != 0) { DL_ERROR(rc, DF_RB ": obj " DF_UOID " migration failed", DP_RB_MPT(arg->pool_tls), DP_UOID(*oid)); - migrate_tgt_exit(arg->pool_tls, OBJ_ULT); + migrate_res_release(arg->pool_tls, MIGR_OBJ, 1); return rc; } - /* migrate_tgt_enter possibly yielded the ULT, let's re-probe before delete */ + /* migrate_res_hold possibly yielded the ULT, let's re-probe before delete */ if (yielded) { d_iov_set(&tmp_iov, oid, sizeof(*oid)); rc = dbtree_iter_probe(ih, BTR_PROBE_EQ, DAOS_INTENT_MIGRATION, &tmp_iov, NULL); @@ -4349,3 +4435,92 @@ ds_object_migrate_send(struct ds_pool *pool, uuid_t pool_hdl_uuid, uuid_t cont_h return rc; } + +static int +migr_res_init(struct migr_resource *res, const char *name, long limit) +{ + int rc; + + memset(res, 0, sizeof(*res)); + res->res_name = name; + res->res_limit = limit; + rc = ABT_cond_create(&res->res_cond); + + return (rc != 
ABT_SUCCESS) ? dss_abterr2der(rc) : 0; +} + +static void +migr_res_fini(struct migr_resource *res) +{ + if (res->res_cond) + ABT_cond_free(&res->res_cond); +} + +int +obj_migrate_init(void) +{ + unsigned int ults = MIGR_TGT_ULTS_DEF; + int i; + int rc = 0; + + D_CASSERT(MIGR_TGT_INF_DATA > MIGR_INF_DATA_LWM); + D_CASSERT(MIGR_TGT_INF_DATA > MIGR_INF_DATA_HULK); + + d_getenv_uint(ENV_MIGRATE_ULT_CNT, &ults); + if (ults < MIGR_TGT_ULTS_MIN) + ults = MIGR_TGT_ULTS_MIN; + if (ults > MIGR_TGT_ULTS_MAX) + ults = MIGR_TGT_ULTS_MAX; + + memset(&migr_eng_res, 0, sizeof(migr_eng_res)); + migr_eng_res.er_max_ults = ults; + + D_ASSERT(dss_tgt_nr > 0); + D_ALLOC(migr_eng_res.er_rmgs, sizeof(struct migr_res_manager) * dss_tgt_nr); + if (!migr_eng_res.er_rmgs) + return -DER_NOMEM; + + for (i = 0; i < dss_tgt_nr; i++) { + struct migr_res_manager *rmg = &migr_eng_res.er_rmgs[i]; + + rc = ABT_mutex_create(&rmg->rmg_mutex); + if (rc != ABT_SUCCESS) + D_GOTO(out, rc = dss_abterr2der(rc)); + + rc = migr_res_init(&rmg->rmg_resources[MIGR_OBJ], "OBJ", MIGR_TGT_OBJ_ULTS(ults)); + if (rc) + D_GOTO(out, rc); + + rc = migr_res_init(&rmg->rmg_resources[MIGR_KEY], "KEY", MIGR_TGT_KEY_ULTS(ults)); + if (rc) + D_GOTO(out, rc); + + rc = migr_res_init(&rmg->rmg_resources[MIGR_DATA], "DATA", MIGR_TGT_INF_DATA); + if (rc) + D_GOTO(out, rc); + } + return 0; +out: + obj_migrate_fini(); + return rc; +} + +void +obj_migrate_fini(void) +{ + int i; + int j; + + if (migr_eng_res.er_rmgs) { + for (i = 0; i < dss_tgt_nr; i++) { + struct migr_res_manager *rmg = &migr_eng_res.er_rmgs[i]; + + for (j = 0; j < MIGR_MAX; j++) + migr_res_fini(&rmg->rmg_resources[j]); + if (rmg->rmg_mutex) + ABT_mutex_free(&rmg->rmg_mutex); + } + D_FREE(migr_eng_res.er_rmgs); + } + memset(&migr_eng_res, 0, sizeof(migr_eng_res)); +} From 83c061044960f38c3ee0c11a5f0c2c829fc9b9c3 Mon Sep 17 00:00:00 2001 From: Nasf-Fan Date: Tue, 23 Dec 2025 14:16:05 +0800 Subject: [PATCH 099/253] DAOS-18362 ddb: prov_mem recreate rdb-pool for system 
with md-on-ssd (#17288) DDB prov_mem sub-command needs to recreate rdb-pool for the system with md-on-ssd, then admin can operate rdb-pool under md-on-ssd mode via ddb. Signed-off-by: Fan Yong --- src/utils/ddb/ddb_mgmt.c | 43 ++++++++++++++++++++++++++++++---------- 1 file changed, 33 insertions(+), 10 deletions(-) diff --git a/src/utils/ddb/ddb_mgmt.c b/src/utils/ddb/ddb_mgmt.c index e9ec2fe7436..12387df6444 100644 --- a/src/utils/ddb/ddb_mgmt.c +++ b/src/utils/ddb/ddb_mgmt.c @@ -32,6 +32,7 @@ ddb_auto_calculate_tmpfs_mount_size(unsigned int *tmpfs_mount_size) int rc = 0; int pool_list_cnt; uint64_t pool_size; + uint64_t rdb_size; uint64_t total_size; const unsigned long GiB = (1ul << 30); @@ -47,17 +48,32 @@ ddb_auto_calculate_tmpfs_mount_size(unsigned int *tmpfs_mount_size) total_size = 0; d_list_for_each_entry(pool_info, &pool_list, spi_link) { if ((pool_info->spi_blob_sz[SMD_DEV_TYPE_META] == 0) || - (pool_info->spi_flags[SMD_DEV_TYPE_META] & SMD_POOL_IN_CREATION)) { + (pool_info->spi_flags[SMD_DEV_TYPE_META] & SMD_POOL_IN_CREATION)) continue; + + rdb_size = 0; + rc = smd_rdb_get_blob_sz(pool_info->spi_id, &rdb_size); + if (rc == 0) { + /** Align to 4K */ + rdb_size = D_ALIGNUP(rdb_size, 1ULL << 12); + total_size += rdb_size; + } else if (rc == -DER_NONEXIST) { + rc = 0; + } else { + D_ERROR("Failed to extract the size of rdb for " DF_UUID ": " DF_RC "\n", + DP_UUID(pool_info->spi_id), DP_RC(rc)); + break; } + D_ASSERT(pool_info->spi_scm_sz > 0); /** Align to 4K */ pool_size = (D_ALIGNUP(pool_info->spi_scm_sz, 1ULL << 12)) * pool_info->spi_tgt_cnt[SMD_DEV_TYPE_META]; total_size += pool_size; - D_INFO("Pool " DF_UUID " required scm size: " DF_U64 "", DP_UUID(pool_info->spi_id), - pool_size); + + D_INFO("Pool " DF_UUID " required scm size " DF_U64 ", rdb size " DF_U64 "\n", + DP_UUID(pool_info->spi_id), pool_size, rdb_size); } d_list_for_each_entry_safe(pool_info, tmp, &pool_list, spi_link) { @@ -140,6 +156,7 @@ ddb_recreate_pooltgts(const char *storage_path) 
struct smd_pool_info *pool_info = NULL; struct smd_pool_info *tmp; d_list_t pool_list; + daos_size_t rdb_size; int rc = 0; int pool_list_cnt; @@ -152,20 +169,26 @@ ddb_recreate_pooltgts(const char *storage_path) } d_list_for_each_entry(pool_info, &pool_list, spi_link) { - if ((pool_info->spi_blob_sz[SMD_DEV_TYPE_META] == 0) || - (pool_info->spi_flags[SMD_DEV_TYPE_META] & SMD_POOL_IN_CREATION)) { + if (pool_info->spi_blob_sz[SMD_DEV_TYPE_META] == 0 || + pool_info->spi_flags[SMD_DEV_TYPE_META] & SMD_POOL_IN_CREATION) continue; + + rdb_size = 0; + rc = smd_rdb_get_blob_sz(pool_info->spi_id, &rdb_size); + if (rc != 0 && rc != -DER_NONEXIST) { + D_ERROR("Failed to extract the size of rdb for " DF_UUID ": " DF_RC "\n", + DP_UUID(pool_info->spi_id), DP_RC(rc)); + break; } - D_INFO("Recreating files for the pool " DF_UUID "", DP_UUID(pool_info->spi_id)); + D_INFO("Recreating files for the pool " DF_UUID "\n", DP_UUID(pool_info->spi_id)); D_ASSERT(pool_info->spi_scm_sz > 0); - /* specify rdb_blob_sz as zero to skip rdb file creation */ + rc = ds_mgmt_tgt_recreate(pool_info->spi_id, pool_info->spi_scm_sz, - pool_info->spi_tgt_cnt[SMD_DEV_TYPE_META], 0, + pool_info->spi_tgt_cnt[SMD_DEV_TYPE_META], rdb_size, storage_path, NULL); - if (rc) { + if (rc != 0) break; - } } d_list_for_each_entry_safe(pool_info, tmp, &pool_list, spi_link) { From dde7e4c9087eb1699322db940e197fb3561c8e04 Mon Sep 17 00:00:00 2001 From: Dalton Bohning Date: Tue, 23 Dec 2025 09:07:53 -0800 Subject: [PATCH 100/253] DAOS-18025 test: add ftest/rebuild/interactive.py (#17171) Add ftest/rebuild/interactive.py - Verify dmg pool rebuild stop/start - After dmg pool exclude - After dmg pool reintegrate Signed-off-by: Dalton Bohning --- src/tests/ftest/rebuild/interactive.py | 201 +++++++++++++++++++++++ src/tests/ftest/rebuild/interactive.yaml | 49 ++++++ src/tests/ftest/util/data_utils.py | 81 +++++++++ src/tests/ftest/util/test_utils_pool.py | 22 +++ 4 files changed, 353 insertions(+) create mode 100644 
src/tests/ftest/rebuild/interactive.py create mode 100644 src/tests/ftest/rebuild/interactive.yaml diff --git a/src/tests/ftest/rebuild/interactive.py b/src/tests/ftest/rebuild/interactive.py new file mode 100644 index 00000000000..5dc968650a4 --- /dev/null +++ b/src/tests/ftest/rebuild/interactive.py @@ -0,0 +1,201 @@ +""" + (C) Copyright 2025 Hewlett Packard Enterprise Development LP + + SPDX-License-Identifier: BSD-2-Clause-Patent +""" +import time +from functools import partial + +from apricot import TestWithServers +from data_utils import assert_val_in_list +from ior_utils import get_ior +from job_manager_utils import get_job_manager + + +class RbldInteractive(TestWithServers): + """Test class for interactive rebuild tests. + + :avocado: recursive + """ + + def test_rebuild_interactive(self): + """ + Use Cases: + Pool rebuild with interactive start/stop. + + :avocado: tags=all,daily_regression + :avocado: tags=hw,large + :avocado: tags=rebuild,pool + :avocado: tags=RbldInteractive,test_rebuild_interactive + """ + self.log_step("Setup pool") + pool = self.get_pool(connect=False) + + # Collect server configuration information + server_count = len(self.hostlist_servers) + engines_per_host = int(self.server_managers[0].get_config_value('engines_per_host') or 1) + targets_per_engine = int(self.server_managers[0].get_config_value('targets')) + self.log.info( + 'Running with %s servers, %s engines per server, and %s targets per engine', + server_count, engines_per_host, targets_per_engine) + + self.log_step('Create container and run IOR') + cont_ior = self.get_container(pool, namespace='/run/cont_ior/*') + ior_flags_write = self.params.get('flags_write', '/run/ior/*') + ior_ppn = self.params.get('ppn', '/run/ior/*') + + job_manager = get_job_manager(self, subprocess=False) + ior = get_ior( + self, job_manager, self.hostlist_clients, self.workdir, None, namespace='/run/ior/*') + ior.manager.job.update_params(flags=ior_flags_write, dfs_oclass=cont_ior.oclass.value) + 
ior.run(cont_ior.pool, cont_ior, None, ior_ppn, display_space=False) + + self.__run_rebuild_interactive( + pool, cont_ior, ior, + num_ranks_to_exclude=1, + exclude_method='dmg pool exclude', + reint_method='dmg pool reintegrate') + + self.log_step('Test Passed') + + def __run_rebuild_interactive(self, pool, cont_ior, ior, + num_ranks_to_exclude, exclude_method, reint_method): + """Run interactive rebuild test sequence. + + Args: + pool (TestPool): pool to use + cont_ior (TestContainer): container used for IOR + iort (Ior): the Ior object + num_ranks_to_exclude (int): number of ranks to exclude/reintegrate + exclude_method (str): method to exclude ranks. Must be in + - 'dmg pool exclude' + - 'dmg system exclude' + reint_method (str): method to reintegrate ranks. Must be in + - 'dmg pool reintegrate' + - 'dmg system reintegrate' + """ + # Time to wait between rebuild start and manual stop. + # If we stop too early rebuild might not have started yet. + # Ideally, if we could poll the "actual" rebuild status this would not be necessary. 
+ secs_between_rebuild_start_and_manual_stop = 4 + + ior_flags_read = self.params.get('flags_read', '/run/ior/*') + ior_ppn = self.params.get('ppn', '/run/ior/*') + + self.log_step('Verify pool state before rebuild') + self.__verify_pool_query( + pool, rebuild_status=0, rebuild_state=['idle', 'done'], disabled_ranks=[]) + + ranks_to_exclude = self.random.sample( + list(self.server_managers[0].ranks.keys()), k=num_ranks_to_exclude) + self.log_step(f'Exclude random rank {ranks_to_exclude}') + if exclude_method == 'dmg pool exclude': + pool.exclude(ranks_to_exclude) + elif exclude_method == 'dmg system exclude': + pool.dmg.system_exclude(ranks_to_exclude) + else: + self.fail(f'Unsupported exclude_method: {exclude_method}') + + self.log_step(f'{exclude_method} - Wait for rebuild to start') + pool.wait_for_rebuild_to_start(interval=1) + + self.log_step(f'{exclude_method} - Manually stop rebuild') + time.sleep(secs_between_rebuild_start_and_manual_stop) + pool.rebuild_stop() + + self.log_step(f'{exclude_method} - Wait for rebuild to stop') + pool.wait_for_rebuild_to_stop(interval=3) + + self.log_step(f'{exclude_method} - Verify pool state after rebuild stopped') + self.__verify_pool_query( + pool, rebuild_status=-2027, rebuild_state=['idle'], + disabled_ranks=ranks_to_exclude) + + self.log_step(f'{exclude_method} - Verify IOR after rebuild stopped') + ior.manager.job.update_params(flags=ior_flags_read) + ior.run(cont_ior.pool, cont_ior, None, ior_ppn, display_space=False) + + self.log_step(f'{exclude_method} - Manually start rebuild') + pool.rebuild_start() + + self.log_step(f'{exclude_method} - Wait for rebuild to start') + pool.wait_for_rebuild_to_start(interval=1) + + self.log_step(f'{exclude_method} - Wait for rebuild to end') + pool.wait_for_rebuild_to_end(interval=3) + + self.log_step(f'{exclude_method} - Verify pool state after rebuild completed') + self.__verify_pool_query( + pool, rebuild_status=0, rebuild_state=['idle', 'done'], + 
disabled_ranks=ranks_to_exclude) + + self.log_step(f'{exclude_method} - Verify IOR after rebuild completed') + ior.manager.job.update_params(flags=ior_flags_read) + ior.run(cont_ior.pool, cont_ior, None, ior_ppn, display_space=False) + + self.log_step('Reintegrate excluded ranks') + if reint_method == 'dmg pool reintegrate': + pool.reintegrate(ranks_to_exclude) + elif reint_method == 'dmg system reintegrate': + pool.dmg.system_reintegrate(ranks_to_exclude) + else: + self.fail(f'Unsupported reint_method: {reint_method}') + + self.log_step(f'{reint_method} - Wait for rebuild to start') + pool.wait_for_rebuild_to_start(interval=1) + + self.log_step(f'{reint_method} - Manually stop rebuild') + time.sleep(secs_between_rebuild_start_and_manual_stop) + pool.rebuild_stop() + + self.log_step(f'{reint_method} - Wait for rebuild to stop') + pool.wait_for_rebuild_to_stop(interval=3) + + self.log_step(f'{reint_method} - Verify pool state after rebuild stopped') + self.__verify_pool_query( + pool, rebuild_status=-2027, rebuild_state=['idle'], + disabled_ranks=[]) + + self.log_step(f'{reint_method} - Verify IOR after rebuild stopped') + ior.manager.job.update_params(flags=ior_flags_read) + ior.run(cont_ior.pool, cont_ior, None, ior_ppn, display_space=False) + + self.log_step(f'{reint_method} - Manually start rebuild') + pool.rebuild_start() + + self.log_step(f'{reint_method} - Wait for rebuild to start') + pool.wait_for_rebuild_to_start(interval=1) + + self.log_step(f'{reint_method} - Wait for rebuild to end') + pool.wait_for_rebuild_to_end(interval=3) + + self.log_step(f'{reint_method} - Verify pool state after rebuild completed') + self.__verify_pool_query( + pool, rebuild_status=0, rebuild_state=['idle', 'done'], disabled_ranks=[]) + + self.log_step(f'{reint_method} - Verify IOR after rebuild completed') + ior.manager.job.update_params(flags=ior_flags_read) + ior.run(cont_ior.pool, cont_ior, None, ior_ppn, display_space=False) + + def __verify_pool_query(self, pool, 
rebuild_status, rebuild_state, disabled_ranks): + """Verify pool query. + + Args: + pool (TestPool): pool to query + rebuild_status (int): expected rebuild status + rebuild_state (str/list): expected rebuild state + disabled_ranks (list): expected disabled ranks + + """ + try: + pool.verify_query( + { + 'rebuild': { + 'status': rebuild_status, + 'state': partial(assert_val_in_list, allowed_list=rebuild_state) + }, + 'disabled_ranks': disabled_ranks + }, + use_cached_query=True) + except AssertionError as error: + self.fail(f'Unexpected pool query response: {str(error)}') diff --git a/src/tests/ftest/rebuild/interactive.yaml b/src/tests/ftest/rebuild/interactive.yaml new file mode 100644 index 00000000000..0cd95906695 --- /dev/null +++ b/src/tests/ftest/rebuild/interactive.yaml @@ -0,0 +1,49 @@ +hosts: + test_servers: 7 + test_clients: 1 + +timeout: 400 + +server_config: + name: daos_server + engines_per_host: 2 + engines: + 0: + pinned_numa_node: 0 + nr_xs_helpers: 1 + log_file: daos_server0.log + log_mask: DEBUG,MEM=ERR + env_vars: + - DD_MASK=group_metadata_only,io,epc,rebuild + - D_LOG_FILE_APPEND_PID=1 + - D_LOG_FILE_APPEND_RANK=1 + storage: auto + 1: + pinned_numa_node: 1 + nr_xs_helpers: 1 + log_file: daos_server1.log + log_mask: DEBUG,MEM=ERR + env_vars: + - DD_MASK=group_metadata_only,io,epc,rebuild + - D_LOG_FILE_APPEND_PID=1 + - D_LOG_FILE_APPEND_RANK=1 + storage: auto + +pool: + size: 90% + pool_query_timeout: 30 + properties: rd_fac:3 + +cont_ior: + type: POSIX + properties: rd_fac:3 + oclass: EC_8P3GX + +ior: + ppn: 16 + test_file: /testFile + api: DFS + transfer_size: 1M + block_size: 128M + flags_write: "-v -w -k -G 1 -F" + flags_read: "-v -r -R -k -G 1 -F" diff --git a/src/tests/ftest/util/data_utils.py b/src/tests/ftest/util/data_utils.py index 74ead097879..005e3626526 100644 --- a/src/tests/ftest/util/data_utils.py +++ b/src/tests/ftest/util/data_utils.py @@ -1,5 +1,6 @@ """ (C) Copyright 2023 Intel Corporation. 
+ (C) Copyright 2025 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -150,3 +151,83 @@ def dict_subtract(dict1, dict2): except TypeError as error: raise TypeError('Invalid type for key {}'.format(key)) from error return dict3 + + +def assert_val_in_list(val, allowed_list): + """Assert whether a value is in the allowed list. + + Args: + val (object): value to check + allowed_list (list): list of allowed values + + Returns: + bool: True if val is in allowed_list + + Raises: + AssertionError: if val is not in allowed_list + """ + if val not in allowed_list: + raise AssertionError(f'Expected one of {allowed_list}') + return True + + +def assert_dict_subset(subset, full): + """Assert that a dictionary is a subset of another dictionary. + + For example: + assert_dict_subset({'a': 1}, {'a': 1, 'b': 2}) -> passes + assert_dict_subset({'a': 2}, {'a': 1, 'b': 2}) -> AssertionError + assert_dict_subset({'c': 2}, {'a': 1, 'b': 2}) -> AssertionError + + Args: + subset (dict): Expected subset dictionary, where only keys in the subset are verified + in the full dictionary. + Expected values can be type callable(actual_value) -> bool for custom verification. 
+ full (dict): Full dictionary to verify against the subset + + Raises: + AssertionError: if subset is not contained in full + """ + + def _format_keys(keys): + """Convert list of keys to ["key1"]["key2"] format.""" + return ''.join(map(lambda k: f'["{k}"]', keys)) + + def _assert_subset(prev_keys, expected, actual): + """Recursively verify expected dict matches actual dict.""" + for key, expected_value in expected.items(): + cur_keys = prev_keys + [key] + try: + actual_value = actual[key] + except KeyError as error: + raise AssertionError( + f'Missing expected key {_format_keys(cur_keys)}') from error + + if callable(expected_value): + # Use custom callable to verify value + try: + if not expected_value(actual_value): + raise AssertionError( + f'{_format_keys(cur_keys)} = {actual_value} ; ' + f'expected to satisfy {expected_value.__name__}') + except AssertionError as error: + # If the custom callable raised an AssertionError, use its error message + raise AssertionError( + f'{_format_keys(cur_keys)} = {actual_value} ; {str(error)}') from error + + elif type(expected_value) is not type(actual_value): + # Types must match + raise AssertionError( + f'type({_format_keys(cur_keys)}) = {type(actual_value)} ; ' + f'expected {type(expected_value)}') + + elif isinstance(expected_value, dict): + # Recursively verify nested dict + _assert_subset(cur_keys, expected_value, actual_value) + + elif expected_value != actual_value: + # Compare leaf values + raise AssertionError( + f'{_format_keys(cur_keys)} = {actual_value} ; expected "{expected_value}"') + + _assert_subset([], subset, full) diff --git a/src/tests/ftest/util/test_utils_pool.py b/src/tests/ftest/util/test_utils_pool.py index c6cdde5c3ba..a863c38cfdd 100644 --- a/src/tests/ftest/util/test_utils_pool.py +++ b/src/tests/ftest/util/test_utils_pool.py @@ -12,6 +12,7 @@ from avocado import TestFail, fail_on from command_utils import BasicParameter +from data_utils import assert_dict_subset from dmg_utils import 
DmgCommand, DmgJsonCommandFailure from exception_utils import CommandFailure from general_utils import DaosTestError, check_file_exists @@ -1601,3 +1602,24 @@ def verify_uuid_directory(self, host, scm_mount): else: self.log.info("%s does not exist on %s", pool_dir, host) return result[0] + + def verify_query(self, expected_response, use_cached_query=False): + """Verify dmg pool query returns expected values. + + Args: + expected_response (dict): Expected key/value pairs from dmg pool query. + Can be a subset of the full response, where only expected keys are verified. + Expected value can be type callable(actual_value) -> bool for custom verification. + use_cached_query (bool, optional): Whether to use the last cached query. + Defaults to False, which issues a new query. + + Raises: + AssertionError: if the pool query response does not match expected values + + """ + # Only refresh the cache if requested or not yet cached + if not use_cached_query or 'response' not in self.query_data: + self.set_query_data() + response = self.query_data['response'] + + assert_dict_subset(expected_response, response) From c9004d306d89d9f6bf9b7eb9580e34a1ab77ec37 Mon Sep 17 00:00:00 2001 From: Liu Xuezhao Date: Wed, 24 Dec 2025 08:17:43 +0800 Subject: [PATCH 101/253] DAOS-18219 pool: refine pool IV err handling (#17291) When the pool IV fetch failed (fetch IV_POOL_HDL failed as -DER_NOTLEADER for example), the refresh callback still will be called. In that case should not refresh/update to IV cache, also cannot set iv_valid as true, to avoid following IV fetch get invalid data. 
Signed-off-by: Xuezhao Liu --- src/container/container_iv.c | 14 +++++++++++--- src/engine/server_iv.c | 10 +++++++--- src/pool/srv_iv.c | 16 ++++++++++++---- src/rebuild/rebuild_iv.c | 7 +++++++ 4 files changed, 37 insertions(+), 10 deletions(-) diff --git a/src/container/container_iv.c b/src/container/container_iv.c index 2f7719747f2..ede0c0c56fc 100644 --- a/src/container/container_iv.c +++ b/src/container/container_iv.c @@ -742,6 +742,12 @@ cont_iv_ent_refresh(struct ds_iv_entry *entry, struct ds_iv_key *key, d_sg_list_t *src, int ref_rc, void **priv) { D_ASSERT(dss_get_module_info()->dmi_xs_id == 0); + if (ref_rc != 0) { + DL_WARN(ref_rc, DF_UUID "bypass refresh, IV class id %d.", + DP_UUID(entry->ns->iv_pool_uuid), key->class_id); + return ref_rc; + } + return cont_iv_ent_update(entry, key, src, priv); } @@ -1768,7 +1774,7 @@ ds_cont_find_hdl(uuid_t po_uuid, uuid_t coh_uuid, struct ds_cont_hdl **coh_p) /* Return a retry-able error when the srv handle not propagated */ if (d_list_empty(&pool_child->spc_srv_cont_hdl)) { struct copy_hdl_arg arg; - int rc; + int rc, ret; /* * Sometimes the srv container handle failed to be propagated to the pool @@ -1784,8 +1790,10 @@ ds_cont_find_hdl(uuid_t po_uuid, uuid_t coh_uuid, struct ds_cont_hdl **coh_p) } } ds_pool_child_put(pool_child); - D_INFO(DF_UUID ": Server handle isn't propagated yet.\n", DP_UUID(po_uuid)); - return -DER_STALE; + ret = -DER_STALE; + DL_INFO(ret, DF_UUID ": Server handle isn't propagated yet %d.", DP_UUID(po_uuid), + rc); + return ret; } srv_hdl_ready: diff --git a/src/engine/server_iv.c b/src/engine/server_iv.c index 96186da2c9c..2f3ea5f46a2 100644 --- a/src/engine/server_iv.c +++ b/src/engine/server_iv.c @@ -454,7 +454,7 @@ iv_on_update_internal(crt_iv_namespace_t ivns, crt_iv_key_t *iv_key, struct ds_iv_ns *ns = NULL; struct ds_iv_entry *entry; struct ds_iv_key key; - struct iv_priv_entry *priv_entry = priv; + struct iv_priv_entry *priv_entry = priv; int rc = 0; rc = iv_ns_lookup_by_ivns(ivns, 
&ns); @@ -473,17 +473,21 @@ iv_on_update_internal(crt_iv_namespace_t ivns, crt_iv_key_t *iv_key, } if (refresh) { + /* oid_iv_ent_refresh need to be called to unlock */ rc = refresh_iv_value(entry, &key, iv_value, ref_rc, priv_entry ? priv_entry->priv : NULL); + if (rc == 0) + rc = ref_rc; } else { D_ASSERT(iv_value != NULL); + D_ASSERT(ref_rc == 0); + D_ASSERT(!invalidate); if (ns->iv_master_rank != key.rank) { D_DEBUG(DB_MD, "key id %d master rank %u != %u: rc = %d\n", key.class_id, ns->iv_master_rank, key.rank, -DER_GRPVER); D_GOTO(output, rc = -DER_GRPVER); } - rc = update_iv_value(entry, &key, iv_value, - priv_entry ? priv_entry->priv : NULL); + rc = update_iv_value(entry, &key, iv_value, priv_entry ? priv_entry->priv : NULL); } if (rc != 0) { D_DEBUG(DB_MD, "key id %d update failed: rc = " DF_RC "\n", key.class_id, diff --git a/src/pool/srv_iv.c b/src/pool/srv_iv.c index 15dd40f78a7..86f29111310 100644 --- a/src/pool/srv_iv.c +++ b/src/pool/srv_iv.c @@ -774,10 +774,11 @@ pool_iv_ent_fetch(struct ds_iv_entry *entry, struct ds_iv_key *key, if (dss_self_rank() == entry->ns->iv_master_rank) { if (!entry->iv_valid) { - D_INFO(DF_UUID" master %u is still stepping up: %d.\n", - DP_UUID(entry->ns->iv_pool_uuid), entry->ns->iv_master_rank, - -DER_NOTLEADER); - return -DER_NOTLEADER; + rc = -DER_NOTLEADER; + DL_INFO(rc, DF_UUID " iv class id %d, master %u is still stepping up.", + DP_UUID(entry->ns->iv_pool_uuid), key->class_id, + entry->ns->iv_master_rank); + return rc; } } @@ -961,6 +962,13 @@ pool_iv_ent_refresh(struct ds_iv_entry *entry, struct ds_iv_key *key, struct ds_pool *pool = 0; int rc; + if (ref_rc != 0) { + rc = ref_rc; + DL_WARN(rc, DF_UUID "bypass refresh, IV class id %d.", + DP_UUID(entry->ns->iv_pool_uuid), key->class_id); + goto out_put; + } + if (src == NULL) rc = ds_pool_lookup_internal(entry->ns->iv_pool_uuid, &pool); else diff --git a/src/rebuild/rebuild_iv.c b/src/rebuild/rebuild_iv.c index c7650ed8248..afb85154615 100644 --- 
a/src/rebuild/rebuild_iv.c +++ b/src/rebuild/rebuild_iv.c @@ -186,6 +186,13 @@ rebuild_iv_ent_refresh(struct ds_iv_entry *entry, struct ds_iv_key *key, if (rpt->rt_leader_term != src_iv->riv_leader_term) goto out; + if (ref_rc != 0) { + rc = ref_rc; + DL_WARN(rc, DF_UUID "bypass refresh, IV class id %d.", + DP_UUID(entry->ns->iv_pool_uuid), key->class_id); + goto out; + } + uuid_copy(dst_iv->riv_pool_uuid, src_iv->riv_pool_uuid); dst_iv->riv_master_rank = src_iv->riv_master_rank; dst_iv->riv_global_done = src_iv->riv_global_done; From f225b37ffae41001f479f737b1eda10bfc12f577 Mon Sep 17 00:00:00 2001 From: Niu Yawei Date: Wed, 24 Dec 2025 10:54:08 +0800 Subject: [PATCH 102/253] DAOS-18366 pool: set default chkpt parameters (#17295) Set default chkpt parameters to ensure the checkpoint working before the pool property being propagated. Signed-off-by: Niu Yawei --- src/pool/srv_target.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/pool/srv_target.c b/src/pool/srv_target.c index 24b44205a3f..b7fe7dfe2c8 100644 --- a/src/pool/srv_target.c +++ b/src/pool/srv_target.c @@ -909,6 +909,13 @@ pool_alloc_ref(void *key, unsigned int ksize, void *varg, pool->sp_map_version = arg->pca_map_version; pool->sp_reclaim = DAOS_RECLAIM_LAZY; /* default reclaim strategy */ pool->sp_data_thresh = DAOS_PROP_PO_DATA_THRESH_DEFAULT; + /* + * Set proper default chkpt parameters to ensure the checkpoint working + * before the pool property being propagated. 
+ */ + pool->sp_checkpoint_mode = DAOS_PROP_PO_CHECKPOINT_MODE_DEFAULT; + pool->sp_checkpoint_freq = DAOS_PROP_PO_CHECKPOINT_FREQ_DEFAULT; + pool->sp_checkpoint_thresh = DAOS_PROP_PO_CHECKPOINT_THRESH_DEFAULT; /** set up ds_pool metrics */ rc = ds_pool_metrics_start(pool); From f24ba5c69b45f4f7a516613d82b9dd0cfe131520 Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Fri, 26 Dec 2025 09:43:13 +0800 Subject: [PATCH 103/253] DAOS-18383 object: Fix potential use-after-free in migrate_pool_tls_create() (#17310) In migrate_pool_tls_create(), error handling could result in both migrate_pool_tls_destroy() and migrate_pool_tls_put() being called on the same pool_tls object, leading to a possible double free/use-after-free. Signed-off-by: Wang Shilong --- src/object/srv_obj_migrate.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/object/srv_obj_migrate.c b/src/object/srv_obj_migrate.c index 73236fe6c43..7c6dfb328ba 100644 --- a/src/object/srv_obj_migrate.c +++ b/src/object/srv_obj_migrate.c @@ -579,16 +579,12 @@ migrate_pool_tls_create(uuid_t pool_uuid, unsigned int version, unsigned int gen d_list_add(&pool_tls->mpt_list, &obj_tls->ot_pool_list); migrate_pool_tls_get(pool_tls); out: - if (rc && pool_tls) - migrate_pool_tls_destroy(pool_tls); - if (pool_child != NULL) ds_pool_child_put(pool_child); D_DEBUG(DB_TRACE, "create tls " DF_UUID ": " DF_RC "\n", DP_UUID(pool_uuid), DP_RC(rc)); if (rc != 0) { - if (pool_tls != NULL) - migrate_pool_tls_put(pool_tls); + migrate_pool_tls_put(pool_tls); } else { *p_tls = pool_tls; } From 1af35d59294a3cdd3162a66cb1f18d78a2fac9dc Mon Sep 17 00:00:00 2001 From: Niu Yawei Date: Fri, 26 Dec 2025 10:16:03 +0800 Subject: [PATCH 104/253] DAOS-18354 common: start local tx on cacnel (#17306) * DAOS-18354 common: start local tx on cacnel Start local tx on cancel operation since the tx is required when the reservation being given back to the heap. 
Signed-off-by: Niu Yawei --- src/common/dav/dav_internal.h | 1 + src/common/dav/tx.c | 19 +++++++++++++++++++ src/common/dav_v2/dav_internal.h | 1 + src/common/dav_v2/tx.c | 19 +++++++++++++++++++ src/common/tests/umem_test_bmem.c | 6 ++++-- 5 files changed, 44 insertions(+), 2 deletions(-) diff --git a/src/common/dav/dav_internal.h b/src/common/dav/dav_internal.h index ae6150c2748..2d31b4480f9 100644 --- a/src/common/dav/dav_internal.h +++ b/src/common/dav/dav_internal.h @@ -69,6 +69,7 @@ typedef struct dav_obj { static inline struct dav_tx *utx2wtx(struct umem_wal_tx *utx) { + D_ASSERT(utx != NULL); return (struct dav_tx *)&utx->utx_private; } diff --git a/src/common/dav/tx.c b/src/common/dav/tx.c index c7516b479a7..b98e076bd7b 100644 --- a/src/common/dav/tx.c +++ b/src/common/dav/tx.c @@ -1505,6 +1505,8 @@ dav_reserve(dav_obj_t *pop, struct dav_action *act, size_t size, uint64_t type_n if (palloc_reserve(pop->do_heap, size, NULL, NULL, type_num, 0, 0, 0, act) != 0) { + if (!tx_inprogress) + lw_tx_end(pop, NULL); DAV_API_END(); return 0; } @@ -1558,9 +1560,26 @@ dav_publish(dav_obj_t *pop, struct dav_action *actv, size_t actvcnt) void dav_cancel(dav_obj_t *pop, struct dav_action *actv, size_t actvcnt) { + int rc, tx_inprogress = 0; + DAV_DBG("actvcnt=%zu", actvcnt); + if (get_tx()->stage != DAV_TX_STAGE_NONE) + tx_inprogress = 1; + DAV_API_START(); + if (!tx_inprogress) { + rc = lw_tx_begin(pop); + if (rc) { + D_ERROR("Failed to start local tx. 
%d\n", rc); + return; + } + } + palloc_cancel(pop->do_heap, actv, actvcnt); + + if (!tx_inprogress) + lw_tx_end(pop, NULL); + DAV_API_END(); } diff --git a/src/common/dav_v2/dav_internal.h b/src/common/dav_v2/dav_internal.h index bc13e2eabc3..974da2a1fb0 100644 --- a/src/common/dav_v2/dav_internal.h +++ b/src/common/dav_v2/dav_internal.h @@ -58,6 +58,7 @@ typedef struct dav_obj { static inline struct dav_tx *utx2wtx(struct umem_wal_tx *utx) { + D_ASSERT(utx != NULL); return (struct dav_tx *)&utx->utx_private; } diff --git a/src/common/dav_v2/tx.c b/src/common/dav_v2/tx.c index ae7edde31d1..c6103eb20cb 100644 --- a/src/common/dav_v2/tx.c +++ b/src/common/dav_v2/tx.c @@ -1733,6 +1733,8 @@ dav_reserve_v2(dav_obj_t *pop, struct dav_action *act, size_t size, uint64_t typ if (palloc_reserve(pop->do_heap, size, constructor_alloc, &carg, type_num, 0, CLASS_ID_FROM_FLAG(flags), EZONE_ID_FROM_FLAG(flags), act) != 0) { + if (!tx_inprogress) + lw_tx_end(pop, NULL); DAV_API_END(); return 0; } @@ -1786,9 +1788,26 @@ dav_publish(dav_obj_t *pop, struct dav_action *actv, size_t actvcnt) DAV_FUNC_EXPORT void dav_cancel_v2(dav_obj_t *pop, struct dav_action *actv, size_t actvcnt) { + int rc, tx_inprogress = 0; + DAV_DBG("actvcnt=%zu", actvcnt); + if (get_tx()->stage != DAV_TX_STAGE_NONE) + tx_inprogress = 1; + DAV_API_START(); + if (!tx_inprogress) { + rc = lw_tx_begin(pop); + if (rc) { + D_ERROR("Failed to start local tx. 
%d\n", rc); + return; + } + } + palloc_cancel(pop->do_heap, actv, actvcnt); + + if (!tx_inprogress) + lw_tx_end(pop, NULL); + DAV_API_END(); } diff --git a/src/common/tests/umem_test_bmem.c b/src/common/tests/umem_test_bmem.c index 0b555e67c08..a62fe3814a4 100644 --- a/src/common/tests/umem_test_bmem.c +++ b/src/common/tests/umem_test_bmem.c @@ -1495,7 +1495,8 @@ test_tx_reserve_publish_cancel(void **state) assert_int_equal(memcmp(rsrv_ptr1, local_buf, 980), 0); assert_int_equal(memcmp(rsrv_ptr2, local_buf, 128), 0); umem_cancel(umm, rsrvd_act); - validate_persist_activity(1, 0); + /* umem_cacnel() internally started tx, which increased one additional resrv_cnt */ + validate_persist_activity(2, 0); utest_get_scm_used_space(arg->ta_utx, &cur_mem_used); assert_true(cur_mem_used >= initial_mem_used); umoff = umem_atomic_alloc(umm, 980, UMEM_TYPE_ANY); @@ -1626,7 +1627,8 @@ test_tx_bucket_reserve_publish_cancel(void **state) assert_int_equal(memcmp(rsrv_ptr1, local_buf, 980), 0); assert_int_equal(memcmp(rsrv_ptr2, local_buf, 128), 0); umem_cancel(umm, rsrvd_act); - validate_persist_activity(1, 0); + /* umem_cacnel() internally started tx, which increased one additional resrv_cnt */ + validate_persist_activity(2, 0); utest_get_scm_used_space(arg->ta_utx, &cur_mem_used); assert_true(cur_mem_used >= initial_mem_used); umoff = umem_atomic_alloc_from_bucket(umm, 980, UMEM_TYPE_ANY, UMEM_DEFAULT_MBKT_ID); From f5686ae9f6533f9f260166177c992be8368f56cc Mon Sep 17 00:00:00 2001 From: Liang Zhen Date: Fri, 26 Dec 2025 11:22:57 +0800 Subject: [PATCH 105/253] DAOS-18376 container: Fix inconsistent ds_cont_child state (#17314) Fix inconsistent ds_cont_child state when ds_cont_csummer_init() yields in md-on-ssd mode: In md-on-ssd mode, ds_cont_csummer_init() persists properties to VOS and calls umem_tx_commit(). 
Because umem_tx_commit() may yield, ds_cont_child can be left in an inconsistent state: - sc_props_fetched remains false - sc_csummer is already set A concurrent ULT may observe this state and hit an assertion. Signed-off-by: Liang Zhen --- src/container/srv_target.c | 86 +++++++++++++++++----------- src/include/daos_srv/container.h | 6 +- src/include/daos_srv/vos.h | 2 +- src/pool/srv_pool_scrub_ult.c | 2 +- src/vos/tests/pool_scrubbing_tests.c | 3 +- src/vos/vos_pool_scrub.c | 2 +- 6 files changed, 60 insertions(+), 41 deletions(-) diff --git a/src/container/srv_target.c b/src/container/srv_target.c index 906f852a75b..3b8aedfa404 100644 --- a/src/container/srv_target.c +++ b/src/container/srv_target.c @@ -132,23 +132,25 @@ ds_cont_csummer_init(struct ds_cont_child *cont) bool dedup_only = false; D_ASSERT(cont != NULL); - cont_props = &cont->sc_props; + while (cont->sc_csummer_initing) { + ABT_mutex_lock(cont->sc_mutex); + ABT_cond_wait(cont->sc_init_cond, cont->sc_mutex); + ABT_mutex_unlock(cont->sc_mutex); + } - if (cont->sc_props_fetched) + if (cont->sc_csummer_inited) return 0; + D_ASSERT(cont->sc_csummer == NULL); + cont->sc_csummer_initing = 1; /** Get the container csum related properties * Need the pool for the IV namespace */ - D_ASSERT(cont->sc_csummer == NULL); + cont_props = &cont->sc_props; rc = ds_cont_get_props(cont_props, cont->sc_pool_uuid, cont->sc_uuid); if (rc != 0) goto done; - /* Check again since IV fetch yield */ - if (cont->sc_props_fetched) - goto done; - csum_val = cont_props->dcp_csum_type; if (!daos_cont_csum_prop_is_enabled(csum_val)) { dedup_only = true; @@ -178,9 +180,13 @@ ds_cont_csummer_init(struct ds_cont_child *cont) DP_UUID(cont->sc_uuid), DP_RC(rc)); rc = 0; } - cont->sc_props_fetched = 1; - + D_ASSERT(!cont->sc_csummer_inited); /* nobody else can do this except me */ + cont->sc_csummer_inited = 1; done: + if (cont->sc_csummer_initing) { + cont->sc_csummer_initing = 0; + ABT_cond_broadcast(cont->sc_init_cond); + } return rc; 
} @@ -218,7 +224,7 @@ cont_aggregate_runnable(struct ds_cont_child *cont, struct sched_request *req, DP_CONT(cont->sc_pool->spc_uuid, cont->sc_uuid)); } - if (!cont->sc_props_fetched) + if (!cont->sc_csummer_inited) ds_cont_csummer_init(cont); if (cont->sc_props.dcp_dedup_enabled || @@ -655,6 +661,25 @@ cont_child_obj(struct daos_llink *llink) return container_of(llink, struct ds_cont_child, sc_list); } +static void +cont_child_fini_abt(struct ds_cont_child *cont) +{ + if (cont->sc_dtx_resync_cond) + ABT_cond_free(&cont->sc_dtx_resync_cond); + if (cont->sc_scrub_cond) + ABT_cond_free(&cont->sc_scrub_cond); + if (cont->sc_rebuild_cond) + ABT_cond_free(&cont->sc_rebuild_cond); + if (cont->sc_init_cond) + ABT_cond_free(&cont->sc_init_cond); + if (cont->sc_fini_cond) + ABT_cond_free(&cont->sc_fini_cond); + if (cont->sc_mutex) + ABT_mutex_free(&cont->sc_mutex); + if (cont->sc_open_mutex) + ABT_mutex_free(&cont->sc_open_mutex); +} + static int cont_child_alloc_ref(void *co_uuid, unsigned int ksize, void *po_uuid, struct daos_llink **link) @@ -678,34 +703,39 @@ cont_child_alloc_ref(void *co_uuid, unsigned int ksize, void *po_uuid, rc = ABT_mutex_create(&cont->sc_mutex); if (rc != ABT_SUCCESS) { rc = dss_abterr2der(rc); - goto out_open_mutex; + goto out_abt; } rc = ABT_cond_create(&cont->sc_dtx_resync_cond); if (rc != ABT_SUCCESS) { rc = dss_abterr2der(rc); - goto out_mutex; + goto out_abt; } rc = ABT_cond_create(&cont->sc_scrub_cond); if (rc != ABT_SUCCESS) { rc = dss_abterr2der(rc); - goto out_resync_cond; + goto out_abt; } rc = ABT_cond_create(&cont->sc_rebuild_cond); if (rc != ABT_SUCCESS) { rc = dss_abterr2der(rc); - goto out_scrub_cond; + goto out_abt; + } + rc = ABT_cond_create(&cont->sc_init_cond); + if (rc != ABT_SUCCESS) { + rc = dss_abterr2der(rc); + goto out_abt; } rc = ABT_cond_create(&cont->sc_fini_cond); if (rc != ABT_SUCCESS) { rc = dss_abterr2der(rc); - goto out_rebuild_cond; + goto out_abt; } cont->sc_pool = ds_pool_child_lookup(po_uuid); if 
(cont->sc_pool == NULL) { rc = -DER_NO_HDL; - goto out_finish_cond; + goto out_abt; } rc = vos_cont_open(cont->sc_pool->spc_hdl, co_uuid, &cont->sc_hdl); @@ -745,18 +775,8 @@ cont_child_alloc_ref(void *co_uuid, unsigned int ksize, void *po_uuid, vos_cont_close(cont->sc_hdl); out_pool: ds_pool_child_put(cont->sc_pool); -out_finish_cond: - ABT_cond_free(&cont->sc_fini_cond); -out_rebuild_cond: - ABT_cond_free(&cont->sc_rebuild_cond); -out_scrub_cond: - ABT_cond_free(&cont->sc_scrub_cond); -out_resync_cond: - ABT_cond_free(&cont->sc_dtx_resync_cond); -out_mutex: - ABT_mutex_free(&cont->sc_mutex); -out_open_mutex: - ABT_mutex_free(&cont->sc_open_mutex); +out_abt: + cont_child_fini_abt(cont); out: D_FREE(cont); return rc; @@ -777,14 +797,10 @@ cont_child_free_ref(struct daos_llink *llink) cont_tgt_track_eph_fini(cont); vos_cont_close(cont->sc_hdl); ds_pool_child_put(cont->sc_pool); - daos_csummer_destroy(&cont->sc_csummer); + if (cont->sc_csummer) + daos_csummer_destroy(&cont->sc_csummer); D_FREE(cont->sc_snapshots); - ABT_cond_free(&cont->sc_dtx_resync_cond); - ABT_cond_free(&cont->sc_scrub_cond); - ABT_cond_free(&cont->sc_rebuild_cond); - ABT_cond_free(&cont->sc_fini_cond); - ABT_mutex_free(&cont->sc_mutex); - ABT_mutex_free(&cont->sc_open_mutex); + cont_child_fini_abt(cont); D_FREE(cont); } diff --git a/src/include/daos_srv/container.h b/src/include/daos_srv/container.h index 3651927c29e..510ffc70ac4 100644 --- a/src/include/daos_srv/container.h +++ b/src/include/daos_srv/container.h @@ -76,9 +76,11 @@ struct ds_cont_child { ABT_cond sc_scrub_cond; ABT_cond sc_rebuild_cond; ABT_cond sc_fini_cond; + ABT_cond sc_init_cond; uint32_t sc_dtx_resyncing : 1, sc_dtx_reindex : 1, sc_dtx_reindex_abort : 1, - sc_dtx_delay_reset : 1, sc_dtx_registered : 1, sc_props_fetched : 1, sc_stopping : 1, - sc_destroying : 1, sc_vos_agg_active : 1, sc_ec_agg_active : 1, + sc_dtx_delay_reset : 1, sc_dtx_registered : 1, sc_csummer_inited : 1, + sc_csummer_initing : 1, sc_stopping : 1, 
sc_destroying : 1, sc_vos_agg_active : 1, + sc_ec_agg_active : 1, /* flag of CONT_CAPA_READ_DATA/_WRITE_DATA disabled */ sc_rw_disabled : 1, sc_scrubbing : 1, sc_rebuilding : 1, /* flag of sc_ec_agg_eph_boundary valid */ diff --git a/src/include/daos_srv/vos.h b/src/include/daos_srv/vos.h index 75bbd598bc0..730c2e88742 100644 --- a/src/include/daos_srv/vos.h +++ b/src/include/daos_srv/vos.h @@ -1590,7 +1590,7 @@ struct cont_scrub { void *scs_cont_src; daos_handle_t scs_cont_hdl; uuid_t scs_cont_uuid; - bool scs_props_fetched; + bool scs_csummer_inited; }; /* diff --git a/src/pool/srv_pool_scrub_ult.c b/src/pool/srv_pool_scrub_ult.c index 437b860ead7..fef7efa25e0 100644 --- a/src/pool/srv_pool_scrub_ult.c +++ b/src/pool/srv_pool_scrub_ult.c @@ -82,7 +82,7 @@ cont_lookup_cb(uuid_t pool_uuid, uuid_t cont_uuid, void *arg, cont->scs_cont_hdl = cont_child->sc_hdl; uuid_copy(cont->scs_cont_uuid, cont_uuid); cont->scs_cont_src = cont_child; - cont->scs_props_fetched = cont_child->sc_props_fetched; + cont->scs_csummer_inited = cont_child->sc_csummer_inited; ABT_mutex_lock(cont_child->sc_mutex); cont_child->sc_scrubbing = 1; diff --git a/src/vos/tests/pool_scrubbing_tests.c b/src/vos/tests/pool_scrubbing_tests.c index a7111045b73..066742a76fa 100644 --- a/src/vos/tests/pool_scrubbing_tests.c +++ b/src/vos/tests/pool_scrubbing_tests.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2020-2022 Intel Corporation. 
+ * (C) Copyright 2025 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -497,7 +498,7 @@ sts_ctx_setup_scrub_ctx(struct sts_context *ctx) ctx->tsc_scrub_ctx.sc_drain_pool_tgt_fn = fake_target_drain; ctx->tsc_scrub_ctx.sc_pool = &ctx->tsc_pool; ctx->tsc_scrub_ctx.sc_dmi = &ctx->tsc_dmi; - ctx->tsc_scrub_ctx.sc_cont.scs_props_fetched = true; + ctx->tsc_scrub_ctx.sc_cont.scs_csummer_inited = true; } static void diff --git a/src/vos/vos_pool_scrub.c b/src/vos/vos_pool_scrub.c index a6cb3ffb510..bd7b4091a4f 100644 --- a/src/vos/vos_pool_scrub.c +++ b/src/vos/vos_pool_scrub.c @@ -878,7 +878,7 @@ cont_iter_is_loaded_cb(daos_handle_t ih, vos_iter_entry_t *entry, * initialized if csums are enabled */ if (!args->args_found_unloaded_container) - args->args_found_unloaded_container = !args->args_ctx->sc_cont.scs_props_fetched; + args->args_found_unloaded_container = !args->args_ctx->sc_cont.scs_csummer_inited; sc_cont_teardown(ctx); return 0; From 5d69d2a63ca27f1d7ab6e047480be5a134c6cd0a Mon Sep 17 00:00:00 2001 From: Liu Xuezhao Date: Tue, 30 Dec 2025 21:35:03 +0800 Subject: [PATCH 106/253] DAOS-17861 cart: fix err handling in corpc (#17299) * DAOS-17861 cart: fix err handling in corpc 1. Some fail cases already set the rc by crt_corpc_fail_parent_rpc()/ crt_corpc_fail_child_rpc(), or called crt_corpc_complete() that will reply parent already, so need to reset rc to 0 to avoid call crt_hg_reply_error_send() again or drop refcount. 2. Fix a refcount leak in a case when need not call local RPC handler in middle node. 
Signed-off-by: Xuezhao Liu --- src/cart/crt_corpc.c | 41 +++++++++++++++++++++++++++-------------- src/cart/crt_hg.c | 1 + 2 files changed, 28 insertions(+), 14 deletions(-) diff --git a/src/cart/crt_corpc.c b/src/cart/crt_corpc.c index abced7310c3..422a44618b9 100644 --- a/src/cart/crt_corpc.c +++ b/src/cart/crt_corpc.c @@ -784,8 +784,13 @@ crt_corpc_req_hdlr(struct crt_rpc_priv *rpc_priv) opc_info = rpc_priv->crp_opc_info; co_ops = opc_info->coi_co_ops; - if (rpc_priv->crp_fail_hlc) - D_GOTO(forward_done, rc = -DER_HLC_SYNC); + if (rpc_priv->crp_fail_hlc) { + rc = -DER_HLC_SYNC; + RPC_ERROR(rpc_priv, "crp_fail_hlc (group %s) failed: " DF_RC "\n", + co_info->co_grp_priv->gp_pub.cg_grpid, DP_RC(rc)); + crt_corpc_fail_parent_rpc(rpc_priv, rc); + D_GOTO(forward_done, rc); + } /* Invoke pre-forward callback first if it is registered */ if (co_ops && co_ops->co_pre_forward) { @@ -899,20 +904,28 @@ crt_corpc_req_hdlr(struct crt_rpc_priv *rpc_priv) } forward_done: - if (rc != 0 && rpc_priv->crp_flags & CRT_RPC_FLAG_CO_FAILOUT) - co_failout = true; + if (rc != 0) { + /* reset rc to 0 as it already failed the parent/child RPC and + * will be replied/completed by crt_corpc_complete(). + */ + rc = 0; + if (rpc_priv->crp_flags & CRT_RPC_FLAG_CO_FAILOUT) + co_failout = true; + } - /* NOOP bcast (no child and root excluded) */ - if (co_info->co_child_num == 0 && (co_info->co_root_excluded || co_failout)) - crt_corpc_complete(rpc_priv); + /* need not call local RPC handler */ + if (co_info->co_root_excluded || co_failout) { + /* NOOP bcast (no child and root excluded) */ + if (co_info->co_child_num == 0) + crt_corpc_complete(rpc_priv); - if (co_info->co_root_excluded == 1 || co_failout) { - if (co_info->co_grp_priv->gp_self == co_info->co_root) { - /* don't return error for root to avoid RPC_DECREF in - * fail case in crt_req_send. - */ - rc = 0; - } + /* Corresponding the initial ref 1 in crt_rpc_handler_common() -> + * crt_rpc_priv_init(rpc_priv, crt_ctx, true). 
+ * That ref commonly will be released by crt_rpc_common_hdlr() -> crt_handle_rpc(), + * here as will not call crt_rpc_common_hdlr() so drop it explicitly. + */ + if (rpc_priv->crp_srv) + RPC_DECREF(rpc_priv); D_GOTO(out, rc); } diff --git a/src/cart/crt_hg.c b/src/cart/crt_hg.c index cdd25909ca1..19d97dd0523 100644 --- a/src/cart/crt_hg.c +++ b/src/cart/crt_hg.c @@ -1601,6 +1601,7 @@ crt_hg_reply_send(struct crt_rpc_priv *rpc_priv) D_ASSERT(rpc_priv != NULL); + /* corresponds to decref in crt_hg_reply_send_cb */ RPC_ADDREF(rpc_priv); hg_ret = HG_Respond(rpc_priv->crp_hg_hdl, crt_hg_reply_send_cb, rpc_priv, &rpc_priv->crp_pub.cr_output); From 4029b6801b0f2fe88568befbcb9e73d1613e76a5 Mon Sep 17 00:00:00 2001 From: Niu Yawei Date: Tue, 30 Dec 2025 22:38:58 +0800 Subject: [PATCH 107/253] DAOS-18384 mgmt: skip vos file pre-allocation (#17318) When a VOS pool is removed by ddb rm_pool, skip the VOS file pre-allocation in md-on-ssd mode. Signed-off-by: Niu Yawei --- src/bio/bio_context.c | 38 ++++++++++++++++++++++++++ src/include/daos_srv/bio.h | 14 ++++++++++ src/include/daos_srv/mgmt_tgt_common.h | 3 +- src/mgmt/mgmt_common.c | 23 ++++++++++++++-- src/mgmt/srv_target.c | 2 +- 5 files changed, 75 insertions(+), 5 deletions(-) diff --git a/src/bio/bio_context.c b/src/bio/bio_context.c index 7c41a1abdcd..a404915705c 100644 --- a/src/bio/bio_context.c +++ b/src/bio/bio_context.c @@ -1365,3 +1365,41 @@ bio_mc2ioc(struct bio_meta_context *mc, enum smd_dev_type type) return NULL; } } + +/* + * Check if any blob (WAL, meta or data) is created for a pool target, + * return true if any blob is created, otherwise return false. 
+ */ +bool +bio_pool_tgt_created(uuid_t pool_id, int tgt_id, enum bio_mc_flags flags) +{ + enum smd_dev_type st; + spdk_blob_id blob_id; + int rc; + + /* Always return true for pmem mode */ + if (!bio_nvme_configured(SMD_DEV_TYPE_META)) + return true; + + for (st = SMD_DEV_TYPE_DATA; st < SMD_DEV_TYPE_MAX; st++) { + if (flags & BIO_MC_FL_RDB) { + if (st == SMD_DEV_TYPE_DATA) + continue; + rc = smd_rdb_get_blob(pool_id, tgt_id, st, &blob_id); + } else { + rc = smd_pool_get_blob(pool_id, tgt_id, st, &blob_id); + } + + if (rc == 0) { + return true; + } else if (rc == -DER_NONEXIST) { + continue; + } else if (rc) { + DL_ERROR(rc, "Failed to query pool " DF_UUID " tgt:%d", DP_UUID(pool_id), + tgt_id); + continue; + } + } + + return false; +} diff --git a/src/include/daos_srv/bio.h b/src/include/daos_srv/bio.h index 6a2ddb0240b..3b9ecddadcc 100644 --- a/src/include/daos_srv/bio.h +++ b/src/include/daos_srv/bio.h @@ -1202,4 +1202,18 @@ bool bio_meta_is_empty(struct bio_meta_context *mc); */ int bio_meta_clear_empty(struct bio_meta_context *mc); +/* + * Check if any blob is created for a pool target. This function is supposed to + * be called in md-on-ssd mode. + * + * \param[in] pool_id pool UUID + * \param[in] tgt_id VOS target ID + * \param[in] flags bio_mc_flags + * + * \return true, when any blob is created for the pool target + * false, when no blob is created + */ +bool +bio_pool_tgt_created(uuid_t pool_id, int tgt_id, enum bio_mc_flags flags); + #endif /* __BIO_API_H__ */ diff --git a/src/include/daos_srv/mgmt_tgt_common.h b/src/include/daos_srv/mgmt_tgt_common.h index e3d616f0339..f7c551d39f8 100644 --- a/src/include/daos_srv/mgmt_tgt_common.h +++ b/src/include/daos_srv/mgmt_tgt_common.h @@ -74,11 +74,12 @@ ds_mgmt_tgt_recreate(uuid_t pool_uuid, daos_size_t scm_size, int tgt_nr, daos_si * \param[in] cancel_pending If true, preallocate will abort * \param[in] newborns_path Base path for store vos/rdb files * \param[in] bind_cpu_fn e.g. 
`dss_bind_to_xstream_cpuset` + * \param[in] skip_bitmap Bitmap for the targets being skipped */ int ds_mgmt_tgt_preallocate_parallel(uuid_t uuid, daos_size_t scm_size, int tgt_nr, bool *cancel_pending, const char *newborns_path, - bind_cpu_fn_t bind_cpu_fn); + bind_cpu_fn_t bind_cpu_fn, uint8_t *skip_bitmap); /** * Sequential recreate vos files. diff --git a/src/mgmt/mgmt_common.c b/src/mgmt/mgmt_common.c index bba5392c107..6cf29102fa9 100644 --- a/src/mgmt/mgmt_common.c +++ b/src/mgmt/mgmt_common.c @@ -91,8 +91,9 @@ ds_mgmt_tgt_recreate(uuid_t pool_uuid, daos_size_t scm_size, int tgt_nr, daos_si char *rdb_path = NULL; bool dummy_cancel_state = false; int rc; - int fd; + int fd, tgt_id; struct stat statbuf; + uint8_t *skip_bitmap = NULL; D_ASSERT(bio_nvme_configured(SMD_DEV_TYPE_META)); @@ -132,9 +133,22 @@ ds_mgmt_tgt_recreate(uuid_t pool_uuid, daos_size_t scm_size, int tgt_nr, daos_si goto out; } + D_ASSERT(tgt_nr > 0); + D_ALLOC(skip_bitmap, (tgt_nr + 7) / 8); + if (skip_bitmap == NULL) { + rc = -DER_NOMEM; + D_ERROR("Failed to allocate target bitmap.\n"); + goto out; + } + + for (tgt_id = 0; tgt_id < tgt_nr; tgt_id++) { + if (!bio_pool_tgt_created(pool_uuid, tgt_id, 0)) + setbit(skip_bitmap, tgt_id); + } + /** create VOS files */ rc = ds_mgmt_tgt_preallocate_parallel(pool_uuid, scm_size, tgt_nr, &dummy_cancel_state, - newborns_path, bind_cpu_fn); + newborns_path, bind_cpu_fn, skip_bitmap); if (rc) { D_ERROR(DF_UUID ": failed to create tgt vos files: " DF_RC "\n", DP_UUID(pool_uuid), DP_RC(rc)); @@ -182,6 +196,7 @@ ds_mgmt_tgt_recreate(uuid_t pool_uuid, daos_size_t scm_size, int tgt_nr, daos_si D_FREE(newborns_path); D_FREE(pool_newborns_path); D_FREE(pool_path); + D_FREE(skip_bitmap); return rc; } @@ -307,7 +322,7 @@ ds_mgmt_tgt_preallocate_sequential(uuid_t uuid, daos_size_t scm_size, int tgt_nr int ds_mgmt_tgt_preallocate_parallel(uuid_t uuid, daos_size_t scm_size, int tgt_nr, bool *cancel_pending, const char *newborns_path, - bind_cpu_fn_t bind_cpu_fn) + 
bind_cpu_fn_t bind_cpu_fn, uint8_t *skip_bitmap) { int i; int rc; @@ -326,6 +341,8 @@ ds_mgmt_tgt_preallocate_parallel(uuid_t uuid, daos_size_t scm_size, int tgt_nr, pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &old_cancelstate); for (i = 0; i < tgt_nr; i++) { + if (skip_bitmap && isset(skip_bitmap, i)) + continue; entry = &thrds_list[i]; uuid_copy(entry->tvt_args.tvpa_uuid, uuid); entry->tvt_args.tvpa_scm_size = scm_size; diff --git a/src/mgmt/srv_target.c b/src/mgmt/srv_target.c index 9f771cddc75..3080d92f430 100644 --- a/src/mgmt/srv_target.c +++ b/src/mgmt/srv_target.c @@ -702,7 +702,7 @@ tgt_create_preallocate(void *arg) rc = ds_mgmt_tgt_preallocate_parallel( tca->tca_ptrec->dptr_uuid, tca->tca_scm_size / dss_tgt_nr, dss_tgt_nr, &tca->tca_ptrec->cancel_create, newborns_path, - dss_bind_to_xstream_cpuset); + dss_bind_to_xstream_cpuset, NULL); } if (rc) goto out; From d9ae7df2bf1115dd5647dbd30ed4603138849000 Mon Sep 17 00:00:00 2001 From: Liu Xuezhao Date: Wed, 31 Dec 2025 13:16:49 +0800 Subject: [PATCH 108/253] DAOS-17843 rebuild: add some logs for rebuild enumerate (#17309) 1. add some DEBUG logs for rebuild enumerate 2. add WARN log if some engines did not report EC agg epoch progress in 600S. 3. 
Fix a typo for sub_anchors->sa_nr compare Signed-off-by: Xuezhao Liu --- src/container/srv_container.c | 16 ++++++++++++- src/container/srv_internal.h | 1 + src/object/cli_obj.c | 16 ++++++++++++- src/object/obj_enum.c | 6 ++--- src/object/srv_obj.c | 30 +++++++++++++++++++++++- src/object/srv_obj_migrate.c | 43 +++++++++++++++++++++++++++-------- 6 files changed, 96 insertions(+), 16 deletions(-) diff --git a/src/container/srv_container.c b/src/container/srv_container.c index 582c5f97352..578784d3d3c 100644 --- a/src/container/srv_container.c +++ b/src/container/srv_container.c @@ -1734,6 +1734,7 @@ cont_track_eph_leader_alloc(struct cont_svc *cont_svc, uuid_t cont_uuid, eph_ldr->cte_server_ephs[i].re_rank = doms[i].do_comp.co_rank; eph_ldr->cte_server_ephs[i].re_ec_agg_eph = 0; eph_ldr->cte_server_ephs[i].re_stable_eph = 0; + eph_ldr->cte_server_ephs[i].re_ec_agg_eph_update_ts = daos_gettime_coarse(); } d_list_add(&eph_ldr->cte_list, &cont_svc->cs_cont_ephs_leader_list); *leader_p = eph_ldr; @@ -1790,8 +1791,11 @@ ds_cont_leader_update_track_eph(uuid_t pool_uuid, uuid_t cont_uuid, d_rank_t ran for (i = 0; i < eph_ldr->cte_servers_num; i++) { if (eph_ldr->cte_server_ephs[i].re_rank == rank) { - if (eph_ldr->cte_server_ephs[i].re_ec_agg_eph < ec_agg_eph) + if (eph_ldr->cte_server_ephs[i].re_ec_agg_eph < ec_agg_eph) { eph_ldr->cte_server_ephs[i].re_ec_agg_eph = ec_agg_eph; + eph_ldr->cte_server_ephs[i].re_ec_agg_eph_update_ts = + daos_gettime_coarse(); + } if (eph_ldr->cte_server_ephs[i].re_stable_eph < stable_eph) eph_ldr->cte_server_ephs[i].re_stable_eph = stable_eph; break; @@ -2056,6 +2060,7 @@ cont_agg_eph_sync(struct ds_pool *pool, struct cont_svc *svc) uint64_t cur_eph, new_eph; daos_epoch_t min_ec_agg_eph; daos_epoch_t min_stable_eph; + uint64_t cur_ts; int i; int rc = 0; @@ -2090,6 +2095,7 @@ cont_agg_eph_sync(struct ds_pool *pool, struct cont_svc *svc) min_ec_agg_eph = DAOS_EPOCH_MAX; min_stable_eph = DAOS_EPOCH_MAX; + cur_ts = daos_gettime_coarse(); for 
(i = 0; i < eph_ldr->cte_servers_num; i++) { d_rank_t rank = eph_ldr->cte_server_ephs[i].re_rank; @@ -2099,6 +2105,14 @@ cont_agg_eph_sync(struct ds_pool *pool, struct cont_svc *svc) continue; } + if (pool->sp_reclaim != DAOS_RECLAIM_DISABLED && + cur_ts > eph_ldr->cte_server_ephs[i].re_ec_agg_eph_update_ts + 600) + D_WARN(DF_CONT ": Sluggish EC boundary report from rank %d, " DF_U64 + " Seconds.", + DP_CONT(svc->cs_pool_uuid, eph_ldr->cte_cont_uuid), rank, + cur_ts - + eph_ldr->cte_server_ephs[i].re_ec_agg_eph_update_ts); + if (eph_ldr->cte_server_ephs[i].re_ec_agg_eph < min_ec_agg_eph) min_ec_agg_eph = eph_ldr->cte_server_ephs[i].re_ec_agg_eph; if (eph_ldr->cte_server_ephs[i].re_stable_eph < min_stable_eph) diff --git a/src/container/srv_internal.h b/src/container/srv_internal.h index 469a671ffb0..7e4a6c8a626 100644 --- a/src/container/srv_internal.h +++ b/src/container/srv_internal.h @@ -62,6 +62,7 @@ struct rank_eph { d_rank_t re_rank; daos_epoch_t re_ec_agg_eph; daos_epoch_t re_stable_eph; + uint64_t re_ec_agg_eph_update_ts; /* re_ec_agg_eph update timestamp */ }; /* container EC aggregation epoch and stable epoch control descriptor, which is only on leader */ diff --git a/src/object/cli_obj.c b/src/object/cli_obj.c index 0cc48dcea1f..d2c309a990d 100644 --- a/src/object/cli_obj.c +++ b/src/object/cli_obj.c @@ -4138,8 +4138,10 @@ anchor_update_check_eof(struct obj_auxi_args *obj_auxi, daos_anchor_t *anchor) obj_auxi_shards_iterate(obj_auxi, update_sub_anchor_cb, NULL); sub_anchors = (struct shard_anchors *)anchor->da_sub_anchors; - if (!d_list_empty(&sub_anchors->sa_merged_list)) + if (!d_list_empty(&sub_anchors->sa_merged_list)) { + D_ASSERT(obj_auxi->opc != DAOS_OBJ_RPC_ENUMERATE); return; + } if (sub_anchors_is_eof(sub_anchors)) { daos_obj_list_t *obj_args; @@ -4148,6 +4150,18 @@ anchor_update_check_eof(struct obj_auxi_args *obj_auxi, daos_anchor_t *anchor) obj_args = dc_task_get_args(obj_auxi->obj_task); sub_anchors_free(obj_args, obj_auxi->opc); + } else 
if (obj_auxi->opc == DAOS_OBJ_RPC_ENUMERATE) { + for (int i = 0; i < sub_anchors->sa_anchors_nr; i++) { + daos_anchor_t *sub_anchor; + + sub_anchor = &sub_anchors->sa_anchors[i].ssa_anchor; + if (!daos_anchor_is_eof(sub_anchor)) { + D_DEBUG(DB_REBUILD, "shard %d sub_anchor %d/%d non EOF", + sub_anchors->sa_anchors[i].ssa_shard, i, + sub_anchors->sa_anchors_nr); + break; + } + } } } diff --git a/src/object/obj_enum.c b/src/object/obj_enum.c index 84669771669..4175d7de907 100644 --- a/src/object/obj_enum.c +++ b/src/object/obj_enum.c @@ -1,5 +1,6 @@ /* * (C) Copyright 2018-2022 Intel Corporation. + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -689,9 +690,8 @@ obj_enum_iterate(daos_key_desc_t *kdss, d_sg_list_t *sgl, int nr, ptr = sgl_indexed_byte(sgl, &sgl_idx); D_ASSERTF(ptr != NULL, "kds and sgl don't line up"); - D_DEBUG(DB_REBUILD, "process %d, type %d, ptr %p, len "DF_U64 - ", total %zd\n", i, kds->kd_val_type, ptr, - kds->kd_key_len, sgl->sg_iovs[0].iov_len); + D_DEBUG(DB_REBUILD, "process %d/%d, type %d, ptr %p, len " DF_U64 ", total %zd\n", + i, nr, kds->kd_val_type, ptr, kds->kd_key_len, sgl->sg_iovs[0].iov_len); if (kds->kd_val_type == 0 || (kds->kd_val_type != type && type != -1)) { sgl_move_forward(sgl, &sgl_idx, kds->kd_key_len); diff --git a/src/object/srv_obj.c b/src/object/srv_obj.c index ef6c5a26830..6ea4bb63ab6 100644 --- a/src/object/srv_obj.c +++ b/src/object/srv_obj.c @@ -3246,6 +3246,27 @@ obj_enum_complete(crt_rpc_t *rpc, int status, int map_version, D_FREE(oeo->oeo_csum_iov.iov_buf); } +static void +dump_enum_anchor(daos_unit_oid_t uoid, daos_anchor_t *anchor, char *str) +{ + int nr = DAOS_ANCHOR_BUF_MAX / 8; + int i; + uint64_t data[nr]; + + D_DEBUG(DB_REBUILD, DF_UOID "%s anchor -", DP_UOID(uoid), str); + D_DEBUG(DB_REBUILD, "type %d, shard %d, flags 0x%x\n", anchor->da_type, anchor->da_shard, + anchor->da_flags); + for (i = 0; i < nr; i++) + data[i] = *(uint64_t 
*)((char *)anchor->da_buf + i * 8); + if (nr >= 13) + D_DEBUG(DB_REBUILD, + "da_buf " DF_X64 "," DF_X64 "," DF_X64 "," DF_X64 "," DF_X64 "," DF_X64 + "," DF_X64 "," DF_X64 "," DF_X64 "," DF_X64 "," DF_X64 "," DF_X64 + "," DF_X64, + data[0], data[1], data[2], data[3], data[4], data[5], data[6], data[7], + data[8], data[9], data[10], data[11], data[12]); +} + static int obj_local_enum(struct obj_io_context *ioc, crt_rpc_t *rpc, struct vos_iter_anchors *anchors, struct ds_obj_enum_arg *enum_arg, @@ -3314,6 +3335,8 @@ obj_local_enum(struct obj_io_context *ioc, crt_rpc_t *rpc, D_ASSERT(opc == DAOS_OBJ_RPC_ENUMERATE); type = VOS_ITER_DKEY; param.ip_flags |= VOS_IT_RECX_VISIBLE; + dump_enum_anchor(oei->oei_oid, &anchors->ia_dkey, "dkey"); + dump_enum_anchor(oei->oei_oid, &anchors->ia_akey, "akey"); if (daos_anchor_get_flags(&anchors->ia_dkey) & DIOF_WITH_SPEC_EPOCH) { /* For obj verification case. */ @@ -3331,7 +3354,12 @@ obj_local_enum(struct obj_io_context *ioc, crt_rpc_t *rpc, enum_arg->chk_key2big = 1; enum_arg->need_punch = 1; enum_arg->copy_data_cb = vos_iter_copy; - fill_oid(oei->oei_oid, enum_arg); + rc = fill_oid(oei->oei_oid, enum_arg); + if (rc != 0) { + rc = -DER_KEY2BIG; + DL_ERROR(rc, DF_UOID "fill oid failed", DP_UOID(oei->oei_oid)); + goto failed; + } } /* diff --git a/src/object/srv_obj_migrate.c b/src/object/srv_obj_migrate.c index 7c6dfb328ba..6f986e39f02 100644 --- a/src/object/srv_obj_migrate.c +++ b/src/object/srv_obj_migrate.c @@ -2583,9 +2583,18 @@ migrate_enum_unpack_cb(struct dc_obj_enum_unpack_io *io, void *data) if (rc == 1 && (is_ec_data_shard_by_tgt_off(unpack_tgt_off, &arg->oc_attr) || (io->ui_oid.id_layout_ver > 0 && io->ui_oid.id_shard != parity_shard))) { - D_DEBUG(DB_REBUILD, DF_RB ": " DF_UOID " ignore shard " DF_KEY "/%u/%d/%u/%d.\n", - DP_RB_MPT(tls), DP_UOID(io->ui_oid), DP_KEY(&io->ui_dkey), shard, - (int)obj_ec_shard_off(obj, io->ui_dkey_hash, 0), parity_shard, rc); + if (daos_is_dkey_uint64(io->ui_oid.id_pub) && 
io->ui_dkey.iov_len == 8) + D_DEBUG(DB_REBUILD, + DF_RB ": " DF_UOID " ignore shard, int dkey " DF_U64 + "/%u/%d/%u/%d.\n", + DP_RB_MPT(tls), DP_UOID(io->ui_oid), + *(uint64_t *)io->ui_dkey.iov_buf, shard, + (int)obj_ec_shard_off(obj, io->ui_dkey_hash, 0), parity_shard, rc); + else + D_DEBUG(DB_REBUILD, + DF_RB ": " DF_UOID " ignore shard " DF_KEY "/%u/%d/%u/%d.\n", + DP_RB_MPT(tls), DP_UOID(io->ui_oid), DP_KEY(&io->ui_dkey), shard, + (int)obj_ec_shard_off(obj, io->ui_dkey_hash, 0), parity_shard, rc); D_GOTO(put, rc = 0); } rc = 0; @@ -2601,11 +2610,19 @@ migrate_enum_unpack_cb(struct dc_obj_enum_unpack_io *io, void *data) continue; } - D_DEBUG(DB_REBUILD, - DF_RB ": " DF_UOID " unpack " DF_KEY " for shard " - "%u/%u/%u/" DF_X64 "/%u\n", - DP_RB_MPT(tls), DP_UOID(io->ui_oid), DP_KEY(&io->ui_dkey), shard, - unpack_tgt_off, migrate_tgt_off, io->ui_dkey_hash, parity_shard); + if (daos_is_dkey_uint64(io->ui_oid.id_pub) && io->ui_dkey.iov_len == 8) + D_DEBUG(DB_REBUILD, + DF_RB ": " DF_UOID " unpack int dkey " DF_U64 " for shard " + "%u/%u/%u/" DF_X64 "/%u\n", + DP_RB_MPT(tls), DP_UOID(io->ui_oid), + *(uint64_t *)io->ui_dkey.iov_buf, shard, unpack_tgt_off, + migrate_tgt_off, io->ui_dkey_hash, parity_shard); + else + D_DEBUG(DB_REBUILD, + DF_RB ": " DF_UOID " unpack " DF_KEY " for shard " + "%u/%u/%u/" DF_X64 "/%u\n", + DP_RB_MPT(tls), DP_UOID(io->ui_oid), DP_KEY(&io->ui_dkey), shard, + unpack_tgt_off, migrate_tgt_off, io->ui_dkey_hash, parity_shard); /** * Since we do not need split the rebuild into parity rebuild @@ -2643,8 +2660,14 @@ migrate_enum_unpack_cb(struct dc_obj_enum_unpack_io *io, void *data) if (!create_migrate_one) { struct ds_cont_child *cont = NULL; - D_DEBUG(DB_REBUILD, DF_RB ": " DF_UOID "/" DF_KEY " does not need rebuild.\n", - DP_RB_MPT(tls), DP_UOID(io->ui_oid), DP_KEY(&io->ui_dkey)); + if (daos_is_dkey_uint64(io->ui_oid.id_pub) && io->ui_dkey.iov_len == 8) + D_DEBUG(DB_REBUILD, + DF_RB ": " DF_UOID "/int dkey: " DF_U64 " does not need rebuild.", + 
DP_RB_MPT(tls), DP_UOID(io->ui_oid), + *(uint64_t *)io->ui_dkey.iov_buf); + else + D_DEBUG(DB_REBUILD, DF_RB ": " DF_UOID "/" DF_KEY " does not need rebuild.", + DP_RB_MPT(tls), DP_UOID(io->ui_oid), DP_KEY(&io->ui_dkey)); /* Create the vos container when no record need to be rebuilt for this shard, * for the case of reintegrate the container was discarded ahead. From e6258eeccd64edd7770fc0c1d12e1eef34137a7f Mon Sep 17 00:00:00 2001 From: Jan Michalski Date: Mon, 5 Jan 2026 16:30:11 +0000 Subject: [PATCH 109/253] DAOS-18378 dlck: introduce option to enable the standard DAOS log (#17331) Signed-off-by: Jan Michalski --- src/utils/dlck/dlck_args.h | 2 ++ src/utils/dlck/dlck_args_common.c | 8 ++++++++ src/utils/dlck/dlck_main.c | 11 +++++++++++ 3 files changed, 21 insertions(+) diff --git a/src/utils/dlck/dlck_args.h b/src/utils/dlck/dlck_args.h index 4ea3a6319c1..4cac4865659 100644 --- a/src/utils/dlck/dlck_args.h +++ b/src/utils/dlck/dlck_args.h @@ -26,6 +26,7 @@ /** all short options */ #define KEY_COMMON_OPTIONS 'o' #define KEY_COMMON_WRITE_MODE 'w' +#define KEY_COMMON_VERBOSE 'v' #define KEY_FILES 'f' /** the options below follow the daos_engine options */ #define KEY_ENGINE_NUMA_NODE 'p' @@ -47,6 +48,7 @@ struct dlck_args_common { struct checker_options options; bool write_mode; /** false by default (dry run) */ + bool verbose; /** false by default */ }; /** diff --git a/src/utils/dlck/dlck_args_common.c b/src/utils/dlck/dlck_args_common.c index bc65af4c15a..d11a56239c6 100644 --- a/src/utils/dlck/dlck_args_common.c +++ b/src/utils/dlck/dlck_args_common.c @@ -24,6 +24,10 @@ static struct argp_option args_common_options[] = { LIST_ENTRY(DLCK_OPT_NON_ZERO_PADDING_STR "=EVENT", "Action to take when non-zero padding or reserved fields are detected. EVENT can be " "either 'error' or 'warning'. 
It is 'error' by default."), + /** this is expected to be necessary only while solving issues with the tool itself so it seems + to fit better with a different group */ + {"verbose", KEY_COMMON_VERBOSE, 0, 0, + "Print DAOS log messages. All standard environment variables apply.", GROUP_AUTOMAGIC}, {0}}; enum dlck_options_values { DLCK_OPT_NON_ZERO_PADDING }; @@ -38,6 +42,7 @@ args_common_init(struct dlck_args_common *args) memset(args, 0, sizeof(*args)); /** set defaults */ args->write_mode = false; /** dry run */ + args->verbose = false; args->options.cko_non_zero_padding = CHECKER_EVENT_WARNING; } @@ -87,6 +92,9 @@ args_common_parser(int key, char *arg, struct argp_state *state) case KEY_COMMON_WRITE_MODE: args->write_mode = true; break; + case KEY_COMMON_VERBOSE: + args->verbose = true; + break; case KEY_COMMON_OPTIONS: rc = args_common_options_parse(arg, &args->options, state); break; diff --git a/src/utils/dlck/dlck_main.c b/src/utils/dlck/dlck_main.c index b75c7981755..01311155847 100644 --- a/src/utils/dlck/dlck_main.c +++ b/src/utils/dlck/dlck_main.c @@ -37,6 +37,13 @@ main(int argc, char *argv[]) dlck_args_parse(argc, argv, &ctrl); + if (ctrl.common.verbose) { + rc = daos_debug_init_ex(DAOS_LOG_DEFAULT, DLOG_ERR); + if (rc != 0) { + goto err_args_free; + } + } + rc_abt = ABT_init(0, NULL); if (rc_abt != ABT_SUCCESS) { rc = dss_abterr2der(rc_abt); @@ -78,6 +85,10 @@ main(int argc, char *argv[]) err_abt_fini: (void)ABT_finalize(); err_args_free: + if (ctrl.common.verbose) { + daos_debug_fini(); + } + dlck_args_free(&ctrl); (void)d_fault_inject_fini(); From 2421adbdd4b740202f5e93b34066e422ccb62d1f Mon Sep 17 00:00:00 2001 From: Li Wei Date: Tue, 6 Jan 2026 01:31:29 +0900 Subject: [PATCH 110/253] DAOS-18119 container: Fix cont_iv_prop_fetch_ult log messages (#17222) Add DF_CONT to cont_iv_prop_fetch_ult log messages. 
Signed-off-by: Li Wei --- src/container/container_iv.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/container/container_iv.c b/src/container/container_iv.c index ede0c0c56fc..05a15fb751d 100644 --- a/src/container/container_iv.c +++ b/src/container/container_iv.c @@ -1587,19 +1587,22 @@ cont_iv_prop_fetch_ult(void *data) iv_entry, iv_entry_size, iv_entry_size, false /* retry */); if (rc) { - DL_CDEBUG(rc == -DER_NOTLEADER, DB_ANY, DLOG_ERR, rc, "cont_iv_fetch failed"); + DL_CDEBUG(rc == -DER_NOTLEADER, DB_ANY, DLOG_ERR, rc, + DF_CONT ": cont_iv_fetch failed", DP_CONT(pool->sp_uuid, arg->cont_uuid)); D_GOTO(out, rc); } rc = cont_iv_prop_g2l(&iv_entry->iv_prop, &prop_fetch); if (rc) { - D_ERROR("cont_iv_prop_g2l failed "DF_RC"\n", DP_RC(rc)); + DL_ERROR(rc, DF_CONT ": cont_iv_prop_g2l failed", + DP_CONT(pool->sp_uuid, arg->cont_uuid)); D_GOTO(out, rc); } rc = daos_prop_copy(prop, prop_fetch); if (rc) { - D_ERROR("daos_prop_copy failed "DF_RC"\n", DP_RC(rc)); + DL_ERROR(rc, DF_CONT ": daos_prop_copy failed", + DP_CONT(pool->sp_uuid, arg->cont_uuid)); D_GOTO(out, rc); } From 540df3acfb451814147c0b0f59047f6c60bb7d3f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 5 Jan 2026 13:49:48 -0800 Subject: [PATCH 111/253] DAOS-18407 cq: Bump GHA versions (#17337) Updates `EnricoMi/publish-unit-test-result-action` from 2.21.0 to 2.22.0 Updates `github/codeql-action` from 4.31.8 to 4.31.9 Updates `dorny/test-reporter` from 2.3.0 to 2.5.0 Signed-off-by: dependabot[bot] --- .github/workflows/bullseye-coverage.yml | 4 ++-- .github/workflows/ci2.yml | 2 +- .github/workflows/landing-builds.yml | 2 +- .github/workflows/ossf-scorecard.yml | 2 +- .github/workflows/rpm-build-and-test-report.yml | 4 ++-- .github/workflows/rpm-build-and-test.yml | 4 ++-- .github/workflows/trivy.yml | 2 +- 7 files changed, 10 insertions(+), 10 deletions(-) diff --git a/.github/workflows/bullseye-coverage.yml 
b/.github/workflows/bullseye-coverage.yml index d34c51b0a5e..52c9b5dea81 100644 --- a/.github/workflows/bullseye-coverage.yml +++ b/.github/workflows/bullseye-coverage.yml @@ -366,7 +366,7 @@ jobs: if: (!cancelled()) && (success() || failure()) && steps.run-test.outcome != 'skipped' # yamllint disable-line rule:line-length - uses: EnricoMi/publish-unit-test-result-action@34d7c956a59aed1bfebf31df77b8de55db9bbaaf # v2.21.0 + uses: EnricoMi/publish-unit-test-result-action@27d65e188ec43221b20d26de30f4892fad91df2f # v2.22.0 with: check_name: ${{ env.STAGE_NAME }} Test Results github_token: ${{ secrets.GITHUB_TOKEN }} @@ -634,7 +634,7 @@ jobs: if: (!cancelled()) && (success() || failure()) && steps.run-test.outcome != 'skipped' # yamllint disable-line rule:line-length - uses: EnricoMi/publish-unit-test-result-action@34d7c956a59aed1bfebf31df77b8de55db9bbaaf # v2.21.0 + uses: EnricoMi/publish-unit-test-result-action@27d65e188ec43221b20d26de30f4892fad91df2f # v2.22.0 with: check_name: ${{ env.STAGE_NAME }} Test Results github_token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/ci2.yml b/.github/workflows/ci2.yml index 1eea2010590..80a3ade7968 100644 --- a/.github/workflows/ci2.yml +++ b/.github/workflows/ci2.yml @@ -68,7 +68,7 @@ jobs: - name: Publish NLT test results if: always() # yamllint disable-line rule:line-length - uses: EnricoMi/publish-unit-test-result-action@34d7c956a59aed1bfebf31df77b8de55db9bbaaf # v2.21.0 + uses: EnricoMi/publish-unit-test-result-action@27d65e188ec43221b20d26de30f4892fad91df2f # v2.22.0 with: github_token: ${{ secrets.GITHUB_TOKEN }} files: nlt-junit.xml diff --git a/.github/workflows/landing-builds.yml b/.github/workflows/landing-builds.yml index 80026a3c8e7..f126cca4465 100644 --- a/.github/workflows/landing-builds.yml +++ b/.github/workflows/landing-builds.yml @@ -144,7 +144,7 @@ jobs: - name: Publish NLT test results if: always() # yamllint disable-line rule:line-length - uses: 
EnricoMi/publish-unit-test-result-action@34d7c956a59aed1bfebf31df77b8de55db9bbaaf # v2.21.0 + uses: EnricoMi/publish-unit-test-result-action@27d65e188ec43221b20d26de30f4892fad91df2f # v2.22.0 with: github_token: ${{ secrets.GITHUB_TOKEN }} files: nlt-junit.xml diff --git a/.github/workflows/ossf-scorecard.yml b/.github/workflows/ossf-scorecard.yml index 1f28cc096f3..78aad4c6682 100644 --- a/.github/workflows/ossf-scorecard.yml +++ b/.github/workflows/ossf-scorecard.yml @@ -71,6 +71,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard (optional). # Commenting out will disable upload of results to your repo's Code Scanning dashboard - name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@1b168cd39490f61582a9beae412bb7057a6b2c4e # v4.31.8 + uses: github/codeql-action/upload-sarif@5d4e8d1aca955e8d8589aabd499c5cae939e33c7 # v4.31.9 with: sarif_file: results.sarif diff --git a/.github/workflows/rpm-build-and-test-report.yml b/.github/workflows/rpm-build-and-test-report.yml index 3fd674e1144..aae911b31c6 100644 --- a/.github/workflows/rpm-build-and-test-report.yml +++ b/.github/workflows/rpm-build-and-test-report.yml @@ -93,7 +93,7 @@ jobs: esac echo "STAGE_NAME=Build RPM on $DISTRO_NAME $DISTRO_VERSION" >> $GITHUB_ENV - name: Test Report - uses: dorny/test-reporter@fe45e9537387dac839af0d33ba56eed8e24189e8 # v2.3.0 + uses: dorny/test-reporter@b082adf0eced0765477756c2a610396589b8c637 # v2.5.0 with: artifact: ${{ env.STAGE_NAME }} test-results name: ${{ env.STAGE_NAME }} Test Results (dorny) @@ -112,7 +112,7 @@ jobs: - name: Set variables run: echo "STAGE_NAME=Functional Hardware ${{ matrix.stage }}" >> $GITHUB_ENV - name: Test Report - uses: dorny/test-reporter@fe45e9537387dac839af0d33ba56eed8e24189e8 # v2.3.0 + uses: dorny/test-reporter@b082adf0eced0765477756c2a610396589b8c637 # v2.5.0 with: artifact: ${{ env.STAGE_NAME }} test-results name: ${{ env.STAGE_NAME }} Test Results (dorny) diff --git a/.github/workflows/rpm-build-and-test.yml 
b/.github/workflows/rpm-build-and-test.yml index 56c6c61c85a..00eb9b021a8 100644 --- a/.github/workflows/rpm-build-and-test.yml +++ b/.github/workflows/rpm-build-and-test.yml @@ -375,7 +375,7 @@ jobs: if: (!cancelled()) && (success() || failure()) && steps.run-test.outcome != 'skipped' # yamllint disable-line rule:line-length - uses: EnricoMi/publish-unit-test-result-action@34d7c956a59aed1bfebf31df77b8de55db9bbaaf # v2.21.0 + uses: EnricoMi/publish-unit-test-result-action@27d65e188ec43221b20d26de30f4892fad91df2f # v2.22.0 with: check_name: ${{ env.STAGE_NAME }} Test Results github_token: ${{ secrets.GITHUB_TOKEN }} @@ -643,7 +643,7 @@ jobs: if: (!cancelled()) && (success() || failure()) && steps.run-test.outcome != 'skipped' # yamllint disable-line rule:line-length - uses: EnricoMi/publish-unit-test-result-action@34d7c956a59aed1bfebf31df77b8de55db9bbaaf # v2.21.0 + uses: EnricoMi/publish-unit-test-result-action@27d65e188ec43221b20d26de30f4892fad91df2f # v2.22.0 with: check_name: ${{ env.STAGE_NAME }} Test Results github_token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/trivy.yml b/.github/workflows/trivy.yml index df7b7fa0437..a3b1528788f 100644 --- a/.github/workflows/trivy.yml +++ b/.github/workflows/trivy.yml @@ -68,7 +68,7 @@ jobs: trivy-config: 'utils/trivy/trivy.yaml' - name: Upload Trivy scan results to GitHub Security tab - uses: github/codeql-action/upload-sarif@1b168cd39490f61582a9beae412bb7057a6b2c4e # v4.31.8 + uses: github/codeql-action/upload-sarif@5d4e8d1aca955e8d8589aabd499c5cae939e33c7 # v4.31.9 with: sarif_file: 'trivy-results.sarif' From 258970d527ee94919977bcdd8b38e5f7b5053240 Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Wed, 7 Jan 2026 09:51:43 +0800 Subject: [PATCH 112/253] DAOS-18368 object: refine EC rotate enumeration minimum_nr and break condition (#17336) - For EC parity rotate, set minimum_nr to data_tgt_nr + 1 instead of all EC targets. 
This ensures that, even with some shard failures, at least one shard can return 2 keys (KDs). - Make the enumeration break condition stricter: only break if num < minimum_nr, not <=, to avoid premature termination when the buffer is exactly full. Signed-off-by: Xuezhao Liu Signed-off-by: Wang Shilong --- src/object/srv_obj_migrate.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/object/srv_obj_migrate.c b/src/object/srv_obj_migrate.c index 6f986e39f02..d2a95aa3c79 100644 --- a/src/object/srv_obj_migrate.c +++ b/src/object/srv_obj_migrate.c @@ -1,6 +1,6 @@ /** * (C) Copyright 2019-2024 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -2897,9 +2897,11 @@ migrate_one_epoch_object(daos_epoch_range_t *epr, struct migrate_pool_tls *tls, if (daos_oclass_is_ec(&unpack_arg.oc_attr)) { p_csum = NULL; - /* EC rotate needs to fetch from all shards */ + /* EC rotate needs to fetch from all shards, at least with data_tgt_nr alive, + * at least one shard should get 2 KDs. 
+ */ if (obj_ec_parity_rotate_enabled_by_version(arg->oid.id_layout_ver)) - minimum_nr = obj_ec_tgt_nr(&unpack_arg.oc_attr); + minimum_nr = obj_ec_data_tgt_nr(&unpack_arg.oc_attr) + 1; else minimum_nr = 2; enum_flags |= DIOF_RECX_REVERSE; @@ -3031,7 +3033,7 @@ migrate_one_epoch_object(daos_epoch_range_t *epr, struct migrate_pool_tls *tls, } /* Each object enumeration RPC will at least one OID */ - if (num <= minimum_nr && (enum_flags & DIOF_TO_SPEC_GROUP)) { + if (num < minimum_nr && (enum_flags & DIOF_TO_SPEC_GROUP)) { D_DEBUG(DB_REBUILD, DF_RB ": enumeration buffer %u empty" DF_UOID "\n", DP_RB_MPT(tls), num, DP_UOID(arg->oid)); break; From e9c8283e40b39180afaffe48f7943b4af272989e Mon Sep 17 00:00:00 2001 From: Niu Yawei Date: Wed, 7 Jan 2026 15:16:30 +0800 Subject: [PATCH 113/253] DAOS-18420 pool: eliminate unnecessary error messages (#17343) ds_pool_child_lookup() is intented to get a pool_child expected to be in the STARTING or STARTED state. If the caller requires a pool_child without assuming a specific state, ds_pool_child_find() should be used instead. Signed-off-by: Niu Yawei --- src/pool/srv_util.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pool/srv_util.c b/src/pool/srv_util.c index 11e29f45bee..d67afe16050 100644 --- a/src/pool/srv_util.c +++ b/src/pool/srv_util.c @@ -1,6 +1,6 @@ /** * (C) Copyright 2016-2024 Intel Corporation. 
- * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -1512,7 +1512,7 @@ check_pool_targets(uuid_t pool_id, int *tgt_ids, int tgt_cnt, bool reint, int i, nr, rc = 0; /* Get pool map to check the target status */ - pool_child = ds_pool_child_lookup(pool_id); + pool_child = ds_pool_child_find(pool_id); if (pool_child == NULL) { D_ERROR(DF_UUID": Pool child not found\n", DP_UUID(pool_id)); /* From cd97fbb00c52bd19bf81d8bb4509fc3db208f249 Mon Sep 17 00:00:00 2001 From: Nasf-Fan Date: Thu, 8 Jan 2026 01:07:39 +0800 Subject: [PATCH 114/253] DAOS-18355 chk: check leader waits all check engines before exited (#17315) In old implementation, when the PS leader notifies the check leader that related pool has been checked, the check leader will mark such pool as 'done'. If all required pools have been marked as 'done', then the check leader will exit. But at that time, the check engine on related PS leader may not complete yet. There are something to be processed (such as restart pool server) after the checking the pool. The check engine will notify the check leader via CHK IV when exit. But the check leader does not wait such notification. Under such case, if someone tries to trigger new check instance, it will create new IV namespace. That will cause some check engines and the check leader to use different IV namespace, as to the CHK IV logic cannot recognize the leadership correctly. The patch adjust the leader exit logic: the leader scheduler needs to wait all check engines' notification before exit. 
Signed-off-by: Fan Yong --- src/chk/chk_engine.c | 4 +-- src/chk/chk_internal.h | 10 ++++++- src/chk/chk_leader.c | 60 +++++++++++++++++++-------------------- src/tests/suite/daos_cr.c | 8 +----- 4 files changed, 41 insertions(+), 41 deletions(-) diff --git a/src/chk/chk_engine.c b/src/chk/chk_engine.c index 8f4a56fe50a..85af34498c5 100644 --- a/src/chk/chk_engine.c +++ b/src/chk/chk_engine.c @@ -1,6 +1,6 @@ /** * (C) Copyright 2022-2024 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -1981,7 +1981,7 @@ chk_engine_sched(void *args) D_GOTO(out, rc); } - if (ins_phase > cbk->cb_phase) { + if (ins_phase != CHK_INVAL_PHASE && ins_phase > cbk->cb_phase) { D_INFO(DF_ENGINE" on rank %u moves from phase %u to phase %u\n", DP_ENGINE(ins), myrank, cbk->cb_phase, ins_phase); diff --git a/src/chk/chk_internal.h b/src/chk/chk_internal.h index 798154b2b2a..eb60039cb7d 100644 --- a/src/chk/chk_internal.h +++ b/src/chk/chk_internal.h @@ -1,6 +1,6 @@ /** * (C) Copyright 2022-2024 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -1173,6 +1173,14 @@ chk_pools_find_slowest(struct chk_instance *ins, int *done) phase = cpr->cpr_bk.cb_phase; } + /* All pools have been done, some check engines are still running, leader needs to wait. */ + if (ins->ci_orphan_done && *done > 0 && !d_list_empty(&ins->ci_rank_list)) { + D_ASSERT(ins->ci_is_leader); + + phase = CHK_INVAL_PHASE; + *done = 0; + } + return phase; } diff --git a/src/chk/chk_leader.c b/src/chk/chk_leader.c index 6a4e9fcdae6..a2cf5ca8995 100644 --- a/src/chk/chk_leader.c +++ b/src/chk/chk_leader.c @@ -1,6 +1,6 @@ /** * (C) Copyright 2022-2024 Intel Corporation. 
- * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -226,9 +226,10 @@ static void chk_leader_exit(struct chk_instance *ins, uint32_t ins_phase, uint32_t ins_status, uint32_t pool_status, bool bcast) { - struct chk_bookmark *cbk = &ins->ci_bk; - struct chk_iv iv = { 0 }; - int rc = 0; + struct chk_dead_rank *cdr; + struct chk_bookmark *cbk = &ins->ci_bk; + struct chk_iv iv = {0}; + int rc = 0; ins->ci_sched_exiting = 1; @@ -237,8 +238,7 @@ chk_leader_exit(struct chk_instance *ins, uint32_t ins_phase, uint32_t ins_statu chk_pool_stop_all(ins, pool_status, NULL); if ((bcast && ins_status == CHK__CHECK_INST_STATUS__CIS_FAILED) || - ins_status == CHK__CHECK_INST_STATUS__CIS_IMPLICATED || - unlikely(ins_status == CHK__CHECK_INST_STATUS__CIS_COMPLETED && !ins->ci_orphan_done)) { + ins_status == CHK__CHECK_INST_STATUS__CIS_IMPLICATED || !ins->ci_orphan_done) { iv.ci_gen = cbk->cb_gen; iv.ci_phase = ins_phase != CHK_INVAL_PHASE ? ins_phase : cbk->cb_phase; iv.ci_ins_status = ins_status; @@ -264,6 +264,10 @@ chk_leader_exit(struct chk_instance *ins, uint32_t ins_phase, uint32_t ins_statu DP_LEADER(ins), ins_status, DP_RC(rc)); } + while ((cdr = d_list_pop_entry(&ins->ci_dead_ranks, struct chk_dead_rank, cdr_link)) != + NULL) + D_FREE(cdr); + ins->ci_sched_exiting = 0; } @@ -305,31 +309,24 @@ chk_leader_post_repair(struct chk_instance *ins, struct chk_pool_rec *cpr, DP_UUID(cpr->cpr_uuid), rc); } - /* - * If the operation failed and 'failout' is set, then do nothing here. - * chk_leader_exit will handle all the IV and bookmark related things. 
- */ - if (*result == 0 || !(ins->ci_prop.cp_flags & CHK__CHECK_FLAG__CF_FAILOUT)) { - if (notify) { - iv.ci_gen = cbk->cb_gen; - uuid_copy(iv.ci_uuid, cpr->cpr_uuid); - iv.ci_ins_status = ins->ci_bk.cb_ins_status; - iv.ci_phase = cbk->cb_phase; - iv.ci_pool_status = cbk->cb_pool_status; - - /* Synchronously notify the engines that check on the pool got failure. */ - rc = chk_iv_update(ins->ci_iv_ns, &iv, CRT_IV_SHORTCUT_NONE, - CRT_IV_SYNC_EAGER, true); - D_CDEBUG(rc != 0, DLOG_ERR, DLOG_INFO, - DF_LEADER" notify the engines that the check for pool " - DF_UUIDF" is done with status %u: rc = %d\n", - DP_LEADER(ins), DP_UUID(cpr->cpr_uuid), iv.ci_pool_status, rc); - if (rc == 0) - cpr->cpr_notified_exit = 1; - } + if (notify) { + uuid_copy(iv.ci_uuid, cpr->cpr_uuid); + iv.ci_gen = cbk->cb_gen; + iv.ci_ins_status = ins->ci_bk.cb_ins_status; + iv.ci_phase = cbk->cb_phase; + iv.ci_pool_status = cbk->cb_pool_status; + + rc = chk_iv_update(ins->ci_iv_ns, &iv, CRT_IV_SHORTCUT_NONE, CRT_IV_SYNC_EAGER, + true); + DL_CDEBUG(rc != 0, DLOG_WARN, DLOG_INFO, rc, + DF_LEADER " notify engines that check pool " DF_UUIDF " done, status %u", + DP_LEADER(ins), DP_UUID(cpr->cpr_uuid), iv.ci_pool_status); + if (rc == 0) + cpr->cpr_notified_exit = 1; + } + if (!(ins->ci_prop.cp_flags & CHK__CHECK_FLAG__CF_FAILOUT)) *result = 0; - } if (update) { rc = chk_bk_update_leader(&ins->ci_bk); @@ -2284,7 +2281,8 @@ chk_leader_sched(void *args) ins_phase = chk_pools_find_slowest(ins, &done); - if (ins_phase >= CHK__CHECK_SCAN_PHASE__CSP_POOL_MBS && !ins->ci_orphan_done && + if (ins_phase != CHK_INVAL_PHASE && + ins_phase >= CHK__CHECK_SCAN_PHASE__CSP_POOL_MBS && !ins->ci_orphan_done && !DAOS_FAIL_CHECK(DAOS_CHK_SYNC_ORPHAN_PROCESS)) { iv.ci_gen = cbk->cb_gen; iv.ci_phase = ins_phase; @@ -2316,7 +2314,7 @@ chk_leader_sched(void *args) D_GOTO(out, rc); } - if (cbk->cb_phase == CHK_INVAL_PHASE || cbk->cb_phase < ins_phase) { + if (ins_phase != CHK_INVAL_PHASE && ins_phase > cbk->cb_phase) { 
D_INFO(DF_LEADER" moves from phase %u to phase %u\n", DP_LEADER(ins), cbk->cb_phase, ins_phase); diff --git a/src/tests/suite/daos_cr.c b/src/tests/suite/daos_cr.c index cdec5e4233c..e8c1459eee1 100644 --- a/src/tests/suite/daos_cr.c +++ b/src/tests/suite/daos_cr.c @@ -1,6 +1,6 @@ /** * (C) Copyright 2023-2024 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -3427,12 +3427,6 @@ cr_fail_sync_orphan(void **state) rc = cr_pool_verify(&dci, pool.pool_uuid, TCPS_CHECKED, 0, NULL, NULL, NULL); assert_rc_equal(rc, 0); - /* Check leader may be completed earlier than check engines in this case, double check. */ - cr_ins_wait(0, NULL, &dci); - - rc = cr_ins_verify(&dci, TCIS_COMPLETED); - assert_rc_equal(rc, 0); - cr_debug_set_params(arg, 0); rc = cr_mode_switch(false); From 4a8d4cc72e4009e9d53177f2786e073be26c400d Mon Sep 17 00:00:00 2001 From: Jan Michalski Date: Thu, 8 Jan 2026 17:52:12 +0000 Subject: [PATCH 115/253] DAOS-17893 vos: check objects before freeing (#17293) DAOS-17893 is a ticket reporting a crash in DAVv2 happened while freeing an allocation which happened because the provided offset was not a beginning of a memory block but the memory block at hand was not a run. The allocator itself could check for this kind of discrepancies and report before the process will be terminated by a SIGFPE signal. But the higher the issue would be caught the more information we could potentially recover from the crash. One such place, notorious to be involved in this kind of incidents is the VOS garbage collector (e.g. DAOS-18049). Possibly not because it is more buggy than any other piece of DAOS rather it is a place where we enumerate large chunks of the VOS metadata in order to free the requested objects and all their descendants. 
Hence, this PR introduces a few asserts into the GC code so whenever it is possible to validate the offset GC is about to free whether it actually points to an object we expect to live there we assert it actually is as expected and we dump its contents if not for further investigation. Signed-off-by: Jan Michalski --- ci/test_files_to_stash.txt | 2 +- src/gurt/debug.c | 45 ++++++ src/gurt/tests/SConscript | 19 +++ src/gurt/tests/d_log_memory_ut.c | 258 +++++++++++++++++++++++++++++++ src/include/gurt/debug.h | 20 +++ src/vos/ilog.c | 9 ++ src/vos/ilog.h | 10 ++ src/vos/vos_dtx.c | 6 + src/vos/vos_gc.c | 6 + utils/utest.yaml | 1 + 10 files changed, 375 insertions(+), 1 deletion(-) create mode 100644 src/gurt/tests/d_log_memory_ut.c diff --git a/ci/test_files_to_stash.txt b/ci/test_files_to_stash.txt index 88680ff14f6..a36ef0a13c4 100755 --- a/ci/test_files_to_stash.txt +++ b/ci/test_files_to_stash.txt @@ -2,9 +2,9 @@ build/*/*/src/tests/ftest/cart/utest/test_linkage, build/*/*/src/tests/ftest/cart/utest/utest_hlc, build/*/*/src/tests/ftest/cart/utest/utest_protocol, build/*/*/src/tests/ftest/cart/utest/utest_swim, +build/*/*/src/gurt/tests/d_log_memory_ut, build/*/*/src/gurt/tests/test_gurt, build/*/*/src/gurt/tests/test_gurt_telem_producer, -build/*/*/src/gurt/tests/test_gurt_telem_consumer, build/*/*/src/rdb/raft/src/tests_main, build/*/*/src/common/tests/btree_direct, build/*/*/src/common/tests/btree, diff --git a/src/gurt/debug.c b/src/gurt/debug.c index e53ad61cb38..50f765d35bb 100644 --- a/src/gurt/debug.c +++ b/src/gurt/debug.c @@ -622,3 +622,48 @@ int d_register_alt_assert(void (*alt_assert)(const int, const char*, } return -DER_INVAL; } + +#define D_LOG_MEMORY_LINE_LENGTH (10 + 2 + 3 * 16 + 1) /** 0x12340000: 00 01 02... 
0f */ + +void +d_log_memory(const uint8_t *ptr, size_t size) +{ + static char buf[D_LOG_MEMORY_LINE_LENGTH] = ""; + size_t i; + char *out = buf; + size_t out_space = D_LOG_MEMORY_LINE_LENGTH; + int rc; + + /** printed immediately in case reading the memory cause a crash */ + D_FATAL("ptr=%p, size=%zu\n", ptr, size); + + if (ptr == NULL || size == 0) { + return; + } + + for (i = 0; i < size; i++) { + /** start a new line */ + if (i % 16 == 0) { + rc = snprintf(out, out_space, "%p: ", &ptr[i]); /** append address */ + D_ASSERTF(rc > 0, "snprintf() failed: %d\n", rc); + out += rc; + out_space -= rc; + } + rc = snprintf(out, out_space, "%02x ", ptr[i]); /** append value */ + D_ASSERTF(rc > 0, "snprintf() failed: %d\n", rc); + out += rc; + out_space -= rc; + + /** print a complete line and reset the output buffer */ + if (i % 16 == 15) { + D_FATAL("%s\n", buf); + out = buf; + out_space = D_LOG_MEMORY_LINE_LENGTH; + } + } + + /** print an incomplete line */ + if (out_space < D_LOG_MEMORY_LINE_LENGTH) { + D_FATAL("%s\n", buf); + } +} diff --git a/src/gurt/tests/SConscript b/src/gurt/tests/SConscript index a773b12812a..54d5d156689 100644 --- a/src/gurt/tests/SConscript +++ b/src/gurt/tests/SConscript @@ -6,6 +6,23 @@ import os + +def build_d_log_memory_ut(utenv): + """Build d_log_memory_ut""" + utenv.AppendUnique(LINKFLAGS=['-Wl,--wrap=d_vlog']) + libs = ['cmocka', 'uuid', 'yaml', 'm', 'pthread'] + srcs = [ + 'd_log_memory_ut.c', + '../debug.c', + '../dlog.c', + '../misc.c', + '../fault_inject.c', + '../hash.c', + '../errno.c', + ] + utenv.d_test_program('d_log_memory_ut', srcs, LIBS=libs) + + TEST_SRC = ['test_gurt.c', 'test_gurt_telem_producer.c'] @@ -35,6 +52,8 @@ def scons(): LIBS=test_env["LIBS"] + ['yaml']) tests.append(testprog) + build_d_log_memory_ut(env.Clone()) + Default(tests) diff --git a/src/gurt/tests/d_log_memory_ut.c b/src/gurt/tests/d_log_memory_ut.c new file mode 100644 index 00000000000..4cf3eead454 --- /dev/null +++ b/src/gurt/tests/d_log_memory_ut.c 
@@ -0,0 +1,258 @@ +/** + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ +#define D_LOGFAC DD_FAC(tests) + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +/** helper */ +#define _STRINGIFY(x) #x +#define STRINGIFY(x) _STRINGIFY(x) + +#define FULL_LINE_LEN 16 +#define MAP_ADDRESS 0x12340000 +#define LINE_01_ADDRESS 0x12340010 +#define LINE_02_ADDRESS 0x12340020 +#define LINE_00_ADDRESS_STR STRINGIFY(MAP_ADDRESS) +#define LINE_01_ADDRESS_STR STRINGIFY(LINE_01_ADDRESS) +#define LINE_02_ADDRESS_STR STRINGIFY(LINE_02_ADDRESS) + +#define HDR_STR(SIZE_STR) "ptr=" LINE_00_ADDRESS_STR ", size=" SIZE_STR "\n" +#define EXP_LINE_00_01B LINE_00_ADDRESS_STR ": ff \n" +#define EXP_LINE_00_15B LINE_00_ADDRESS_STR ": ff fe fd fc fb fa f9 f8 f7 f6 f5 f4 f3 f2 f1 \n" +#define EXP_LINE_00_FULL LINE_00_ADDRESS_STR ": ff fe fd fc fb fa f9 f8 f7 f6 f5 f4 f3 f2 f1 f0 \n" +#define EXP_LINE_01_01B LINE_01_ADDRESS_STR ": ef \n" +#define EXP_LINE_01_15B LINE_01_ADDRESS_STR ": ef ee ed ec eb ea e9 e8 e7 e6 e5 e4 e3 e2 e1 \n" +#define EXP_LINE_01_FULL LINE_01_ADDRESS_STR ": ef ee ed ec eb ea e9 e8 e7 e6 e5 e4 e3 e2 e1 e0 \n" +#define EXP_LINE_02_01B LINE_02_ADDRESS_STR ": df \n" +#define EXP_LINE_02_FULL LINE_02_ADDRESS_STR ": df de dd dc db da d9 d8 d7 d6 d5 d4 d3 d2 d1 d0 \n" + +static const char Exp_line_00_full[] = EXP_LINE_00_FULL; +static const char Exp_line_01_full[] = EXP_LINE_01_FULL; +static const char Exp_line_02_full[] = EXP_LINE_02_FULL; + +/** mocks */ + +#define BUF_SIZE 1024 + +void +__wrap_d_vlog(int flags, const char *fmt, va_list ap) +{ + static char buf[BUF_SIZE]; + const char *output; + int rc; + + /** generate the output string */ + rc = vsnprintf(buf, BUF_SIZE, fmt, ap); + assert(rc > 0); + + /** skip the "file:line_number func() " bit */ + output = strchr(buf, ' '); + assert_non_null(output); + output += 1; + output = strchr(output, ' '); + 
assert_non_null(output); + output += 1; + + check_expected(output); +} + +/** setup & teardown */ + +static int +setup(void **state) +{ + void *addr = (void *)MAP_ADDRESS; /** desired address */ + size_t size = 4096; /** one page */ + + void *ptr = mmap(addr, size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); + assert_int_not_equal(ptr, MAP_FAILED); + + uint8_t *mem = ptr; + + /** initialize the bit which is in use - three lines 16 bytes each */ + for (int line = 0; line < 3; ++line) { + for (int _byte = 0; _byte < 16; ++_byte) { + int index = line * 16 + _byte; + mem[index] = 0xff - index; + } + } + + *state = ptr; + + return 0; +} + +static int +teardown(void **state) +{ + void *ptr = *state; + int rc; + + rc = munmap(ptr, 4096); + assert_int_equal(rc, 0); + + return 0; +} + +/** tests */ + +static void +test_ptr_NULL(void **state) +{ + const char hdr[] = "ptr=(nil), size=0\n"; + + expect_string(__wrap_d_vlog, output, hdr); + d_log_memory(NULL, 0); +} + +static void +test_size_0(void **state) +{ + const char hdr[] = HDR_STR("0"); + const uint8_t *mem = *state; + + expect_string(__wrap_d_vlog, output, hdr); + d_log_memory(mem, 0); +} + +static void +test_very_short_line(void **state) +{ + const char hdr[] = HDR_STR("1"); + const char exp[] = EXP_LINE_00_01B; + uint8_t *mem = *state; + + expect_string(__wrap_d_vlog, output, hdr); + expect_string(__wrap_d_vlog, output, exp); + d_log_memory(mem, 1); +} + +static void +test_short_line(void **state) +{ + const char hdr[] = HDR_STR("15"); + const char exp[] = EXP_LINE_00_15B; + uint8_t *mem = *state; + + expect_string(__wrap_d_vlog, output, hdr); + expect_string(__wrap_d_vlog, output, exp); + d_log_memory(mem, FULL_LINE_LEN - 1); +} + +static void +test_full_line(void **state) +{ + const char hdr[] = HDR_STR("16"); + uint8_t *mem = *state; + + expect_string(__wrap_d_vlog, output, hdr); + expect_string(__wrap_d_vlog, output, Exp_line_00_full); + d_log_memory(mem, FULL_LINE_LEN); +} + +static 
void +test_full_line_plus(void **state) +{ + const char hdr[] = HDR_STR("17"); + const char exp1[] = EXP_LINE_01_01B; + uint8_t *mem = *state; + + expect_string(__wrap_d_vlog, output, hdr); + expect_string(__wrap_d_vlog, output, Exp_line_00_full); + expect_string(__wrap_d_vlog, output, exp1); + d_log_memory(mem, FULL_LINE_LEN + 1); +} + +static void +test_almost_two_lines(void **state) +{ + const char hdr[] = HDR_STR("31"); + const char exp1[] = EXP_LINE_01_15B; + uint8_t *mem = *state; + + expect_string(__wrap_d_vlog, output, hdr); + expect_string(__wrap_d_vlog, output, Exp_line_00_full); + expect_string(__wrap_d_vlog, output, exp1); + d_log_memory(mem, FULL_LINE_LEN * 2 - 1); +} + +static void +test_two_lines(void **state) +{ + const char hdr[] = HDR_STR("32"); + uint8_t *mem = *state; + + expect_string(__wrap_d_vlog, output, hdr); + expect_string(__wrap_d_vlog, output, Exp_line_00_full); + expect_string(__wrap_d_vlog, output, Exp_line_01_full); + d_log_memory(mem, FULL_LINE_LEN * 2); +} + +static void +test_two_lines_plus(void **state) +{ + const char hdr[] = HDR_STR("33"); + const char exp2[] = EXP_LINE_02_01B; + uint8_t *mem = *state; + + expect_string(__wrap_d_vlog, output, hdr); + expect_string(__wrap_d_vlog, output, Exp_line_00_full); + expect_string(__wrap_d_vlog, output, Exp_line_01_full); + expect_string(__wrap_d_vlog, output, exp2); + d_log_memory(mem, FULL_LINE_LEN * 2 + 1); +} + +static void +test_three_lines(void **state) +{ + const char hdr[] = HDR_STR("48"); + uint8_t *mem = *state; + + expect_string(__wrap_d_vlog, output, hdr); + expect_string(__wrap_d_vlog, output, Exp_line_00_full); + expect_string(__wrap_d_vlog, output, Exp_line_01_full); + expect_string(__wrap_d_vlog, output, Exp_line_02_full); + d_log_memory(mem, FULL_LINE_LEN * 3); +} + +static const struct CMUnitTest tests[] = { + {"DUMP001: ptr == NULL", test_ptr_NULL, NULL, NULL}, + {"DUMP002: size == 0", test_size_0, NULL, NULL}, + {"DUMP003: very short line (1 byte)", 
test_very_short_line, NULL, NULL}, + {"DUMP004: short line (15 bytes)", test_short_line, NULL, NULL}, + {"DUMP005: full line (16 bytes)", test_full_line, NULL, NULL}, + {"DUMP006: full line + 1 (17 bytes)", test_full_line_plus, NULL, NULL}, + {"DUMP007: almost two lines (31 bytes)", test_almost_two_lines, NULL, NULL}, + {"DUMP008: two lines (32 bytes)", test_two_lines, NULL, NULL}, + {"DUMP009: two lines + 1 (33 bytes)", test_two_lines_plus, NULL, NULL}, + {"DUMP010: three lines (48 bytes)", test_three_lines, NULL, NULL}, + {NULL, NULL, NULL, NULL}}; + +int +main(int argc, char **argv) +{ + int rc; + + d_log_init(); + + d_register_alt_assert(mock_assert); + + rc = cmocka_run_group_tests_name("d_log_memory() tests", tests, setup, teardown); + + d_log_fini(); + + return rc; +} diff --git a/src/include/gurt/debug.h b/src/include/gurt/debug.h index df6e9f48e42..38c728085f5 100644 --- a/src/include/gurt/debug.h +++ b/src/include/gurt/debug.h @@ -322,6 +322,15 @@ int d_log_getdbgbit(d_dbug_t *dbgbit, char *bitname); int d_register_alt_assert(void (*alt_assert)(const int, const char*, const char*, const int)); +/** + * \brief D_FATAL the provided memory range in hex. + * + * \param[in] ptr Start of the memory range. + * \param[in] size Size of the memory range. + */ +void +d_log_memory(const uint8_t *ptr, size_t size); + /** * D_PRINT can be used for output to stdout with or without clog being enabled */ @@ -343,6 +352,17 @@ int d_register_alt_assert(void (*alt_assert)(const int, const char*, assert(0); \ } while (0) +#define D_ASSERTF_MEM(cond, ptr, size, fmt, ...) \ + do { \ + if (likely(cond)) \ + break; \ + D_FATAL("Assertion '%s' failed: " fmt, #cond, ##__VA_ARGS__); \ + d_log_memory((uint8_t *)ptr, size); \ + if (d_alt_assert != NULL) \ + d_alt_assert(0, #cond, __FILE__, __LINE__); \ + assert(0); \ + } while (0) + /* Assert cond is true with message to report on failure */ #define D_ASSERTF(cond, fmt, ...) 
\ do { \ diff --git a/src/vos/ilog.c b/src/vos/ilog.c index 1e3fca5be32..016cf8fa86f 100644 --- a/src/vos/ilog.c +++ b/src/vos/ilog.c @@ -1648,6 +1648,15 @@ ilog_version_get(daos_handle_t loh) return ilog_mag2ver(lctx->ic_root->lr_magic); } +bool +ilog_root_is_valid(struct ilog_df *ilog_df) +{ + struct ilog_root *root = (struct ilog_root *)ilog_df; + D_ASSERT(root != NULL); + + return ILOG_MAGIC_VALID(root->lr_magic); +} + bool ilog_is_valid(struct umem_instance *umm, umem_off_t rec, uint32_t dtx_lid, daos_epoch_t epoch) { diff --git a/src/vos/ilog.h b/src/vos/ilog.h index f8f64cbd455..3fdff8524a5 100644 --- a/src/vos/ilog.h +++ b/src/vos/ilog.h @@ -340,6 +340,16 @@ ilog_is_punch(const struct ilog_entry *entry) entry->ie_id.id_update_minor_eph; } +/** + * Validate ilog's root. + * + * \param[in] ilog_df + * + * \return true if the root is valid. + */ +bool +ilog_root_is_valid(struct ilog_df *ilog_df); + /** Validate the provided ilog. * * Note: It is designed for catastrophic recovery. Not to perform at run-time. 
diff --git a/src/vos/vos_dtx.c b/src/vos/vos_dtx.c index c64adc12bb0..839ec2ca97f 100644 --- a/src/vos/vos_dtx.c +++ b/src/vos/vos_dtx.c @@ -474,6 +474,9 @@ vos_dtx_table_destroy(struct umem_instance *umm, struct vos_cont_df *cont_df) while (!UMOFF_IS_NULL(cont_df->cd_dtx_committed_head)) { dbd_off = cont_df->cd_dtx_committed_head; dbd = umem_off2ptr(umm, dbd_off); + D_ASSERTF_MEM(dbd->dbd_magic == DTX_CMT_BLOB_MAGIC, dbd, DTX_CMT_BLOB_SIZE, + "dbd_magic = %#x != DTX_CMT_BLOB_MAGIC (%#x)\n", dbd->dbd_magic, + DTX_CMT_BLOB_MAGIC); cont_df->cd_dtx_committed_head = dbd->dbd_next; rc = umem_free(umm, dbd_off); if (rc != 0) @@ -493,6 +496,9 @@ vos_dtx_table_destroy(struct umem_instance *umm, struct vos_cont_df *cont_df) while (!UMOFF_IS_NULL(cont_df->cd_dtx_active_head)) { dbd_off = cont_df->cd_dtx_active_head; dbd = umem_off2ptr(umm, dbd_off); + D_ASSERTF_MEM(dbd->dbd_magic == DTX_ACT_BLOB_MAGIC, dbd, DTX_ACT_BLOB_SIZE, + "dbd_magic = %#x != DTX_ACT_BLOB_MAGIC (%#x)\n", dbd->dbd_magic, + DTX_ACT_BLOB_MAGIC); for (i = 0; i < dbd->dbd_index; i++) { dae_df = &dbd->dbd_active_data[i]; diff --git a/src/vos/vos_gc.c b/src/vos/vos_gc.c index 7726bb05bf1..8427b7fee6e 100644 --- a/src/vos/vos_gc.c +++ b/src/vos/vos_gc.c @@ -167,6 +167,12 @@ gc_drain_key(struct vos_gc *gc, struct vos_pool *pool, daos_handle_t coh, int creds = *credits; int rc; + /** + * Since the key's structure does not have a magic value and the ilog root (which has + * a magic value) is already destroyed at this stage there is no way to verify the pointer + * actually points to a valid data. + */ + if (key->kr_bmap & KREC_BF_NO_AKEY && gc->gc_type == GC_DKEY) { /** Special case, this will defer to the free callback * and the tree will be inserted as akey. 
diff --git a/utils/utest.yaml b/utils/utest.yaml index 0f2f4e3131a..84ad7513d8f 100644 --- a/utils/utest.yaml +++ b/utils/utest.yaml @@ -52,6 +52,7 @@ - name: gurt base: "BUILD_DIR" tests: + - cmd: ["src/gurt/tests/d_log_memory_ut"] - cmd: ["src/gurt/tests/test_gurt"] - cmd: ["src/gurt/tests/test_gurt_telem_producer"] - name: DTX From a7626519a5f3e654a686bb35e97efc4b71944d0c Mon Sep 17 00:00:00 2001 From: Liang Zhen Date: Fri, 9 Jan 2026 11:59:23 +0800 Subject: [PATCH 116/253] DAOS-18356 object: skip key checksum for value enumeration (#17319) * DAOS-18356 object: skip key checksum for value enumeration During enumeration, key and value checksum verification are performed separately. In the value path (csum_enum_verify_recx/sv), a dummy IOD is passed to daos_csummer_verify_iod(), which causes the ISA-L SHA-256 update (isal_mh_sha256_update()) to fail. Signed-off-by: Liang Zhen --- src/common/checksum.c | 33 +++++++++++++++++ src/include/daos/checksum.h | 16 ++++++++ src/object/cli_shard.c | 74 +++---------------------------------- src/object/srv_enum.c | 44 +++------------------- 4 files changed, 61 insertions(+), 106 deletions(-) diff --git a/src/common/checksum.c b/src/common/checksum.c index c36f14e3c6d..d0f80a3e92b 100644 --- a/src/common/checksum.c +++ b/src/common/checksum.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2019-2023 Intel Corporation. 
+ * (C) Copyright 2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -1051,6 +1052,38 @@ daos_csummer_verify_key(struct daos_csummer *obj, daos_key_t *key, return 0; } +int +daos_csummer_verify_value(struct daos_csummer *csummer, daos_recx_t *recx, daos_size_t rsize, + d_iov_t *val, struct dcs_csum_info *csum_info) +{ + struct dcs_iod_csums iod_csum = {0}; + daos_iod_t iod = {0}; + d_sg_list_t sgl = {0}; + bool skip_key_c = csummer->dcs_skip_key_calc; + bool skip_key_v = csummer->dcs_skip_key_verify; + int rc; + + iod.iod_nr = 1; + iod.iod_size = rsize; + iod.iod_recxs = recx; + iod.iod_type = recx ? DAOS_IOD_ARRAY : DAOS_IOD_SINGLE; + + iod_csum.ic_nr = 1; + iod_csum.ic_data = csum_info; + + sgl.sg_iovs = val; + sgl.sg_nr = 1; + sgl.sg_nr_out = 1; + + csummer->dcs_skip_key_calc = true; + csummer->dcs_skip_key_verify = true; + rc = daos_csummer_verify_iod(csummer, &iod, &sgl, &iod_csum, NULL, 0, NULL); + csummer->dcs_skip_key_calc = skip_key_c; + csummer->dcs_skip_key_verify = skip_key_v; + + return rc; +} + int daos_csummer_alloc_iods_csums_with_packed(struct daos_csummer *csummer, daos_iod_t *iods, int iod_cnt, diff --git a/src/include/daos/checksum.h b/src/include/daos/checksum.h index fe4771f9fbf..2cf68e4ab1f 100644 --- a/src/include/daos/checksum.h +++ b/src/include/daos/checksum.h @@ -1,5 +1,6 @@ /** * (C) Copyright 2019-2023 Intel Corporation. 
+ * (C) Copyright 2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -356,6 +357,21 @@ int daos_csummer_verify_key(struct daos_csummer *obj, daos_key_t *key, struct dcs_csum_info *csum); +/** + * Verify a value to a checksum + * + * @param obj The daos_csummer obj + * @param recx extent for array value (NULL for single value) + * @param rsize element/value size + * @param val The key to verify + * @param csum_info The dcs_csum_info that describes the checksum + * + * @return 0 for success, -DER_CSUM if corruption is detected + */ +int +daos_csummer_verify_value(struct daos_csummer *obj, daos_recx_t *recx, daos_size_t rsize, + d_iov_t *val, struct dcs_csum_info *csum_info); + /** * Calculate the needed memory for all the structures that will * store the checksums for the iods. diff --git a/src/object/cli_shard.c b/src/object/cli_shard.c index ba806bdb823..3708a11ef06 100644 --- a/src/object/cli_shard.c +++ b/src/object/cli_shard.c @@ -1,6 +1,6 @@ /* * (C) Copyright 2016-2024 Intel Corporation. 
- * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -1599,62 +1599,6 @@ struct obj_enum_args { uint32_t *max_delay; }; -/** - * use iod/iod_csum as vehicle to verify data - */ -static int -csum_enum_verify_recx(struct daos_csummer *csummer, struct obj_enum_rec *rec, - d_iov_t *enum_type_val, struct dcs_csum_info *csum_info) -{ - daos_iod_t tmp_iod = {0}; - d_sg_list_t tmp_sgl = {0}; - struct dcs_iod_csums tmp_iod_csum = {0}; - int rc; - - tmp_iod.iod_size = rec->rec_size; - tmp_iod.iod_type = DAOS_IOD_ARRAY; - tmp_iod.iod_recxs = &rec->rec_recx; - tmp_iod.iod_nr = 1; - - tmp_sgl.sg_nr = tmp_sgl.sg_nr_out = 1; - tmp_sgl.sg_iovs = enum_type_val; - - tmp_iod_csum.ic_nr = 1; - tmp_iod_csum.ic_data = csum_info; - - rc = daos_csummer_verify_iod(csummer, &tmp_iod, &tmp_sgl, - &tmp_iod_csum, NULL, 0, NULL); - - return rc; -} - -/** - * use iod/iod_csum as vehicle to verify data - */ -static int -csum_enum_verify_sv(struct daos_csummer *csummer, struct obj_enum_rec *rec, - d_iov_t *enum_type_val, struct dcs_csum_info *csum_info) -{ - daos_iod_t tmp_iod = {0}; - d_sg_list_t tmp_sgl = {0}; - struct dcs_iod_csums tmp_iod_csum = {0}; - int rc; - - tmp_iod.iod_size = rec->rec_size; - tmp_iod.iod_type = DAOS_IOD_SINGLE; - tmp_iod.iod_nr = 1; - - tmp_sgl.sg_nr = tmp_sgl.sg_nr_out = 1; - tmp_sgl.sg_iovs = enum_type_val; - - tmp_iod_csum.ic_nr = 1; - tmp_iod_csum.ic_data = csum_info; - rc = daos_csummer_verify_iod(csummer, &tmp_iod, &tmp_sgl, - &tmp_iod_csum, NULL, 0, NULL); - - return rc; -} - struct csum_enum_args { d_iov_t *csum_iov; struct daos_csummer *csummer; @@ -1664,7 +1608,7 @@ static int verify_csum_cb(daos_key_desc_t *kd, void *buf, unsigned int size, void *arg) { struct dcs_csum_info *ci_to_compare = NULL; - struct csum_enum_args *args = arg; + struct csum_enum_args *args = arg; d_iov_t enum_type_val; int rc; @@ -1672,6 +1616,7 @@ 
verify_csum_cb(daos_key_desc_t *kd, void *buf, unsigned int size, void *arg) case OBJ_ITER_SINGLE: case OBJ_ITER_RECX: { struct obj_enum_rec *rec; + daos_recx_t *recx; uint64_t rec_data_len; rec = buf; @@ -1691,13 +1636,8 @@ verify_csum_cb(daos_key_desc_t *kd, void *buf, unsigned int size, void *arg) d_iov_set(&enum_type_val, buf, rec_data_len); - if (kd->kd_val_type == OBJ_ITER_RECX) - rc = csum_enum_verify_recx(args->csummer, rec, - &enum_type_val, - ci_to_compare); - else - rc = csum_enum_verify_sv(args->csummer, rec, - &enum_type_val, + recx = (kd->kd_val_type == OBJ_ITER_RECX) ? &rec->rec_recx : NULL; + rc = daos_csummer_verify_value(args->csummer, recx, rec->rec_size, &enum_type_val, ci_to_compare); if (rc != 0) return rc; @@ -1717,9 +1657,7 @@ verify_csum_cb(daos_key_desc_t *kd, void *buf, unsigned int size, void *arg) ci_cast(&ci_to_compare, args->csum_iov); ci_move_next_iov(ci_to_compare, args->csum_iov); - rc = daos_csummer_verify_key(args->csummer, - &enum_type_val, ci_to_compare); - + rc = daos_csummer_verify_key(args->csummer, &enum_type_val, ci_to_compare); if (rc != 0) { D_ERROR("daos_csummer_verify_key error for %s: %d\n", kd->kd_val_type == OBJ_ITER_AKEY ? "AKEY" : "DKEY", rc); diff --git a/src/object/srv_enum.c b/src/object/srv_enum.c index e1513f02f7f..4f8a4eb7531 100644 --- a/src/object/srv_enum.c +++ b/src/object/srv_enum.c @@ -1,5 +1,6 @@ /* * (C) Copyright 2018-2024 Intel Corporation. 
+ * (C) Copyright 2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -320,37 +321,6 @@ entry_is_partial_extent(const vos_iter_entry_t *key_ent) return !recx_eq(&key_ent->ie_orig_recx, &key_ent->ie_recx); } -static int -csummer_verify_recx(struct daos_csummer *csummer, d_iov_t *data_to_verify, - daos_recx_t *recx, daos_size_t rsize, - struct dcs_csum_info *csum_info) -{ - int rc; - struct dcs_iod_csums iod_csum = {0}; - daos_iod_t iod = {0}; - d_sg_list_t sgl = {0}; - - iod.iod_type = DAOS_IOD_ARRAY; - iod.iod_recxs = recx; - iod.iod_nr = 1; - iod.iod_size = rsize; - - sgl.sg_iovs = data_to_verify; - sgl.sg_nr = 1; - sgl.sg_nr_out = 1; - - iod_csum.ic_nr = 1; - iod_csum.ic_data = csum_info; - - rc = daos_csummer_verify_iod(csummer, &iod, &sgl, - &iod_csum, NULL, 0, NULL); - if (rc != 0) - D_ERROR("Corruption found for recx "DF_RECX"\n", - DP_RECX(*recx)); - - return rc; -} - static int csummer_alloc_csum_info(struct daos_csummer *csummer, daos_recx_t *recx, daos_size_t rsize, @@ -467,15 +437,13 @@ csum_copy_inline(int type, vos_iter_entry_t *ent, struct ds_obj_enum_arg *arg, return rc; } - rc = csummer_verify_recx(csummer, - &data_to_verify, - &ent_to_verify.ie_orig_recx, - ent_to_verify.ie_rsize, - &ent_to_verify.ie_csum); - + rc = daos_csummer_verify_value(csummer, &ent_to_verify.ie_orig_recx, + ent_to_verify.ie_rsize, &data_to_verify, + &ent_to_verify.ie_csum); D_FREE(data_to_verify.iov_buf); if (rc != 0) { - D_ERROR("Found corruption!\n"); + D_ERROR("Found corrupted recx " DF_RECX "\n", + DP_RECX(ent_to_verify.ie_orig_recx)); return rc; } From a42f0c5aafbf8804a926a467e2a7c06c735094f3 Mon Sep 17 00:00:00 2001 From: Nasf-Fan Date: Fri, 9 Jan 2026 16:16:10 +0800 Subject: [PATCH 117/253] DAOS-18367 vos: properly evict object for failed transaction (#17325) * DAOS-18367 vos: properly evict object for failed transaction Currently, if a transaction failed for some reason, the cleanup logic will try to evict related vos 
object from cache to avoid leaving stable information in cache. Such logic works well for the system with PMEM. But under md-on-ssd mode, the eviction may cause trouble. Because one vos modification may hold the same object multiple times, and there is CPU yield during these object hold actions. That creates race windows for other concurrent operations against the same object. This patch changes the logic: when the transaction changes some vos object(s), it will record related oid(s), if such transaction failed in subsequent process, it will only evict these modified objects. The others in cache will not be affected during transaction cleanup. On the other hand, under md-on-ssd mode, CPU may yield during backend TX start, the object that is held by current modification maybe marked as evicted in such race windows. So add logic to check whether related object is evicted or not after backend TX started, if yes, then restart current transaction. Signed-off-by: Fan Yong --- src/dtx/dtx_common.c | 191 +++++---------------------------- src/dtx/tests/dts_structs.c | 9 +- src/include/daos/lru.h | 11 +- src/include/daos_srv/dtx_srv.h | 35 +++--- src/vos/tests/vts_dtx.c | 41 ++----- src/vos/vos_common.c | 32 ++++-- src/vos/vos_dtx.c | 137 +++++++++++++---------- src/vos/vos_internal.h | 29 ++--- src/vos/vos_io.c | 46 +++----- src/vos/vos_obj.c | 34 ++++-- src/vos/vos_obj.h | 7 ++ 11 files changed, 216 insertions(+), 356 deletions(-) diff --git a/src/dtx/dtx_common.c b/src/dtx/dtx_common.c index 175f440dc4e..ffb17279d34 100644 --- a/src/dtx/dtx_common.c +++ b/src/dtx/dtx_common.c @@ -1,6 +1,6 @@ /** * (C) Copyright 2019-2024 Intel Corporation. 
- * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -883,15 +883,10 @@ dtx_handle_reinit(struct dtx_handle *dth) dth->dth_modify_shared = 0; dth->dth_active = 0; - dth->dth_touched_leader_oid = 0; dth->dth_local_tx_started = 0; dth->dth_cos_done = 0; - - dth->dth_op_seq = 0; - dth->dth_oid_cnt = 0; - dth->dth_oid_cap = 0; - D_FREE(dth->dth_oid_array); - dth->dth_dkey_hash = 0; + dth->dth_op_seq = 0; + dth->dth_dkey_hash = 0; vos_dtx_rsrvd_fini(dth); return vos_dtx_rsrvd_init(dth); @@ -926,32 +921,29 @@ dtx_handle_init(struct dtx_id *dti, daos_handle_t xoh, struct dtx_epoch *epoch, dth->dth_coh = xoh; } - dth->dth_ver = pm_ver; - dth->dth_refs = 1; - dth->dth_mbs = mbs; - - dth->dth_pinned = 0; - dth->dth_cos_done = 0; - dth->dth_modify_shared = 0; - dth->dth_active = 0; - dth->dth_touched_leader_oid = 0; - dth->dth_local_tx_started = 0; - dth->dth_solo = (flags & DTX_SOLO) ? 1 : 0; - dth->dth_drop_cmt = (flags & DTX_DROP_CMT) ? 1 : 0; - dth->dth_dist = (flags & DTX_DIST) ? 1 : 0; - dth->dth_for_migration = (flags & DTX_FOR_MIGRATION) ? 1 : 0; + dth->dth_ver = pm_ver; + dth->dth_refs = 1; + dth->dth_mbs = mbs; + dth->dth_pinned = 0; + dth->dth_cos_done = 0; + dth->dth_modify_shared = 0; + dth->dth_active = 0; + dth->dth_local_tx_started = 0; + dth->dth_solo = (flags & DTX_SOLO) ? 1 : 0; + dth->dth_drop_cmt = (flags & DTX_DROP_CMT) ? 1 : 0; + dth->dth_dist = (flags & DTX_DIST) ? 1 : 0; + dth->dth_for_migration = (flags & DTX_FOR_MIGRATION) ? 1 : 0; dth->dth_ignore_uncommitted = (flags & DTX_IGNORE_UNCOMMITTED) ? 1 : 0; - dth->dth_prepared = (flags & DTX_PREPARED) ? 1 : 0; - dth->dth_epoch_owner = (flags & DTX_EPOCH_OWNER) ? 1 : 0; - dth->dth_aborted = 0; - dth->dth_already = 0; - dth->dth_need_validation = 0; + dth->dth_prepared = (flags & DTX_PREPARED) ? 1 : 0; + dth->dth_epoch_owner = (flags & DTX_EPOCH_OWNER) ? 
1 : 0; + dth->dth_aborted = 0; + dth->dth_already = 0; + dth->dth_need_validation = 0; dth->dth_local = (flags & DTX_LOCAL) ? 1 : 0; - - dth->dth_dti_cos = dti_cos; - dth->dth_dti_cos_count = dti_cos_cnt; - dth->dth_ent = NULL; - dth->dth_flags = leader ? DTE_LEADER : 0; + dth->dth_dti_cos = dti_cos; + dth->dth_dti_cos_count = dti_cos_cnt; + dth->dth_ent = NULL; + dth->dth_flags = leader ? DTE_LEADER : 0; if (flags & DTX_SYNC) { dth->dth_flags |= DTE_BLOCK; @@ -960,12 +952,11 @@ dtx_handle_init(struct dtx_id *dti, daos_handle_t xoh, struct dtx_epoch *epoch, dth->dth_sync = 0; } - dth->dth_op_seq = 0; - dth->dth_oid_cnt = 0; - dth->dth_oid_cap = 0; - dth->dth_oid_array = NULL; - - dth->dth_dkey_hash = 0; + dth->dth_op_seq = 0; + dth->dth_local_oid_cnt = 0; + dth->dth_local_oid_cap = 0; + dth->dth_local_oid_array = NULL; + dth->dth_dkey_hash = 0; if (!(flags & DTX_LOCAL)) { if (daos_is_zero_dti(dti)) @@ -1001,83 +992,6 @@ dtx_handle_init(struct dtx_id *dti, daos_handle_t xoh, struct dtx_epoch *epoch, return rc; } -static int -dtx_insert_oid(struct dtx_handle *dth, daos_unit_oid_t *oid, bool touch_leader) -{ - int start = 0; - int end = dth->dth_oid_cnt - 1; - int at; - int rc = 0; - - do { - at = (start + end) / 2; - rc = daos_unit_oid_compare(dth->dth_oid_array[at], *oid); - if (rc == 0) - return 0; - - if (rc > 0) - end = at - 1; - else - start = at + 1; - } while (start <= end); - - if (dth->dth_oid_cnt == dth->dth_oid_cap) { - daos_unit_oid_t *oid_array; - - D_ALLOC_ARRAY(oid_array, dth->dth_oid_cap << 1); - if (oid_array == NULL) - return -DER_NOMEM; - - if (rc > 0) { - /* Insert before dth->dth_oid_array[at]. */ - if (at > 0) - memcpy(&oid_array[0], &dth->dth_oid_array[0], - sizeof(*oid) * at); - oid_array[at] = *oid; - memcpy(&oid_array[at + 1], &dth->dth_oid_array[at], - sizeof(*oid) * (dth->dth_oid_cnt - at)); - } else { - /* Insert after dth->dth_oid_array[at]. 
*/ - memcpy(&oid_array[0], &dth->dth_oid_array[0], - sizeof(*oid) * (at + 1)); - oid_array[at + 1] = *oid; - if (at < dth->dth_oid_cnt - 1) - memcpy(&oid_array[at + 2], - &dth->dth_oid_array[at + 1], - sizeof(*oid) * (dth->dth_oid_cnt - 1 - at)); - } - - D_FREE(dth->dth_oid_array); - dth->dth_oid_array = oid_array; - dth->dth_oid_cap <<= 1; - - goto out; - } - - if (rc > 0) { - /* Insert before dth->dth_oid_array[at]. */ - memmove(&dth->dth_oid_array[at + 1], - &dth->dth_oid_array[at], - sizeof(*oid) * (dth->dth_oid_cnt - at)); - dth->dth_oid_array[at] = *oid; - } else { - /* Insert after dth->dth_oid_array[at]. */ - if (at < dth->dth_oid_cnt - 1) - memmove(&dth->dth_oid_array[at + 2], - &dth->dth_oid_array[at + 1], - sizeof(*oid) * (dth->dth_oid_cnt - 1 - at)); - dth->dth_oid_array[at + 1] = *oid; - } - -out: - if (touch_leader) - dth->dth_touched_leader_oid = 1; - - dth->dth_oid_cnt++; - - return 0; -} - void dtx_renew_epoch(struct dtx_epoch *epoch, struct dtx_handle *dth) { @@ -1110,51 +1024,6 @@ dtx_sub_init(struct dtx_handle *dth, daos_unit_oid_t *oid, uint64_t dkey_hash) dth->dth_dkey_hash = dkey_hash; dth->dth_op_seq++; - rc = daos_unit_oid_compare(dth->dth_leader_oid, *oid); - if (rc == 0) { - if (dth->dth_oid_array == NULL) - dth->dth_touched_leader_oid = 1; - - if (dth->dth_touched_leader_oid) - goto out; - - rc = dtx_insert_oid(dth, oid, true); - - D_GOTO(out, rc); - } - - if (dth->dth_oid_array == NULL) { - D_ASSERT(dth->dth_oid_cnt == 0); - - /* 4 slots by default to hold rename case. 
*/ - dth->dth_oid_cap = 4; - D_ALLOC_ARRAY(dth->dth_oid_array, dth->dth_oid_cap); - if (dth->dth_oid_array == NULL) - D_GOTO(out, rc = -DER_NOMEM); - - if (!dth->dth_touched_leader_oid) { - dth->dth_oid_array[0] = *oid; - dth->dth_oid_cnt = 1; - - D_GOTO(out, rc = 0); - } - - dth->dth_oid_cnt = 2; - - if (rc > 0) { - dth->dth_oid_array[0] = *oid; - dth->dth_oid_array[1] = dth->dth_leader_oid; - } else { - dth->dth_oid_array[0] = dth->dth_leader_oid; - dth->dth_oid_array[1] = *oid; - } - - D_GOTO(out, rc = 0); - } - - rc = dtx_insert_oid(dth, oid, false); - -out: D_DEBUG(DB_IO, "Sub init DTX "DF_DTI" for object "DF_UOID " dkey %lu, opc seq %d: "DF_RC"\n", DP_DTI(&dth->dth_xid), DP_UOID(*oid), @@ -1493,7 +1362,6 @@ dtx_leader_end(struct dtx_leader_handle *dlh, struct ds_cont_child *cont, int re dth->dth_sync ? "sync" : "async", dth->dth_dti_cos_count, dth->dth_cos_done ? dth->dth_dti_cos_count : 0, DP_RC(result)); - D_FREE(dth->dth_oid_array); D_FREE(dlh); d_tm_dec_gauge(dtx_tls_get()->dt_dtx_leader_total, 1); @@ -1617,7 +1485,6 @@ dtx_end(struct dtx_handle *dth, struct ds_cont_child *cont, int result) vos_dtx_detach(dth); out: - D_FREE(dth->dth_oid_array); D_FREE(dth); return result; diff --git a/src/dtx/tests/dts_structs.c b/src/dtx/tests/dts_structs.c index bddfdf9816c..0e656e33f2c 100644 --- a/src/dtx/tests/dts_structs.c +++ b/src/dtx/tests/dts_structs.c @@ -1,6 +1,6 @@ /** * (C) Copyright 2024 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP. + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -62,7 +62,6 @@ struct_dtx_handle(void **state) SET_BITFIELD_1(dummy, dth_drop_cmt); SET_BITFIELD_1(dummy, dth_modify_shared); SET_BITFIELD_1(dummy, dth_active); - SET_BITFIELD_1(dummy, dth_touched_leader_oid); SET_BITFIELD_1(dummy, dth_local_tx_started); SET_BITFIELD_1(dummy, dth_shares_inited); SET_BITFIELD_1(dummy, dth_dist); @@ -75,7 +74,7 @@ struct_dtx_handle(void **state) SET_BITFIELD_1(dummy, dth_local); SET_BITFIELD_1(dummy, dth_epoch_owner); SET_BITFIELD_1(dummy, dth_local_complete); - SET_BITFIELD(dummy, padding1, 12); + SET_BITFIELD(dummy, padding1, 13); SET_FIELD(dummy, dth_dti_cos_count); SET_FIELD(dummy, dth_dti_cos); @@ -87,10 +86,6 @@ struct_dtx_handle(void **state) SET_FIELD(dummy, dth_op_seq); SET_FIELD(dummy, dth_deferred_used_cnt); SET_FIELD(dummy, padding2); - SET_FIELD(dummy, dth_oid_cnt); - SET_FIELD(dummy, dth_oid_cap); - SET_FIELD(dummy, padding3); - SET_FIELD(dummy, dth_oid_array); SET_FIELD(dummy, dth_local_oid_cnt); SET_FIELD(dummy, dth_local_oid_cap); SET_FIELD(dummy, padding4); diff --git a/src/include/daos/lru.h b/src/include/daos/lru.h index de6c5a373b9..6b21d31a6f3 100644 --- a/src/include/daos/lru.h +++ b/src/include/daos/lru.h @@ -1,6 +1,6 @@ /* * (C) Copyright 2016-2024 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -137,6 +137,15 @@ daos_lru_ref_evict(struct daos_lru_cache *lcache, struct daos_llink *llink) d_hash_rec_evict_at(&lcache->dlc_htable, &llink->ll_link); } +/** + * Whether the item is evicted or not. + */ +static inline bool +daos_lru_is_evicted(struct daos_llink *llink) +{ + return llink->ll_evicted != 0; +} + /** * Evict the item from LRU before releasing the refcount on it, wait until * the caller is the last one holds refcount. 
diff --git a/src/include/daos_srv/dtx_srv.h b/src/include/daos_srv/dtx_srv.h index 6143ed9b350..873d59ef1b2 100644 --- a/src/include/daos_srv/dtx_srv.h +++ b/src/include/daos_srv/dtx_srv.h @@ -1,6 +1,6 @@ /** * (C) Copyright 2019-2024 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -47,6 +47,7 @@ struct dtx_local_oid_record { * the most optimal way (packed). Please make sure that all necessary padding * is explicit so it could be used in the future. */ +/* clang-format off */ struct dtx_handle { union { struct dtx_entry dth_dte; @@ -92,8 +93,6 @@ struct dtx_handle { dth_modify_shared : 1, /* The DTX entry is in active table. */ dth_active : 1, - /* Leader oid is touched. */ - dth_touched_leader_oid : 1, /* Local TX is started. */ dth_local_tx_started : 1, /* The DTX share lists are inited. */ @@ -117,7 +116,7 @@ struct dtx_handle { /* Locally generate the epoch. */ dth_epoch_owner : 1, /* Flag to commit the local transaction */ - dth_local_complete : 1, padding1 : 12; + dth_local_complete : 1, padding1 : 13; /* The count the DTXs in the dth_dti_cos array. */ uint32_t dth_dti_cos_count; @@ -138,25 +137,14 @@ struct dtx_handle { uint16_t dth_deferred_used_cnt; uint16_t padding2; - union { - struct { - /** The count of objects that are modified by this DTX. */ - uint16_t dth_oid_cnt; - /** The total slots in the dth_oid_array. */ - uint16_t dth_oid_cap; - uint32_t padding3; - /** If more than one objects are modified, the IDs are reocrded here. */ - daos_unit_oid_t *dth_oid_array; - }; - struct { - /** The count of objects stored in dth_local_oid_array. */ - uint16_t dth_local_oid_cnt; - /** The total slots in the dth_local_oid_array. */ - uint16_t dth_local_oid_cap; - uint32_t padding4; - /** The record of all objects touched by the local transaction. 
*/ - struct dtx_local_oid_record *dth_local_oid_array; - }; + struct { + /** The count of objects stored in dth_local_oid_array. */ + uint16_t dth_local_oid_cnt; + /** The total slots in the dth_local_oid_array. */ + uint16_t dth_local_oid_cap; + uint32_t padding4; + /** The record of all objects touched by the local transaction. */ + struct dtx_local_oid_record *dth_local_oid_array; }; /* Hash of the dkey to be modified if applicable. Per modification. */ @@ -179,6 +167,7 @@ struct dtx_handle { int dth_share_tbd_count; uint32_t padding5; }; +/* clang-format on */ /* Each sub transaction handle to manage each sub thandle */ struct dtx_sub_status { diff --git a/src/vos/tests/vts_dtx.c b/src/vos/tests/vts_dtx.c index 12cd6d72728..57d80412c96 100644 --- a/src/vos/tests/vts_dtx.c +++ b/src/vos/tests/vts_dtx.c @@ -1,6 +1,6 @@ /** * (C) Copyright 2019-2024 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -51,40 +51,13 @@ vts_dtx_begin(const daos_unit_oid_t *oid, daos_handle_t coh, daos_epoch_t epoch, vts_init_dte(&dth->dth_dte); - dth->dth_coh = coh; - dth->dth_epoch = epoch; - dth->dth_leader_oid = *oid; - - dth->dth_pinned = 0; - dth->dth_sync = 0; - dth->dth_cos_done = 0; - dth->dth_touched_leader_oid = 0; - dth->dth_local_tx_started = 0; - dth->dth_solo = 0; - dth->dth_drop_cmt = 0; - dth->dth_modify_shared = 0; - dth->dth_active = 0; - dth->dth_dist = 0; - dth->dth_for_migration = 0; - dth->dth_ignore_uncommitted = 0; - dth->dth_prepared = 0; - dth->dth_epoch_owner = 0; - dth->dth_aborted = 0; - dth->dth_already = 0; - dth->dth_need_validation = 0; - - dth->dth_dti_cos_count = 0; - dth->dth_dti_cos = NULL; - dth->dth_ent = NULL; - dth->dth_flags = DTE_LEADER; + dth->dth_coh = coh; + dth->dth_epoch = epoch; + dth->dth_leader_oid = *oid; + dth->dth_flags = DTE_LEADER; dth->dth_modification_cnt = 1; - - 
dth->dth_op_seq = 1; - dth->dth_oid_cnt = 0; - dth->dth_oid_cap = 0; - dth->dth_oid_array = NULL; - - dth->dth_dkey_hash = dkey_hash; + dth->dth_op_seq = 1; + dth->dth_dkey_hash = dkey_hash; D_INIT_LIST_HEAD(&dth->dth_share_cmt_list); D_INIT_LIST_HEAD(&dth->dth_share_abt_list); diff --git a/src/vos/vos_common.c b/src/vos/vos_common.c index a7397a94256..e5fe50dac97 100644 --- a/src/vos/vos_common.c +++ b/src/vos/vos_common.c @@ -1,6 +1,6 @@ /** * (C) Copyright 2016-2024 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * (C) Copyright 2025 Google LLC * * SPDX-License-Identifier: BSD-2-Clause-Patent @@ -215,12 +215,22 @@ vos_tx_publish(struct dtx_handle *dth, bool publish) } int -vos_tx_begin(struct dtx_handle *dth, struct umem_instance *umm, bool is_sysdb) +vos_tx_begin(struct dtx_handle *dth, struct umem_instance *umm, bool is_sysdb, + struct vos_object *obj) { int rc; - if (dth == NULL) - return umem_tx_begin(umm, vos_txd_get(is_sysdb)); + if (dth == NULL) { + /* CPU may yield when umem_tx_begin, related object maybe evicted during that. */ + rc = umem_tx_begin(umm, vos_txd_get(is_sysdb)); + if (rc == 0 && obj != NULL && unlikely(vos_obj_is_evicted(obj))) { + D_DEBUG(DB_IO, "Obj " DF_UOID " is evicted(1), need to restart TX.\n", + DP_UOID(obj->obj_id)); + rc = umem_tx_end(umm, -DER_TX_RESTART); + } + + return rc; + } D_ASSERT(!is_sysdb); /** Note: On successful return, dth tls gets set and will be cleared by the corresponding @@ -235,6 +245,14 @@ vos_tx_begin(struct dtx_handle *dth, struct umem_instance *umm, bool is_sysdb) rc = umem_tx_begin(umm, vos_txd_get(is_sysdb)); if (rc == 0) { + /* CPU may yield when umem_tx_begin, related object maybe evicted during that. 
*/ + if (obj != NULL && unlikely(vos_obj_is_evicted(obj))) { + D_DEBUG(DB_IO, "Obj " DF_UOID " is evicted(2), need to restart TX.\n", + DP_UOID(obj->obj_id)); + + return umem_tx_end(umm, -DER_TX_RESTART); + } + dth->dth_local_tx_started = 1; vos_dth_set(dth, false); } @@ -250,12 +268,6 @@ vos_local_tx_abort(struct dtx_handle *dth) if (dth->dth_local_oid_cnt == 0) return; - /** - * Since a local transaction spawns always a single pool an eaither one of the containers - * can be used to access the pool. - */ - record = &dth->dth_local_oid_array[0]; - /** * Evict all objects touched by the aborted transaction from the object cache to make sure * no invalid pointer stays there. Not all of the touched objects have to be evicted but diff --git a/src/vos/vos_dtx.c b/src/vos/vos_dtx.c index 839ec2ca97f..3391781adfa 100644 --- a/src/vos/vos_dtx.c +++ b/src/vos/vos_dtx.c @@ -1,6 +1,6 @@ /** * (C) Copyright 2019-2024 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * (C) Copyright 2025 Google LLC * * SPDX-License-Identifier: BSD-2-Clause-Patent @@ -151,37 +151,21 @@ dtx_inprogress(struct vos_dtx_act_ent *dae, struct dtx_handle *dth, } static void -dtx_act_ent_cleanup(struct vos_container *cont, struct vos_dtx_act_ent *dae, - struct dtx_handle *dth, bool evict, bool keep_df) +dtx_act_ent_cleanup(struct vos_container *cont, struct vos_dtx_act_ent *dae, bool evict, + bool keep_df) { - if (evict) { - daos_unit_oid_t *oids; - int count; - int i; + if (evict && dae->dae_oids != NULL) { + int i; - if (dth != NULL) { - if (dth->dth_oid_array != NULL) { - D_ASSERT(dth->dth_oid_cnt > 0); - - count = dth->dth_oid_cnt; - oids = dth->dth_oid_array; - } else { - count = 1; - oids = &dth->dth_leader_oid; - } - } else { - count = dae->dae_oid_cnt; - oids = dae->dae_oids; - } - - for (i = 0; i < count; i++) - vos_obj_evict_by_oid(cont, oids[i]); + for (i = 0; i < dae->dae_oid_cnt; i++) + 
vos_obj_evict_by_oid(cont, dae->dae_oids[i]); } if (dae->dae_oids != NULL && dae->dae_oids != &dae->dae_oid_inline && dae->dae_oids != &DAE_OID(dae)) { D_FREE(dae->dae_oids); dae->dae_oid_cnt = 0; + dae->dae_oid_cap = 0; } DAE_REC_OFF(dae) = UMOFF_NULL; @@ -254,7 +238,7 @@ dtx_act_ent_free(struct btr_instance *tins, struct btr_record *rec, D_ASSERT(dae != NULL); *(struct vos_dtx_act_ent **)args = dae; } else if (dae != NULL) { - dtx_act_ent_cleanup(tins->ti_priv, dae, NULL, true, false); + dtx_act_ent_cleanup(tins->ti_priv, dae, true, false); } return 0; @@ -885,7 +869,7 @@ vos_dtx_commit_one(struct vos_container *cont, struct dtx_id *dti, daos_epoch_t rc = dbtree_delete(cont->vc_dtx_active_hdl, BTR_PROBE_BYPASS, &kiov, &dae); if (rc == 0) { - dtx_act_ent_cleanup(cont, dae, NULL, false, false); + dtx_act_ent_cleanup(cont, dae, false, false); dtx_evict_lid(cont, dae); } @@ -1851,30 +1835,6 @@ vos_dtx_prepared(struct dtx_handle *dth, struct vos_dtx_cmt_ent **dce_p) (dth->dth_modification_cnt > 0)) dth->dth_sync = 1; - if (dth->dth_oid_array != NULL) { - D_ASSERT(dth->dth_oid_cnt > 0); - - dae->dae_oid_cnt = dth->dth_oid_cnt; - if (dth->dth_oid_cnt == 1) { - dae->dae_oid_inline = dth->dth_oid_array[0]; - dae->dae_oids = &dae->dae_oid_inline; - } else { - size = sizeof(daos_unit_oid_t) * dth->dth_oid_cnt; - D_ALLOC_NZ(dae->dae_oids, size); - if (dae->dae_oids == NULL) { - /* Not fatal. 
*/ - D_WARN("No DRAM to store ACT DTX OIDs " - DF_DTI"\n", DP_DTI(&DAE_XID(dae))); - dae->dae_oid_cnt = 0; - } else { - memcpy(dae->dae_oids, dth->dth_oid_array, size); - } - } - } else { - dae->dae_oids = &DAE_OID(dae); - dae->dae_oid_cnt = 1; - } - if (DAE_MBS_DSIZE(dae) <= sizeof(DAE_MBS_INLINE(dae))) { memcpy(DAE_MBS_INLINE(dae), dth->dth_mbs->dm_data, DAE_MBS_DSIZE(dae)); @@ -2447,7 +2407,7 @@ vos_dtx_post_handle(struct vos_container *cont, DAE_FLAGS(daes[i]) |= DTE_PARTIAL_COMMITTED; daes[i]->dae_committing = 0; - dtx_act_ent_cleanup(cont, daes[i], NULL, false, true); + dtx_act_ent_cleanup(cont, daes[i], false, true); continue; } @@ -2473,13 +2433,13 @@ vos_dtx_post_handle(struct vos_container *cont, daes[i]->dae_aborted = 1; daes[i]->dae_aborting = 0; - dtx_act_ent_cleanup(cont, daes[i], NULL, true, false); + dtx_act_ent_cleanup(cont, daes[i], true, false); } else { D_ASSERT(daes[i]->dae_aborting == 0); daes[i]->dae_committed = 1; daes[i]->dae_committing = 0; - dtx_act_ent_cleanup(cont, daes[i], NULL, false, false); + dtx_act_ent_cleanup(cont, daes[i], false, false); } DAE_FLAGS(daes[i]) &= ~(DTE_CORRUPTED | DTE_ORPHAN | DTE_PARTIAL_COMMITTED); } @@ -3665,7 +3625,7 @@ vos_dtx_cleanup_internal(struct dtx_handle *dth) */ if (dae != NULL) { D_ASSERT(!vos_dae_is_prepare(dae)); - dtx_act_ent_cleanup(cont, dae, dth, true, false); + dtx_act_ent_cleanup(cont, dae, true, false); } } else { d_iov_set(&kiov, &dth->dth_xid, sizeof(dth->dth_xid)); @@ -3688,7 +3648,7 @@ vos_dtx_cleanup_internal(struct dtx_handle *dth) if (DAE_EPOCH(dae) != dth->dth_epoch) goto out; - dtx_act_ent_cleanup(cont, dae, dth, true, false); + dtx_act_ent_cleanup(cont, dae, true, false); rc = dbtree_delete(cont->vc_dtx_active_hdl, riov.iov_buf != NULL ? 
BTR_PROBE_BYPASS : BTR_PROBE_EQ, @@ -4046,7 +4006,7 @@ vos_dtx_local_begin(struct dtx_handle *dth, daos_handle_t poh) goto error; } - rc = vos_tx_begin(dth, umm, pool->vp_sysdb); + rc = vos_tx_begin(dth, umm, pool->vp_sysdb, NULL); if (rc != 0) { D_ERROR("Failed to start transaction: rc=" DF_RC "\n", DP_RC(rc)); goto error; @@ -4173,3 +4133,68 @@ vos_dtx_get_cmt_stat(daos_handle_t coh, uint64_t *cmt_cnt, struct dtx_time_stat out: return rc; } + +int +vos_dtx_record_oid(struct dtx_handle *dth, struct vos_container *cont, daos_unit_oid_t oid) +{ + struct dtx_local_oid_record *oid_array; + struct dtx_local_oid_record *record; + struct vos_dtx_act_ent *dae; + daos_unit_oid_t *oids; + int rc = 0; + + if (dth == NULL) + D_GOTO(out, rc = 0); + + if (dth->dth_local) { + if (dth->dth_local_oid_cnt == dth->dth_local_oid_cap) { + D_REALLOC_ARRAY(oid_array, dth->dth_local_oid_array, dth->dth_local_oid_cap, + dth->dth_local_oid_cap << 1); + if (oid_array == NULL) + D_GOTO(out, rc = -DER_NOMEM); + + dth->dth_local_oid_array = oid_array; + dth->dth_local_oid_cap <<= 1; + } + + record = &dth->dth_local_oid_array[dth->dth_local_oid_cnt]; + record->dor_cont = cont; + vos_cont_addref(cont); + record->dor_oid = oid; + dth->dth_local_oid_cnt++; + + D_GOTO(out, rc = 0); + } + + if (daos_is_zero_dti(&dth->dth_xid)) + D_GOTO(out, rc = 0); + + dae = dth->dth_ent; + D_ASSERT(dae != NULL); + + if (dae->dae_oid_cnt == 0) { + if (daos_unit_oid_compare(oid, DAE_OID(dae)) == 0) + dae->dae_oids = &DAE_OID(dae); + else + dae->dae_oids = &dae->dae_oid_inline; + } else if (dae->dae_oid_cnt >= dae->dae_oid_cap) { + D_ALLOC_ARRAY(oids, dae->dae_oid_cnt << 1); + if (oids == NULL) + D_GOTO(out, rc = -DER_NOMEM); + + memcpy(oids, dae->dae_oids, sizeof(*oids) * dae->dae_oid_cnt); + if (dae->dae_oids != &DAE_OID(dae) && dae->dae_oids != &dae->dae_oid_inline) + D_FREE(dae->dae_oids); + + dae->dae_oids = oids; + dae->dae_oid_cap = dae->dae_oid_cnt << 1; + } + + dae->dae_oids[dae->dae_oid_cnt++] = oid; + 
+out: + if (rc != 0) + D_ERROR("Failed to record oid " DF_UOID ": " DF_RC "\n", DP_UOID(oid), DP_RC(rc)); + + return rc; +} diff --git a/src/vos/vos_internal.h b/src/vos/vos_internal.h index 18e6438ce6e..428051203ed 100644 --- a/src/vos/vos_internal.h +++ b/src/vos/vos_internal.h @@ -1,6 +1,6 @@ /** * (C) Copyright 2016-2024 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * (C) Copyright 2025 Google LLC * * SPDX-License-Identifier: BSD-2-Clause-Patent @@ -470,10 +470,6 @@ struct vos_dtx_act_ent { * then 'dae_oids' points to the 'dae_oid_inline'. * * Otherwise, 'dae_oids' points to new buffer to hold more. - * - * These information is used for EC aggregation optimization. - * If server restarts, then we will lose the optimization but - * it is not fatal. */ daos_unit_oid_t *dae_oids; /* The time (hlc) when the DTX entry is created. */ @@ -485,6 +481,9 @@ struct vos_dtx_act_ent { /* Back pointer to the DTX handle. */ struct dtx_handle *dae_dth; + /* The capacity of dae_oids if it points to new allocated area. 
*/ + uint32_t dae_oid_cap; + unsigned int dae_committable:1, dae_committing:1, dae_committed:1, @@ -855,6 +854,9 @@ vos_dtx_post_handle(struct vos_container *cont, int vos_dtx_act_reindex(struct vos_container *cont); +int +vos_dtx_record_oid(struct dtx_handle *dth, struct vos_container *cont, daos_unit_oid_t oid); + enum vos_tree_class { /** the first reserved tree class */ VOS_BTR_BEGIN = DBTREE_VOS_BEGIN, @@ -1336,7 +1338,8 @@ vos_evt_desc_cbs_init(struct evt_desc_cbs *cbs, struct vos_pool *pool, daos_handle_t coh, struct vos_object *obj); int -vos_tx_begin(struct dtx_handle *dth, struct umem_instance *umm, bool is_sysdb); +vos_tx_begin(struct dtx_handle *dth, struct umem_instance *umm, bool is_sysdb, + struct vos_object *obj); /** Finish the transaction and publish or cancel the reservations or * return if err == 0 and it's a multi-modification transaction that @@ -1928,20 +1931,6 @@ vos_io_scm(struct vos_pool *pool, daos_iod_type_t type, daos_size_t size, enum v return false; } -/** - * Insert object ID and its parent container into the array of objects touched by the ongoing - * local transaction. - * - * \param[in] dth DTX handle for ongoing local transaction - * \param[in] cont VOS container - * \param[in] oid Object ID - * - * \return 0 : Success. - * -DER_NOMEM : Run out of the volatile memory. - */ -int -vos_insert_oid(struct dtx_handle *dth, struct vos_container *cont, daos_unit_oid_t *oid); - static inline bool vos_pool_is_p2(struct vos_pool *pool) { diff --git a/src/vos/vos_io.c b/src/vos/vos_io.c index cebf9181aaa..4d105b91412 100644 --- a/src/vos/vos_io.c +++ b/src/vos/vos_io.c @@ -1,6 +1,6 @@ /** * (C) Copyright 2018-2024 Intel Corporation. 
- * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -2552,32 +2552,6 @@ update_cancel(struct vos_io_context *ioc) true /* abort */); } -int -vos_insert_oid(struct dtx_handle *dth, struct vos_container *cont, daos_unit_oid_t *oid) -{ - struct dtx_local_oid_record *oid_array = NULL; - struct dtx_local_oid_record *record = NULL; - - /** The array has to grow to accommodate the next record. */ - if (dth->dth_local_oid_cnt == dth->dth_local_oid_cap) { - D_REALLOC_ARRAY(oid_array, dth->dth_local_oid_array, dth->dth_local_oid_cap, - dth->dth_local_oid_cap << 1); - if (oid_array == NULL) - return -DER_NOMEM; - - dth->dth_local_oid_array = oid_array; - dth->dth_local_oid_cap <<= 1; - } - - record = &dth->dth_local_oid_array[dth->dth_local_oid_cnt]; - record->dor_cont = cont; - vos_cont_addref(cont); - record->dor_oid = *oid; - dth->dth_local_oid_cnt++; - - return 0; -} - int vos_update_end(daos_handle_t ioh, uint32_t pm_ver, daos_key_t *dkey, int err, daos_size_t *size, struct dtx_handle *dth) @@ -2598,6 +2572,13 @@ vos_update_end(daos_handle_t ioh, uint32_t pm_ver, daos_key_t *dkey, int err, if (err != 0) goto abort; + if (ioc->ic_pinned_obj != NULL && unlikely(vos_obj_is_evicted(ioc->ic_pinned_obj))) { + D_DEBUG(DB_IO, "Obj " DF_UOID " is evicted during update, need to restart TX.\n", + DP_UOID(ioc->ic_oid)); + + D_GOTO(abort, err = -DER_TX_RESTART); + } + err = vos_ts_set_add(ioc->ic_ts_set, ioc->ic_cont->vc_ts_idx, NULL, 0); D_ASSERT(err == 0); @@ -2606,7 +2587,10 @@ vos_update_end(daos_handle_t ioh, uint32_t pm_ver, daos_key_t *dkey, int err, if (err != 0) goto abort; - err = vos_tx_begin(dth, umem, ioc->ic_cont->vc_pool->vp_sysdb); + if (ioc->ic_pinned_obj != NULL) + D_ASSERT(ioc->ic_pinned_obj == ioc->ic_obj); + + err = vos_tx_begin(dth, umem, ioc->ic_cont->vc_pool->vp_sysdb, ioc->ic_obj); if (err != 0) goto abort; @@ -2663,9 
+2647,7 @@ vos_update_end(daos_handle_t ioh, uint32_t pm_ver, daos_key_t *dkey, int err, goto abort; } - if (dtx_is_valid_handle(dth) && dth->dth_local) { - err = vos_insert_oid(dth, ioc->ic_cont, &ioc->ic_oid); - } + err = vos_dtx_record_oid(dth, ioc->ic_cont, ioc->ic_oid); abort: if (err == -DER_NONEXIST || err == -DER_EXIST || @@ -2727,7 +2709,7 @@ vos_update_end(daos_handle_t ioh, uint32_t pm_ver, daos_key_t *dkey, int err, *size = ioc->ic_io_size; D_FREE(daes); D_FREE(dces); - vos_ioc_destroy(ioc, err != 0); + vos_ioc_destroy(ioc, err != 0 && tx_started); return err; } diff --git a/src/vos/vos_obj.c b/src/vos/vos_obj.c index 0015d91d916..117cf8baaab 100644 --- a/src/vos/vos_obj.c +++ b/src/vos/vos_obj.c @@ -1,6 +1,6 @@ /** * (C) Copyright 2016-2024 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -494,7 +494,7 @@ vos_obj_punch(daos_handle_t coh, daos_unit_oid_t oid, daos_epoch_t epoch, if (rc != 0) goto reset; - rc = vos_tx_begin(dth, vos_cont2umm(cont), cont->vc_pool->vp_sysdb); + rc = vos_tx_begin(dth, vos_cont2umm(cont), cont->vc_pool->vp_sysdb, obj); if (rc != 0) goto reset; @@ -572,11 +572,9 @@ vos_obj_punch(daos_handle_t coh, daos_unit_oid_t oid, daos_epoch_t epoch, } if (rc == 0) { - vos_ts_set_wupdate(ts_set, epr.epr_hi); - - if (dtx_is_valid_handle(dth) && dth->dth_local) { - rc = vos_insert_oid(dth, cont, &oid); - } + rc = vos_dtx_record_oid(dth, cont, oid); + if (rc == 0) + vos_ts_set_wupdate(ts_set, epr.epr_hi); } rc = vos_tx_end(cont, dth, NULL, NULL, tx_started, NULL, rc); @@ -592,7 +590,7 @@ vos_obj_punch(daos_handle_t coh, daos_unit_oid_t oid, daos_epoch_t epoch, } if (obj != NULL) - vos_obj_release(obj, 0, rc != 0); + vos_obj_release(obj, 0, rc != 0 && tx_started); D_FREE(daes); D_FREE(dces); @@ -816,7 +814,8 @@ vos_obj_mark_corruption(daos_handle_t coh, daos_epoch_t epoch, uint32_t 
pm_ver, daos_handle_t toh = DAOS_HDL_INVAL; int rc = 0; int i; - bool dirty = false; + bool dirty = false; + bool tx_started = false; cont = vos_hdl2cont(coh); D_ASSERT(cont != NULL); @@ -842,6 +841,7 @@ vos_obj_mark_corruption(daos_handle_t coh, daos_epoch_t epoch, uint32_t pm_ver, } } +restart: rc = vos_obj_hold(cont, oid, &epr, epoch, VOS_OBJ_VISIBLE | VOS_OBJ_CREATE, DAOS_INTENT_MARK, &obj, NULL); if (rc != 0) @@ -851,6 +851,16 @@ vos_obj_mark_corruption(daos_handle_t coh, daos_epoch_t epoch, uint32_t pm_ver, if (rc != 0) goto log; + if (unlikely(vos_obj_is_evicted(obj))) { + D_DEBUG(DB_IO, "Obj " DF_UOID " is evicted, needs to restart TX.\n", DP_UOID(oid)); + umem_tx_end(umm, -DER_TX_RESTART); + vos_obj_release(obj, 0, false); + obj = NULL; + goto restart; + } + + tx_started = true; + rc = vos_obj_incarnate(obj, &epr, epoch, VOS_OBJ_VISIBLE | VOS_OBJ_CREATE, DAOS_INTENT_MARK, NULL); if (rc != 0) @@ -906,12 +916,14 @@ vos_obj_mark_corruption(daos_handle_t coh, daos_epoch_t epoch, uint32_t pm_ver, ", dkey (empty), akey_nr %u, epoch " DF_X64 ", pm_ver %u", DP_UOID(oid), akey_nr, epoch, pm_ver); + if (rc == -DER_ALREADY) + rc = 0; if (daos_handle_is_valid(toh)) dbtree_close(toh); if (obj != NULL) - vos_obj_release(obj, 0, true); + vos_obj_release(obj, 0, rc != 0 && tx_started); - return rc == -DER_ALREADY ? 0 : rc; + return rc; } static int diff --git a/src/vos/vos_obj.h b/src/vos/vos_obj.h index f572ebb03d9..be67bd27ac6 100644 --- a/src/vos/vos_obj.h +++ b/src/vos/vos_obj.h @@ -1,5 +1,6 @@ /** * (C) Copyright 2016-2024 Intel Corporation. + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -123,6 +124,12 @@ void vos_obj_evict(struct vos_object *obj); int vos_obj_evict_by_oid(struct vos_container *cont, daos_unit_oid_t oid); +static inline bool +vos_obj_is_evicted(struct vos_object *obj) +{ + return daos_lru_is_evicted(&obj->obj_llink); +} + /** * Create an object cache. 
* From 23a23ba1763876c28f8d7149a0fc396415a3d2b0 Mon Sep 17 00:00:00 2001 From: Jan Michalski Date: Fri, 9 Jan 2026 14:01:38 +0000 Subject: [PATCH 118/253] DAOS-18369 ddb: fix --help argument (#17341) * DAOS-18369 ddb: fix --help argument - Add list of commands to general help. - Allow to call `cmd --help` to get the help message for a particular command. Signed-off-by: Jan Michalski --- src/control/cmd/ddb/main.go | 137 ++++++++++++++++++++++++++++-------- 1 file changed, 107 insertions(+), 30 deletions(-) diff --git a/src/control/cmd/ddb/main.go b/src/control/cmd/ddb/main.go index b7ed3672f81..7eb6a9cc7ce 100644 --- a/src/control/cmd/ddb/main.go +++ b/src/control/cmd/ddb/main.go @@ -1,6 +1,6 @@ // // (C) Copyright 2022-2024 Intel Corporation. -// (C) Copyright 2025 Hewlett Packard Enterprise Development LP +// (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -17,6 +17,7 @@ import ( "strings" "unsafe" + "github.com/desertbit/columnize" "github.com/desertbit/go-shlex" "github.com/desertbit/grumble" "github.com/jessevdk/go-flags" @@ -54,6 +55,43 @@ type cliOptions struct { } `positional-args:"yes"` } +const helpCommandsHeader = ` +Available commands: + +` + +const helpVosTreePath = ` +Path + +Many of the commands take a VOS tree path. The format for this path +is [cont]/[obj]/[dkey]/[akey]/[extent]. +- cont - the full container uuid. +- obj - the object id. +- keys (akey, dkey) - there are multiple types of keys + -- string keys are simply the string value. If the size of the + key is greater than strlen(key), then the size is included at + the end of the string value. Example: 'akey{5}' is the key: akey + with a null terminator at the end. + -- number keys are formatted as '{[type]: NNN}' where type is + 'uint8, uint16, uint32, or uint64'. NNN can be a decimal or + hex number. 
Example: '{uint32: 123456}'
+ -- binary keys are formatted as '{bin: 0xHHH}' where HHH is the hex
+ representation of the binary key. Example: '{bin: 0x1a2b}'
+- extent for array values - in the format {lo-hi}.
+
+To make it easier to navigate the tree, indexes can be
+used instead of the path part. The index is in the format [i]. Indexes
+and actual path values can be used together
+
+Example Paths:
+/3550f5df-e6b1-4415-947e-82e15cf769af/939000573846355970.0.13.1/dkey/akey/[0-1023]
+[0]/[1]/[2]/[1]/[9]
+/[0]/939000573846355970.0.13.1/[2]/akey{5}/[0-1023]
+
+`
+
+const grumbleUnknownCmdErr = "unknown command, try 'help'"
+
 type vosPathStr string
 
 func (pathStr vosPathStr) Complete(match string) (comps []flags.Completion) {
@@ -128,6 +166,64 @@ func runFileCmds(log logging.Logger, app *grumble.App, fileName string) error {
 	return nil
 }
 
+// One cannot rely on grumble to print the list of commands since app does not allow executing
+// the help command from the outside of the interactive mode.
+// This method extracts commands and their respective help (short) messages in the simplest possible way,
+// puts them in columns and prints them using the provided log.
+func printCommands(app *grumble.App, log *logging.LeveledLogger) {
+	var output []string
+	for _, c := range app.Commands().All() {
+		if c.Name == "quit" {
+			continue
+		}
+		row := c.Name + columnize.DefaultConfig().Delim + c.Help
+		output = append(output, row)
+	}
+	log.Info(helpCommandsHeader + columnize.SimpleFormat(output) + "\n\n")
+}
+
+func printGeneralHelp(app *grumble.App, generalMsg string, log *logging.LeveledLogger) {
+	log.Info(generalMsg + "\n") // standard help from go-flags
+	printCommands(app, log)     // list of commands
+	log.Info(helpVosTreePath)   // extra info on VOS Tree Path syntax
+}
+
+// Ask grumble to generate a help message for the requested command.
+// Caveat: There is no known easy way of forcing grumble to use log to print the generated message +// so the output goes directly to stdout. +// Returns false in case the opts.Args.RunCmd is unknown. +func printCmdHelp(app *grumble.App, opts *cliOptions, log *logging.LeveledLogger) bool { + err := runCmdStr(app, string(opts.Args.RunCmd), "--help") + if err != nil { + if err.Error() == grumbleUnknownCmdErr { + log.Errorf("unknown command '%s'", string(opts.Args.RunCmd)) + printCommands(app, log) + } else { + log.Error(err.Error()) + } + return false + } + return true +} + +// Prints either general or command-specific help message. +// Returns a reasonable return code in case the caller chooses to terminate the process. +func printHelp(generalMsg string, opts *cliOptions, log *logging.LeveledLogger) int { + // ctx is not necessary since this instance of the app is not intended to run any of the commands + app := createGrumbleApp(nil) + + if string(opts.Args.RunCmd) == "" { + printGeneralHelp(app, generalMsg, log) + return 0 + } + + if printCmdHelp(app, opts, log) { + return 0 + } else { + return 1 + } +} + func parseOpts(args []string, opts *cliOptions, log *logging.LeveledLogger) error { p := flags.NewParser(opts, flags.HelpFlag|flags.IgnoreUnknown) p.Name = "ddb" @@ -137,33 +233,14 @@ func parseOpts(args []string, opts *cliOptions, log *logging.LeveledLogger) erro a file in the VOS format. It offers both a command line and interactive shell mode. If neither a single command or '-f' option is provided, then the tool will run in interactive mode. In order to modify the VOS file, -the '-w' option must be included. If supplied, the VOS file supplied in -the first positional parameter will be opened before commands are executed. - -Many of the commands take a vos tree path. The format for this path -is [cont]/[obj]/[dkey]/[akey]/[extent]. -- cont - the full container uuid. -- obj - the object id. 
-- keys (akey, dkey) - there are multiple types of keys - -- string keys are simply the string value. If the size of the - key is greater than strlen(key), then the size is included at - the end of the string value. Example: 'akey{5}' is the key: akey - with a null terminator at the end. - -- number keys are formatted as '{[type]: NNN}' where type is - 'uint8, uint16, uint32, or uint64'. NNN can be a decimal or - hex number. Example: '{uint32: 123456}' - -- binary keys are formatted as '{bin: 0xHHH}' where HHH is the hex - representation of the binary key. Example: '{bin: 0x1a2b}' -- extent for array values - in the format {lo-hi}. +the '-w' option must be included. -To make it easier to navigate the tree, indexes can be -used instead of the path part. The index is in the format [i]. Indexes -and actual path values can be used together +If the command requires it, the VOS file provided as the first positional +parameter will be opened before any commands are executed. See the +command‑specific help for details. When the VOS file is not required, it is +ignored; however, it must still be supplied, and it may be empty (""), e.g. 
-Example Paths: -/3550f5df-e6b1-4415-947e-82e15cf769af/939000573846355970.0.13.1/dkey/akey/[0-1023] -[0]/[1]/[2]/[1]/[9] -/[0]/939000573846355970.0.13.1/[2]/akey{5}/[0-1023] +ddb "" ls --help ` // Set the traceback level such that a crash results in @@ -171,6 +248,10 @@ Example Paths: debug.SetTraceback("crash") if _, err := p.ParseArgs(args); err != nil { + if fe, ok := errors.Cause(err).(*flags.Error); ok && fe.Type == flags.ErrHelp { + os.Exit(printHelp(fe.Error(), opts, log)) + } + return err } @@ -255,10 +336,6 @@ func main() { log := logging.NewCommandLineLogger() if err := parseOpts(os.Args[1:], &opts, log); err != nil { - if fe, ok := errors.Cause(err).(*flags.Error); ok && fe.Type == flags.ErrHelp { - log.Info(fe.Error()) - os.Exit(0) - } exitWithError(log, err) } } From ac24ca610d53f1d7a3caf6abd8b26c885edef8e0 Mon Sep 17 00:00:00 2001 From: Kris Jacque Date: Fri, 9 Jan 2026 10:27:27 -0700 Subject: [PATCH 119/253] DAOS-18343 test: Suppress Go runtime false positive (#17346) Suppress a valgrind false positive for Go runtime function ShadowSet. 
Signed-off-by: Kris Jacque --- src/cart/utils/memcheck-cart.supp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/cart/utils/memcheck-cart.supp b/src/cart/utils/memcheck-cart.supp index 4771f022302..b29fbe80ace 100644 --- a/src/cart/utils/memcheck-cart.supp +++ b/src/cart/utils/memcheck-cart.supp @@ -486,6 +486,13 @@ fun:_ZN6__tsan9ShadowSetEPNS_9RawShadowES1_S0_ fun:racecall } +{ + MemoryRangeSet ShadowSet + Memcheck:Value8 + fun:ShadowSet + fun:_ZN6__tsanL14MemoryRangeSetEmmNS_9RawShadowE + fun:racecall +} { FI leak 8 Memcheck:Leak From bbcc2ce47a6a5ad2b3756e9e8434ee242925e87d Mon Sep 17 00:00:00 2001 From: sherintg Date: Mon, 12 Jan 2026 12:27:16 +0530 Subject: [PATCH 120/253] DAOS-18285 mem: Deadlock when all evictable pages are pinned (#17292) * DAOS-18285 mem: Deadlock when all evictable pages are pinned This fix addresses an issue of deadlock in daos engine due to busy loop in cache_get_free_page() when all evictable pages are pinned. Signed-off-by: Sherin T George --- src/common/mem.c | 93 +++++++++++++++++++++++++++++++++++------- src/include/daos/mem.h | 6 ++- 2 files changed, 83 insertions(+), 16 deletions(-) diff --git a/src/common/mem.c b/src/common/mem.c index 4ace7e96bd3..e473cb8bfe5 100644 --- a/src/common/mem.c +++ b/src/common/mem.c @@ -1,6 +1,6 @@ /** * (C) Copyright 2016-2024 Intel Corporation. 
- * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -2233,6 +2233,7 @@ umem_cache_free(struct umem_store *store) D_ASSERT(d_list_empty(&cache->ca_pgs_pinned)); D_ASSERT(cache->ca_pgs_stats[UMEM_PG_STATS_PINNED] == 0); D_ASSERT(cache->ca_reserve_waiters == 0); + D_ASSERT(cache->ca_unpin_waiters == 0); pinfo = (struct umem_page_info *)&cache->ca_pages[cache->ca_md_pages]; for (i = 0; i < cache->ca_mem_pages; i++) { @@ -2248,6 +2249,11 @@ umem_cache_free(struct umem_store *store) } + if (cache->ca_unpin_wq != NULL) { + store->stor_ops->so_waitqueue_destroy(cache->ca_unpin_wq); + cache->ca_unpin_wq = NULL; + } + if (store->cache->off2ptr) D_FREE(store->cache->off2ptr); if (store->cache->ptr2off) @@ -2332,8 +2338,8 @@ umem_cache_alloc(struct umem_store *store, uint32_t page_sz, uint32_t md_pgs, ui if (cache == NULL) return -DER_NOMEM; - D_DEBUG(DB_IO, "Allocated page cache, md-pages(%u), mem-pages(%u), max-ne-pages(%u) %p\n", - md_pgs, mem_pgs, max_ne_pgs, cache); + D_INFO("Page cache: md-pgs(%u), mem-pages(%u), max-ne-pgs(%u), mode(%d)\n", md_pgs, mem_pgs, + max_ne_pgs, cmode); cache->ca_store = store; cache->ca_base = base; @@ -2383,6 +2389,11 @@ umem_cache_alloc(struct umem_store *store, uint32_t page_sz, uint32_t md_pgs, ui if (rc) goto error; + D_ASSERT(store->stor_ops->so_waitqueue_create != NULL); + rc = store->stor_ops->so_waitqueue_create(&cache->ca_unpin_wq); + if (rc) + goto error; + pinfo = (struct umem_page_info *)&cache->ca_pages[cache->ca_md_pages]; for (idx = 0; idx < cache->ca_mem_pages; idx++) { rc = page_waitqueue_create(cache, pinfo); @@ -2443,6 +2454,7 @@ cache_unmap_page(struct umem_cache *cache, struct umem_page_info *pinfo) verify_clean_page(pinfo, 1); D_ASSERT(pinfo->pi_pg_id < cache->ca_md_pages); D_ASSERT(cache->ca_pages[pinfo->pi_pg_id].pg_info == pinfo); + D_ASSERT(pinfo->pi_ref == 0); 
cache->off2ptr[pinfo->pi_pg_id] = 0; cache_idx = (pinfo - (struct umem_page_info *)&cache->ca_pages[cache->ca_md_pages]); @@ -2486,9 +2498,13 @@ cache_add2lru(struct umem_cache *cache, struct umem_page_info *pinfo) D_ASSERT(d_list_empty(&pinfo->pi_lru_link)); D_ASSERT(pinfo->pi_ref == 0); - if (pinfo->pi_evictable) + if (pinfo->pi_evictable) { d_list_add_tail(&pinfo->pi_lru_link, &cache->ca_pgs_lru[1]); - else + if (cache->ca_unpin_waiters) { + cache->ca_unpin_waiters--; + cache->ca_store->stor_ops->so_waitqueue_wakeup(cache->ca_unpin_wq, false); + } + } else d_list_add_tail(&pinfo->pi_lru_link, &cache->ca_pgs_lru[0]); } @@ -3190,7 +3206,8 @@ cache_evict_page(struct umem_cache *cache, bool for_sys) D_ERROR("No evictable page.\n"); return -DER_INVAL; } else if (d_list_empty(pg_list)) { - D_ERROR("All evictable pages are pinned.\n"); + cache->ca_unpin_waiters++; + cache->ca_store->stor_ops->so_waitqueue_wait(cache->ca_unpin_wq, false); return -DER_BUSY; } @@ -3289,14 +3306,22 @@ cache_get_free_page(struct umem_cache *cache, struct umem_page_info **ret_pinfo, } /* All pinned pages are from current caller */ - if (rc == -DER_BUSY && pinned_nr == cache->ca_pgs_stats[UMEM_PG_STATS_PINNED]) { - D_ERROR("Not enough evictable pages.\n"); + if (rc == -DER_BUSY && pinned_nr && + pinned_nr == cache->ca_pgs_stats[UMEM_PG_STATS_PINNED]) { + D_ERROR("Not enough evictable pages. pinned [%u/%u]\n", pinned_nr, + cache->ca_pgs_stats[UMEM_PG_STATS_PINNED]); return -DER_INVAL; } - D_CDEBUG(retry_cnt == 10, DLOG_ERR, DB_TRACE, - "Retry get free page, %d times\n", retry_cnt); + if (rc == -DER_BUSY) + return rc; + retry_cnt++; + D_CDEBUG(retry_cnt % 20 == 0, DLOG_ERR, DB_TRACE, + "%u retries of get free page with %u pinned. 
[ne:%u,pinned:%u,free:%u]\n", + retry_cnt, pinned_nr, cache->ca_pgs_stats[UMEM_PG_STATS_NONEVICTABLE], + cache->ca_pgs_stats[UMEM_PG_STATS_PINNED], + cache->ca_pgs_stats[UMEM_PG_STATS_FREE]); } pinfo = cache_pop_free_page(cache); @@ -3317,6 +3342,8 @@ cache_map_pages(struct umem_cache *cache, uint32_t *pages, int page_nr) struct umem_page_info *pinfo, *free_pinfo = NULL; uint32_t pg_id; int i, rc = 0; + int retry_cnt; + bool pages_evicted = false; for (i = 0; i < page_nr; i++) { pg_id = pages[i]; @@ -3325,6 +3352,7 @@ cache_map_pages(struct umem_cache *cache, uint32_t *pages, int page_nr) D_ERROR("Can only map single evictable page.\n"); return -DER_INVAL; } + retry_cnt = 0; retry: pinfo = cache->ca_pages[pg_id].pg_info; /* The page is already mapped */ @@ -3335,6 +3363,7 @@ cache_map_pages(struct umem_cache *cache, uint32_t *pages, int page_nr) if (free_pinfo != NULL) { cache_push_free_page(cache, free_pinfo); free_pinfo = NULL; + pages_evicted = true; } if (is_id_evictable(cache, pg_id) != pinfo->pi_evictable) { pinfo->pi_evictable = is_id_evictable(cache, pg_id); @@ -3351,14 +3380,21 @@ cache_map_pages(struct umem_cache *cache, uint32_t *pages, int page_nr) if (is_id_evictable(cache, pg_id)) { if (free_pinfo == NULL) { rc = cache_get_free_page(cache, &free_pinfo, 0, false); - if (rc) { + if (rc && rc != -DER_BUSY) { DL_ERROR(rc, "Failed to get free page."); break; } + retry_cnt++; + D_CDEBUG(retry_cnt % 100 == 0, DLOG_ERR, DB_TRACE, + "%u retries of get free page. 
[ne:%u,pinned:%u,free:%u]\n", + retry_cnt, cache->ca_pgs_stats[UMEM_PG_STATS_NONEVICTABLE], + cache->ca_pgs_stats[UMEM_PG_STATS_PINNED], + cache->ca_pgs_stats[UMEM_PG_STATS_FREE]); goto retry; } else { pinfo = free_pinfo; free_pinfo = NULL; + pages_evicted = true; } } else { pinfo = cache_pop_free_page(cache); @@ -3374,6 +3410,10 @@ cache_map_pages(struct umem_cache *cache, uint32_t *pages, int page_nr) /* Map an empty page, doesn't need to load page */ pinfo->pi_loaded = 1; } + if (rc || (pages_evicted && cache->ca_unpin_waiters)) { + cache->ca_unpin_waiters = 0; + cache->ca_store->stor_ops->so_waitqueue_wakeup(cache->ca_unpin_wq, true); + } return rc; } @@ -3383,10 +3423,13 @@ cache_pin_pages(struct umem_cache *cache, uint32_t *pages, int page_nr, bool for { struct umem_page_info *pinfo, *free_pinfo = NULL; uint32_t pg_id; - int i, processed = 0, pinned = 0, rc = 0; + int i, processed = 0, pinned = 0, rc = 0; + int retry_cnt; + bool pages_evicted = false; for (i = 0; i < page_nr; i++) { pg_id = pages[i]; + retry_cnt = 0; retry: pinfo = cache->ca_pages[pg_id].pg_info; /* The page is already mapped */ @@ -3397,19 +3440,28 @@ cache_pin_pages(struct umem_cache *cache, uint32_t *pages, int page_nr, bool for if (free_pinfo != NULL) { cache_push_free_page(cache, free_pinfo); free_pinfo = NULL; + pages_evicted = true; } goto next; } if (free_pinfo == NULL) { rc = cache_get_free_page(cache, &free_pinfo, pinned, for_sys); - if (rc) + if (rc && rc != -DER_BUSY) goto error; + retry_cnt++; + D_CDEBUG(retry_cnt % 20 == 0, DLOG_ERR, DB_TRACE, + "%u retries of get free page with %u pinned. 
" + "[ne:%u,pinned:%u,free:%u]\n", + retry_cnt, pinned, cache->ca_pgs_stats[UMEM_PG_STATS_NONEVICTABLE], + cache->ca_pgs_stats[UMEM_PG_STATS_PINNED], + cache->ca_pgs_stats[UMEM_PG_STATS_FREE]); /* Above cache_get_free_page() could yield, need re-check mapped status */ goto retry; } else { pinfo = free_pinfo; free_pinfo = NULL; + pages_evicted = true; } inc_cache_stats(cache, UMEM_CACHE_STATS_MISS); @@ -3434,6 +3486,10 @@ cache_pin_pages(struct umem_cache *cache, uint32_t *pages, int page_nr, bool for pinfo->pi_sys = for_sys; } + if (pages_evicted && cache->ca_unpin_waiters) { + cache->ca_unpin_waiters = 0; + cache->ca_store->stor_ops->so_waitqueue_wakeup(cache->ca_unpin_wq, true); + } return 0; error: for (i = 0; i < processed; i++) { @@ -3444,6 +3500,10 @@ cache_pin_pages(struct umem_cache *cache, uint32_t *pages, int page_nr, bool for cache_unpin_page(cache, pinfo); } + if (cache->ca_unpin_waiters) { + cache->ca_unpin_waiters = 0; + cache->ca_store->stor_ops->so_waitqueue_wakeup(cache->ca_unpin_wq, true); + } return rc; } @@ -3678,9 +3738,12 @@ umem_cache_reserve(struct umem_store *store) } rc = 0; - D_CDEBUG(retry_cnt == 10, DLOG_ERR, DB_TRACE, - "Retry reserve free page, %d times\n", retry_cnt); retry_cnt++; + D_CDEBUG(retry_cnt % 20 == 0, DLOG_ERR, DB_TRACE, + "%u retries of reserve page. [ne:%u,pinned:%u,free:%u]\n", retry_cnt, + cache->ca_pgs_stats[UMEM_PG_STATS_NONEVICTABLE], + cache->ca_pgs_stats[UMEM_PG_STATS_PINNED], + cache->ca_pgs_stats[UMEM_PG_STATS_FREE]); } D_ASSERT(cache->ca_reserve_waiters > 0); diff --git a/src/include/daos/mem.h b/src/include/daos/mem.h index 18d8fe988a7..1b08f4ac3d0 100644 --- a/src/include/daos/mem.h +++ b/src/include/daos/mem.h @@ -1,6 +1,6 @@ /** * (C) Copyright 2016-2024 Intel Corporation. 
- * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -284,6 +284,10 @@ struct umem_cache { uint32_t ca_reserve_waiters; /** Waitqueue for free page reserve: umem_cache_reserve() */ void *ca_reserve_wq; + /** Waiters for evictable pages to be unpinned */ + uint32_t ca_unpin_waiters; + /** Waitqueue for waiters for evictable pages to be unpinned */ + void *ca_unpin_wq; /** TODO: some other global status */ uint64_t *ptr2off; uintptr_t *off2ptr; From 624c95f5a94ff75dc67b6ee10e244e84c5e8c122 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Oksana=20Sa=C5=82yk?= Date: Mon, 12 Jan 2026 17:19:13 +0100 Subject: [PATCH 121/253] DAOS-16677 doc: remove FQDN from docs & testcode (#17126) Signed-off-by: Oksana Salyk --- docs/QSG/setup_rhel.md | 12 +++++------ docs/QSG/setup_suse.md | 10 ++++----- docs/QSG/tour.md | 34 +++++++++++++++---------------- docs/admin/administration.md | 4 ++-- docs/admin/troubleshooting.md | 12 +++++------ docs/testing/ior.md | 2 +- docs/user/filesystem.md | 20 +++++++++--------- src/tests/ftest/util/dmg_utils.py | 6 +++--- 8 files changed, 50 insertions(+), 50 deletions(-) diff --git a/docs/QSG/setup_rhel.md b/docs/QSG/setup_rhel.md index 8ce61c12b6f..053cb1c0cd3 100644 --- a/docs/QSG/setup_rhel.md +++ b/docs/QSG/setup_rhel.md @@ -377,8 +377,8 @@ Examples are available on [github](https://github.com/daos-stack/daos/tree/maste pdsh -S -w $SERVER_NODES "sudo systemctl status daos_server" # if you see following format messages (depending on number of servers), proceed to storage format - server-1: server-1.test.hpdd.intel.com INFO 2023/04/11 23:14:06 SCM format required on instance 1 - server-1: server-1.test.hpdd.intel.com INFO 2023/04/11 23:14:06 SCM format required on instance 0 + server-1: server-1.test.example.com INFO 2023/04/11 23:14:06 SCM format required on instance 1 + server-1: server-1.test.example.com INFO 
2023/04/11 23:14:06 SCM format required on instance 0 # format storage dmg storage format -l $SERVER_NODES # can use --force if needed @@ -391,10 +391,10 @@ Examples are available on [github](https://github.com/daos-stack/daos/tree/maste # all the server ranks should show 'Joined' STATE Rank UUID Control Address Fault Domain State Reason ---- ---- --------------- ------------ ----- ------ - 0 604c4ffa-563a-49dc-b702-3c87293dbcf3 10.8.1.179:10001 /server-1.test.hpdd.intel.com Joined - 1 f0791f98-4379-4ace-a083-6ca3ffa65756 10.8.1.179:10001 /server-1.test.hpdd.intel.com Joined - 2 745d2a5b-46dd-42c5-b90a-d2e46e178b3e 10.8.1.189:10001 /server-2.test.hpdd.intel.com Joined - 3 ba6a7800-3952-46ce-af92-bba9daa35048 10.8.1.189:10001 /server-2.test.hpdd.intel.com Joined + 0 604c4ffa-563a-49dc-b702-3c87293dbcf3 10.8.1.179:10001 /server-1.test.example.com Joined + 1 f0791f98-4379-4ace-a083-6ca3ffa65756 10.8.1.179:10001 /server-1.test.example.com Joined + 2 745d2a5b-46dd-42c5-b90a-d2e46e178b3e 10.8.1.189:10001 /server-2.test.example.com Joined + 3 ba6a7800-3952-46ce-af92-bba9daa35048 10.8.1.189:10001 /server-2.test.example.com Joined ## Start the DAOS Agents diff --git a/docs/QSG/setup_suse.md b/docs/QSG/setup_suse.md index ddd964efe43..b1a29dd263d 100644 --- a/docs/QSG/setup_suse.md +++ b/docs/QSG/setup_suse.md @@ -394,7 +394,7 @@ Examples are available on [github](https://github.com/daos-stack/daos/tree/maste pdsh -S -w $SERVER_NODES "sudo systemctl status daos_server" # if you see following format messages (depending on number of servers), proceed to storage format - node-4: node-1.test.hpdd.intel.com INFO 2023/04/11 23:14:06 SCM format required on instance 0 + node-4: node-1.test.example.com INFO 2023/04/11 23:14:06 SCM format required on instance 0 # format storage dmg storage format -l $SERVER_NODES # can use --force if needed @@ -407,10 +407,10 @@ Examples are available on [github](https://github.com/daos-stack/daos/tree/maste # all the server ranks should show 'Joined' 
STATE Rank UUID Control Address Fault Domain State Reason ---- ---- --------------- ------------ ----- ------ - 0 604c4ffa-563a-49dc-b702-3c87293dbcf3 10.8.1.179:10001 /node-4.test.hpdd.intel.com Joined - 1 f0791f98-4379-4ace-a083-6ca3ffa65756 10.8.1.179:10001 /node-4.test.hpdd.intel.com Joined - 2 745d2a5b-46dd-42c5-b90a-d2e46e178b3e 10.8.1.189:10001 /node-5.test.hpdd.intel.com Joined - 3 ba6a7800-3952-46ce-af92-bba9daa35048 10.8.1.189:10001 /node-5.test.hpdd.intel.com Joined + 0 604c4ffa-563a-49dc-b702-3c87293dbcf3 10.8.1.179:10001 /node-4.test.example.com Joined + 1 f0791f98-4379-4ace-a083-6ca3ffa65756 10.8.1.179:10001 /node-4.test.example.com Joined + 2 745d2a5b-46dd-42c5-b90a-d2e46e178b3e 10.8.1.189:10001 /node-5.test.example.com Joined + 3 ba6a7800-3952-46ce-af92-bba9daa35048 10.8.1.189:10001 /node-5.test.example.com Joined ## Start the DAOS Agents diff --git a/docs/QSG/tour.md b/docs/QSG/tour.md index 78dd290cc03..130b89b5650 100644 --- a/docs/QSG/tour.md +++ b/docs/QSG/tour.md @@ -153,7 +153,7 @@ bring-up DAOS servers and clients. IOR-3.4.0+dev: MPI Coordinated Test of Parallel I/O Began : Fri Apr 16 18:07:56 2021 Command line : ior -a POSIX -b 26214400 -v -w -k -i 1 -o /tmp/daos_test1/testfile -t 25M - Machine : Linux boro-8.boro.hpdd.intel.com + Machine : Linux boro-8.boro.example.com Start time skew across all tasks: 0.00 sec TestID : 0 StartTime : Fri Apr 16 18:07:56 2021 @@ -358,19 +358,19 @@ bring-up DAOS servers and clients. 
$ dmg system query -v Rank UUID Control Address Fault Domain State Reason ---- --------------- ------------ ----- ------ - 0 2bf0e083-33d6-4ce3-83c4-c898c2a7ddbd 10.7.1.8:10001 boro-8.boro.hpdd.intel.com Joined - 1 c9ac1dd9-0f9d-4684-90d3-038b720fd26b 10.7.1.35:10001 boro-35.boro.hpdd.intel.com Joined - 2 80e44fe9-3a2b-4808-9a0f-88c3cbe7f565 10.7.1.53:10001 boro-53.boro.hpdd.intel.com Joined - 3 a26fd44a-6089-4cc3-a06b-278a85607fd3 10.7.1.52:10001 boro-52.boro.hpdd.intel.com Evicted system stop + 0 2bf0e083-33d6-4ce3-83c4-c898c2a7ddbd 10.7.1.8:10001 boro-8.boro.example.com Joined + 1 c9ac1dd9-0f9d-4684-90d3-038b720fd26b 10.7.1.35:10001 boro-35.boro.example.com Joined + 2 80e44fe9-3a2b-4808-9a0f-88c3cbe7f565 10.7.1.53:10001 boro-53.boro.example.com Joined + 3 a26fd44a-6089-4cc3-a06b-278a85607fd3 10.7.1.52:10001 boro-52.boro.example.com Evicted system stop # Restart, after evicted server restarted, verify the server joined $ /usr/bin/dmg system query -v Rank UUID Control Address Fault Domain State Reason ---- --------------- ------------ ----- ------ - 0 2bf0e083-33d6-4ce3-83c4-c898c2a7ddbd 10.7.1.8:10001 /boro-8.boro.hpdd.intel.com Joined - 1 c9ac1dd9-0f9d-4684-90d3-038b720fd26b 10.7.1.35:10001 /boro-35.boro.hpdd.intel.com Joined - 2 80e44fe9-3a2b-4808-9a0f-88c3cbe7f565 10.7.1.53:10001 /boro-53.boro.hpdd.intel.com Joined - 3 a26fd44a-6089-4cc3-a06b-278a85607fd3 10.7.1.52:10001 /boro-52.boro.hpdd.intel.com Joined + 0 2bf0e083-33d6-4ce3-83c4-c898c2a7ddbd 10.7.1.8:10001 /boro-8.boro.example.com Joined + 1 c9ac1dd9-0f9d-4684-90d3-038b720fd26b 10.7.1.35:10001 /boro-35.boro.example.com Joined + 2 80e44fe9-3a2b-4808-9a0f-88c3cbe7f565 10.7.1.53:10001 /boro-53.boro.example.com Joined + 3 a26fd44a-6089-4cc3-a06b-278a85607fd3 10.7.1.52:10001 /boro-52.boro.example.com Joined # Unmount after test completed $ fusermount -u /tmp/daos_test1/ @@ -415,19 +415,19 @@ bring-up DAOS servers and clients. 
$ dmg system query -v Rank UUID Control Address Fault Domain State Reason ---- --------------- ------------ ----- ------ - 0 2bf0e083-33d6-4ce3-83c4-c898c2a7ddbd 10.7.1.8:10001 boro-8.boro.hpdd.intel.com Joined - 1 c9ac1dd9-0f9d-4684-90d3-038b720fd26b 10.7.1.35:10001 boro-35.boro.hpdd.intel.com Joined - 2 80e44fe9-3a2b-4808-9a0f-88c3cbe7f565 10.7.1.53:10001 boro-53.boro.hpdd.intel.com Evicted system stop - 3 a26fd44a-6089-4cc3-a06b-278a85607fd3 10.7.1.52:10001 boro-52.boro.hpdd.intel.com Joined + 0 2bf0e083-33d6-4ce3-83c4-c898c2a7ddbd 10.7.1.8:10001 boro-8.boro.example.com Joined + 1 c9ac1dd9-0f9d-4684-90d3-038b720fd26b 10.7.1.35:10001 boro-35.boro.example.com Joined + 2 80e44fe9-3a2b-4808-9a0f-88c3cbe7f565 10.7.1.53:10001 boro-53.boro.example.com Evicted system stop + 3 a26fd44a-6089-4cc3-a06b-278a85607fd3 10.7.1.52:10001 boro-52.boro.example.com Joined # Restart, after evicted server restarted, verify the server joined $ /usr/bin/dmg system query -v Rank UUID Control Address Fault Domain State Reason ---- --------------- ------------ ----- ------ - 0 2bf0e083-33d6-4ce3-83c4-c898c2a7ddbd 10.7.1.8:10001 /boro-8.boro.hpdd.intel.com Joined - 1 c9ac1dd9-0f9d-4684-90d3-038b720fd26b 10.7.1.35:10001 /boro-35.boro.hpdd.intel.com Joined - 2 80e44fe9-3a2b-4808-9a0f-88c3cbe7f565 10.7.1.53:10001 /boro-53.boro.hpdd.intel.com Joined - 3 a26fd44a-6089-4cc3-a06b-278a85607fd3 10.7.1.52:10001 /boro-52.boro.hpdd.intel.com Joined + 0 2bf0e083-33d6-4ce3-83c4-c898c2a7ddbd 10.7.1.8:10001 /boro-8.boro.example.com Joined + 1 c9ac1dd9-0f9d-4684-90d3-038b720fd26b 10.7.1.35:10001 /boro-35.boro.example.com Joined + 2 80e44fe9-3a2b-4808-9a0f-88c3cbe7f565 10.7.1.53:10001 /boro-53.boro.example.com Joined + 3 a26fd44a-6089-4cc3-a06b-278a85607fd3 10.7.1.52:10001 /boro-52.boro.example.com Joined ## Clean-Up diff --git a/docs/admin/administration.md b/docs/admin/administration.md index 61fa6ba4179..554866e858e 100644 --- a/docs/admin/administration.md +++ b/docs/admin/administration.md @@ -34,7 
+34,7 @@ Below is an example of a RAS event signaling an exclusion of an unresponsive engine: ``` -&&& RAS EVENT id: [swim_rank_dead] ts: [2021-11-21T13:32:31.747408+0000] host: [wolf-112.wolf.hpdd.intel.com] type: [STATE_CHANGE] sev: [NOTICE] msg: [SWIM marked rank as dead.] pid: [253454] tid: [1] rank: [6] inc: [63a058833280000] +&&& RAS EVENT id: [swim_rank_dead] ts: [2021-11-21T13:32:31.747408+0000] host: [wolf-112.wolf.example.com] type: [STATE_CHANGE] sev: [NOTICE] msg: [SWIM marked rank as dead.] pid: [253454] tid: [1] rank: [6] inc: [63a058833280000] ``` ### Event List @@ -580,7 +580,7 @@ following `daos_server` log entries to indicate the parameters are written to the engine's NVMe config: ```bash -DEBUG 13:59:29.229795 provider.go:592: BdevWriteConfigRequest: &{ForwardableRequest:{Forwarded:false} ConfigOutputPath:/mnt/daos0/daos_nvme.conf OwnerUID:10695475 OwnerGID:10695475 TierProps:[{Class:nvme DeviceList:0000:5e:00.0 DeviceFileSize:0 Tier:1 DeviceRoles:{OptionBits:0}}] HotplugEnabled:false HotplugBusidBegin:0 HotplugBusidEnd:0 Hostname:wolf-310.wolf.hpdd.intel.com AccelProps:{Engine: Options:0} SpdkRpcSrvProps:{Enable:false SockAddr:} AutoFaultyProps:{Enable:true MaxIoErrs:1 MaxCsumErrs:2} VMDEnabled:false ScannedBdevs:} +DEBUG 13:59:29.229795 provider.go:592: BdevWriteConfigRequest: &{ForwardableRequest:{Forwarded:false} ConfigOutputPath:/mnt/daos0/daos_nvme.conf OwnerUID:10695475 OwnerGID:10695475 TierProps:[{Class:nvme DeviceList:0000:5e:00.0 DeviceFileSize:0 Tier:1 DeviceRoles:{OptionBits:0}}] HotplugEnabled:false HotplugBusidBegin:0 HotplugBusidEnd:0 Hostname:wolf-310.wolf.example.com AccelProps:{Engine: Options:0} SpdkRpcSrvProps:{Enable:false SockAddr:} AutoFaultyProps:{Enable:true MaxIoErrs:1 MaxCsumErrs:2} VMDEnabled:false ScannedBdevs:} Writing NVMe config file for engine instance 0 to "/mnt/daos0/daos_nvme.conf" ``` diff --git a/docs/admin/troubleshooting.md b/docs/admin/troubleshooting.md index 78b54340071..6365142b5cd 100644 --- 
a/docs/admin/troubleshooting.md +++ b/docs/admin/troubleshooting.md @@ -523,7 +523,7 @@ updated its system fabric provider. Example `system_fabric_provider_changed` RAS event from syslog: ``` -daos_server[3302185]: id: [system_fabric_provider_changed] ts: [2024-02-13T20:08:50.956+00:00] host: [boro-74.boro.hpdd.intel.com] type: [INFO] sev: [NOTICE] msg: [system fabric provider has changed: ofi+tcp -> ofi+tcp;ofi_rxm] pid: [3302185] +daos_server[3302185]: id: [system_fabric_provider_changed] ts: [2024-02-13T20:08:50.956+00:00] host: [boro-74.boro.example.com] type: [INFO] sev: [NOTICE] msg: [system fabric provider has changed: ofi+tcp -> ofi+tcp;ofi_rxm] pid: [3302185] ``` To resolve the issue: @@ -1083,11 +1083,11 @@ running under systemd run the following command: CGroup: /system.slice/rsyslog.service └─1962 /usr/sbin/rsyslogd -n -May 23 16:12:31 wolf-164.wolf.hpdd.intel.com systemd[1]: Starting System Logging Service... -May 23 16:12:31 wolf-164.wolf.hpdd.intel.com rsyslogd[1962]: [origin software="rsyslogd" swVersion="8.21> -May 23 16:12:31 wolf-164.wolf.hpdd.intel.com systemd[1]: Started System Logging Service. -May 23 16:12:31 wolf-164.wolf.hpdd.intel.com rsyslogd[1962]: imjournal: journal files changed, reloading> -May 29 03:18:01 wolf-164.wolf.hpdd.intel.com rsyslogd[1962]: [origin software="rsyslogd" swVersion="8.21> +May 23 16:12:31 wolf-164.wolf.example.com systemd[1]: Starting System Logging Service... +May 23 16:12:31 wolf-164.wolf.example.com rsyslogd[1962]: [origin software="rsyslogd" swVersion="8.21> +May 23 16:12:31 wolf-164.wolf.example.com systemd[1]: Started System Logging Service. 
+May 23 16:12:31 wolf-164.wolf.example.com rsyslogd[1962]: imjournal: journal files changed, reloading> +May 29 03:18:01 wolf-164.wolf.example.com rsyslogd[1962]: [origin software="rsyslogd" swVersion="8.21> ``` To configure a Syslog daemon to resolve the delivery errors and receive messages from 'daos_server' diff --git a/docs/testing/ior.md b/docs/testing/ior.md index 15137866d5a..d52ed1e0226 100644 --- a/docs/testing/ior.md +++ b/docs/testing/ior.md @@ -54,7 +54,7 @@ $ mpirun -hostfile /path/to/hostfile_clients -np 30 /bin/ior -a POSIX IOR-3.4.0+dev: MPI Coordinated Test of Parallel I/O Began : Thu Apr 29 23:23:09 2021 Command line : ior -a POSIX -b 5G -t 1M -v -W -w -r -R -i 1 -o /tmp/daos_dfuse/testfile -Machine : Linux wolf-86.wolf.hpdd.intel.com +Machine : Linux wolf-86.wolf.example.com Start time skew across all tasks: 0.00 sec TestID : 0 StartTime : Thu Apr 29 23:23:09 2021 diff --git a/docs/user/filesystem.md b/docs/user/filesystem.md index ca07a638637..77459efd166 100644 --- a/docs/user/filesystem.md +++ b/docs/user/filesystem.md @@ -486,7 +486,7 @@ $ systemctl status scratch_fs-root_dfuse.mount Docs: man:fstab(5) man:systemd-fstab-generator(8) -Sep 23 15:55:33 wolf-170.wolf.hpdd.intel.com systemd[1]: scratch_fs-root_dfuse.mount: Succeeded. +Sep 23 15:55:33 wolf-170.wolf.example.com systemd[1]: scratch_fs-root_dfuse.mount: Succeeded. $ systemctl start scratch_fs-root_dfuse.mount $ df -h | grep fuse dfuse 537G 5.1G 532G 1% /scratch_fs/root_dfuse @@ -505,8 +505,8 @@ $ systemctl status scratch_fs-root_dfuse.mount CGroup: /system.slice/scratch_fs-root_dfuse.mount └─4173 dfuse /scratch_fs/root_dfuse -o rw pool=admin_pool container=admin_cont dev suid -Sep 23 15:57:52 wolf-170.wolf.hpdd.intel.com systemd[1]: Mounting /scratch_fs/root_dfuse... -Sep 23 15:57:53 wolf-170.wolf.hpdd.intel.com systemd[1]: Mounted /scratch_fs/root_dfuse. +Sep 23 15:57:52 wolf-170.wolf.example.com systemd[1]: Mounting /scratch_fs/root_dfuse... 
+Sep 23 15:57:53 wolf-170.wolf.example.com systemd[1]: Mounted /scratch_fs/root_dfuse. $ systemctl stop scratch_fs-root_dfuse.mount $ systemctl status scratch_fs-root_dfuse.mount ● scratch_fs-root_dfuse.mount - /scratch_fs/root_dfuse @@ -520,11 +520,11 @@ $ systemctl status scratch_fs-root_dfuse.mount Memory: 540.0K CGroup: /system.slice/scratch_fs-root_dfuse.mount -Sep 23 15:57:52 wolf-170.wolf.hpdd.intel.com systemd[1]: Mounting /scratch_fs/root_dfuse... -Sep 23 15:57:53 wolf-170.wolf.hpdd.intel.com systemd[1]: Mounted /scratch_fs/root_dfuse. -Sep 23 15:58:32 wolf-170.wolf.hpdd.intel.com systemd[1]: Unmounting /scratch_fs/root_dfuse... -Sep 23 15:58:32 wolf-170.wolf.hpdd.intel.com systemd[1]: scratch_fs-root_dfuse.mount: Succeeded. -Sep 23 15:58:32 wolf-170.wolf.hpdd.intel.com systemd[1]: Unmounted /scratch_fs/root_dfuse. +Sep 23 15:57:52 wolf-170.wolf.example.com systemd[1]: Mounting /scratch_fs/root_dfuse... +Sep 23 15:57:53 wolf-170.wolf.example.com systemd[1]: Mounted /scratch_fs/root_dfuse. +Sep 23 15:58:32 wolf-170.wolf.example.com systemd[1]: Unmounting /scratch_fs/root_dfuse... +Sep 23 15:58:32 wolf-170.wolf.example.com systemd[1]: scratch_fs-root_dfuse.mount: Succeeded. +Sep 23 15:58:32 wolf-170.wolf.example.com systemd[1]: Unmounted /scratch_fs/root_dfuse. $ ``` @@ -580,8 +580,8 @@ $ systemctl status scratch_fs-root_dfuse.mount CGroup: /system.slice/scratch_fs-root_dfuse.mount └─2346 dfuse /scratch_fs/root_dfuse -o rw pool=admin_pool container=admin_cont dev suid -Sep 23 16:13:34 wolf-170.wolf.hpdd.intel.com systemd[1]: Mounting /scratch_fs/root_dfuse... -Sep 23 16:13:35 wolf-170.wolf.hpdd.intel.com systemd[1]: Mounted /scratch_fs/root_dfuse. +Sep 23 16:13:34 wolf-170.wolf.example.com systemd[1]: Mounting /scratch_fs/root_dfuse... +Sep 23 16:13:35 wolf-170.wolf.example.com systemd[1]: Mounted /scratch_fs/root_dfuse. 
$ ``` diff --git a/src/tests/ftest/util/dmg_utils.py b/src/tests/ftest/util/dmg_utils.py index 1579d1cd144..88d4b26784c 100644 --- a/src/tests/ftest/util/dmg_utils.py +++ b/src/tests/ftest/util/dmg_utils.py @@ -1,6 +1,6 @@ """ (C) Copyright 2018-2024 Intel Corporation. - (C) Copyright 2025 Hewlett Packard Enterprise Development LP + (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -1192,7 +1192,7 @@ def system_query(self, ranks=None, verbose=True): # { # "addr": "10.8.1.11:10001", # "state": "joined", - # "fault_domain": "/wolf-11.wolf.hpdd.intel.com", + # "fault_domain": "/wolf-11.wolf.example.com", # "rank": 0, # "uuid": "e7f2cb06-a111-4d55-a6a5-b494b70d62ab", # "fabric_uri": "ofi+sockets://192.168.100.11:31416", @@ -1202,7 +1202,7 @@ def system_query(self, ranks=None, verbose=True): # { # "addr": "10.8.1.74:10001", # "state": "excluded", - # "fault_domain": "/wolf-74.wolf.hpdd.intel.com", + # "fault_domain": "/wolf-74.wolf.example.com", # "rank": 1, # "uuid": "db36ab28-fdb0-4822-97e6-89547393ed03", # "fabric_uri": "ofi+sockets://192.168.100.74:31416", From ae2385d62a0deba2f2d547d484c5cd14632b8f1a Mon Sep 17 00:00:00 2001 From: Makito Kano Date: Tue, 13 Jan 2026 08:34:46 +0900 Subject: [PATCH 122/253] DAOS-17858 test: CR - Add test_two_pools_healthy (#17321) Test to pass in two pool labels where one is healthy pool. 1. Create three pools and one container. 2. Inject container bad label into one of them. 3. Enable checker and set policy to --all-interactive. 4. Call dmg check start with two different healthy pool labels. 5. Call dmg check start with two same healthy pool labels. 6. Call dmg check start with healthy pool and corrupted pool. 7. Repair with option 2 (original container label) and wait for checker to finish. 8. Call dmg check start with healthy pool and invalid label. 9. Disable checker and verify that the fault is actually fixed. 
Signed-off-by: Makito Kano --- .../ftest/recovery/check_start_corner_case.py | 118 ++++++++++++++++++ .../recovery/check_start_corner_case.yaml | 2 +- 2 files changed, 119 insertions(+), 1 deletion(-) diff --git a/src/tests/ftest/recovery/check_start_corner_case.py b/src/tests/ftest/recovery/check_start_corner_case.py index d56d81aa2d3..5bdf4973b3b 100644 --- a/src/tests/ftest/recovery/check_start_corner_case.py +++ b/src/tests/ftest/recovery/check_start_corner_case.py @@ -153,3 +153,121 @@ def test_start_back_to_back(self): # Containers were removed by the checker. container_1.skip_cleanup() container_2.skip_cleanup() + + def test_two_pools_healthy(self): + """Test to pass in two pool labels where one is healthy pool. + + 1. Create three pools and one container. + 2. Inject container bad label into one of them. + 3. Enable checker and set policy to --all-interactive. + 4. Call dmg check start with two different healthy pool labels. + 5. Call dmg check start with two same healthy pool labels. + 6. Call dmg check start with healthy pool and corrupted pool. + 7. Repair with option 2 (original container label) and wait for checker to finish. + 8. Call dmg check start with healthy pool and invalid label. + 9. Disable checker and verify that the fault is actually fixed. + + Jira ID: DAOS-17858 + + :avocado: tags=all,full_regression + :avocado: tags=hw,medium + :avocado: tags=recovery,cat_recov + :avocado: tags=DMGCheckStartCornerCaseTest,test_two_pools_healthy + """ + # 1. Create three pools and one container. + self.log_step("Create three pools and one container.") + pool_1 = self.get_pool(connect=False) + pool_2 = self.get_pool(connect=False) + pool_3 = self.get_pool(connect=False) + container = self.get_container(pool=pool_3) + + # 2. Inject container bad label into one of them. 
+ self.log_step("Inject container bad label into one of them.") + daos_command = self.get_daos_command() + daos_command.faults_container( + pool=pool_3.identifier, cont=container.identifier, + location="DAOS_CHK_CONT_BAD_LABEL") + + # 3. Enable checker and set policy to --all-interactive. + self.log_step("Enable checker and set policy to --all-interactive.") + dmg_command = self.get_dmg_command() + dmg_command.check_enable() + dmg_command.check_set_policy(all_interactive=True) + + # 4. Call dmg check start with two different healthy pool labels. + self.log_step("Call dmg check start with two different healthy pool labels.") + healthy_diff = pool_1.identifier + " " + pool_2.identifier + try: + dmg_command.check_start(pool=healthy_diff) + msg = ("dmg check start with two different healthy pool labels worked as " + "expected.") + self.log.info(msg) + except CommandFailure as command_failure: + msg = (f"dmg check start with two different healthy pool labels failed! " + f"{command_failure}") + self.fail(msg) + # Need to stop before starting again. + dmg_command.check_stop() + + # 5. Call dmg check start with two same healthy pool labels. + self.log_step("Call dmg check start with two same healthy pool labels.") + healthy_same = pool_1.identifier + " " + pool_1.identifier + try: + dmg_command.check_start(pool=healthy_same) + msg = ("dmg check start with two same healthy pool labels worked as " + "expected.") + self.log.info(msg) + except CommandFailure as command_failure: + msg = (f"dmg check start with two same healthy pool labels failed! " + f"{command_failure}") + self.fail(msg) + dmg_command.check_stop() + + # 6. Call dmg check start with healthy pool and corrupted pool. + self.log_step("Call dmg check start with healthy pool and corrupted pool.") + healthy_corrupted = pool_1.identifier + " " + pool_3.identifier + dmg_command.check_start(pool=healthy_corrupted) + + # 7. Repair with option 2 and wait for checker to finish. 
+ self.log_step("Repair with option 2 and wait for checker to finish.") + # Wait for the checker to detect the inconsistent container label. + query_reports = None + for _ in range(8): + check_query_out = dmg_command.check_query() + # Status becomes RUNNING immediately, but it may take a while to detect the + # inconsistency. If detected, "reports" field is filled. + if check_query_out["response"]["status"] == "RUNNING": + query_reports = check_query_out["response"]["reports"] + if query_reports: + break + time.sleep(5) + if not query_reports: + self.fail("Checker didn't detect any inconsistency!") + fault_msg = query_reports[0]["msg"] + expected_fault = "inconsistent container label" + if expected_fault not in fault_msg: + self.fail(f"Checker didn't detect {expected_fault}! Fault msg = {fault_msg}") + # Obtain the seq num (ID) to repair. + seq = query_reports[0]["seq"] + # Repair with action 2, which is to use the original container label. + dmg_command.check_repair(seq_num=str(seq), action="2") + wait_for_check_complete(dmg=dmg_command) + dmg_command.check_stop() + + # 8. Call dmg check start with healthy pool and invalid label. + self.log_step("Call dmg check start with healthy pool and invalid label.") + healthy_invalid = pool_1.identifier + " TestPool0" + try: + dmg_command.check_start(pool=healthy_invalid) + self.fail("dmg check start with healthy and invalid pool labels worked!") + except CommandFailure as command_failure: + exp_msg = "unable to find pool service" + if exp_msg not in str(command_failure): + self.fail(f"{exp_msg} is not in the error message!") + + # 9. Disable checker and verify that the fault is actually fixed. 
+        self.log_step("Disable checker and verify that the fault is actually fixed.") +        dmg_command.check_disable() +        expected_props = {"label": container.label.value} +        label_verified = container.verify_prop(expected_props=expected_props) +        self.assertTrue(label_verified, "Container label isn't fixed!") diff --git a/src/tests/ftest/recovery/check_start_corner_case.yaml b/src/tests/ftest/recovery/check_start_corner_case.yaml index d4ba8437916..ed1816f3b8c 100644 --- a/src/tests/ftest/recovery/check_start_corner_case.yaml +++ b/src/tests/ftest/recovery/check_start_corner_case.yaml @@ -2,7 +2,7 @@ hosts: test_servers: 1 test_clients: 1 -timeout: 4M +timeout: 5M server_config: name: daos_server From dc6dd2ccf557ce75b254c3d36d6526854860cdf9 Mon Sep 17 00:00:00 2001 From: Nasf-Fan Date: Tue, 13 Jan 2026 07:35:27 +0800 Subject: [PATCH 123/253] DAOS-18385 mgmt: new daos_fail_loc to simulate orphan pool shard (#17361) When create pool, if set daos_fail_loc as DAOS_CHK_ORPHAN_POOL_SHARD on server, then will generate orphan pool shard on the specified rank via daos_fail_value. We can use that to simulate inconsistency for CR related test. Signed-off-by: Fan Yong --- src/include/daos/common.h | 3 ++- src/mgmt/srv_pool.c | 42 +++++++++++++++++++++++++++++---------- 2 files changed, 33 insertions(+), 12 deletions(-) diff --git a/src/include/daos/common.h b/src/include/daos/common.h index f963d77aae6..11b35d2aacf 100644 --- a/src/include/daos/common.h +++ b/src/include/daos/common.h @@ -1,6 +1,6 @@ /** * (C) Copyright 2015-2024 Intel Corporation. 
- * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -927,6 +927,7 @@ enum { #define DAOS_CHK_FAIL_REPORT_POOL2 (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0xb8) #define DAOS_CHK_ENGINE_DEATH (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0xb9) #define DAOS_CHK_VERIFY_CONT_SHARDS (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0xba) +#define DAOS_CHK_ORPHAN_POOL_SHARD (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0xbb) #define DAOS_MGMT_FAIL_CREATE_QUERY (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0xe0) diff --git a/src/mgmt/srv_pool.c b/src/mgmt/srv_pool.c index 12030e5b883..0a491ec8a7e 100644 --- a/src/mgmt/srv_pool.c +++ b/src/mgmt/srv_pool.c @@ -1,6 +1,6 @@ /* * (C) Copyright 2016-2024 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -154,7 +154,7 @@ ds_mgmt_tgt_pool_create_ranks(uuid_t pool_uuid, d_rank_list_t *rank_list, size_t } static int -ds_mgmt_pool_svc_create(uuid_t pool_uuid, int ntargets, const char *group, d_rank_list_t *ranks, +ds_mgmt_pool_svc_create(uuid_t pool_uuid, const char *group, d_rank_list_t *ranks, daos_prop_t *prop, d_rank_list_t **svc_list, size_t domains_nr, uint32_t *domains) { @@ -170,10 +170,11 @@ ds_mgmt_create_pool(uuid_t pool_uuid, const char *group, d_rank_list_t *targets, size_t nvme_size, size_t meta_size, daos_prop_t *prop, d_rank_list_t **svcp, int domains_nr, uint32_t *domains) { - d_rank_list_t *pg_ranks = NULL; - d_rank_list_t *pg_targets = NULL; - int rc; - int rc_cleanup; + d_rank_list_t *pg_ranks = NULL; + d_rank_list_t *pg_targets = NULL; + d_rank_list_t *dummy = NULL; + int rc; + int rc_cleanup; D_DEBUG(DB_MGMT, DF_UUID ": create scm/meta/nvme sizes %ld/%ld/%ld\n", DP_UUID(pool_uuid), scm_size, meta_size, nvme_size); @@ -213,16 +214,33 @@ ds_mgmt_create_pool(uuid_t pool_uuid, const char 
*group, d_rank_list_t *targets, D_GOTO(out, rc = -DER_OOG); } - rc = ds_mgmt_tgt_pool_create_ranks(pool_uuid, targets, scm_size, nvme_size, meta_size); + /* Extend the targets list to simulate orphan pool shard. */ + if (DAOS_FAIL_CHECK(DAOS_CHK_ORPHAN_POOL_SHARD)) { + d_rank_t rank; + int i; + + rank = daos_fail_value_get(); + if (!d_rank_in_rank_list(targets, rank)) { + dummy = d_rank_list_alloc(targets->rl_nr + 1); + D_ASSERT(dummy != NULL); + + for (i = 0; i < targets->rl_nr; i++) + dummy->rl_ranks[i] = targets->rl_ranks[i]; + dummy->rl_ranks[targets->rl_nr] = rank; + } + } + + rc = ds_mgmt_tgt_pool_create_ranks(pool_uuid, dummy != NULL ? dummy : targets, scm_size, + nvme_size, meta_size); if (rc != 0) { DL_ERROR(rc, DF_UUID ": creating pool on ranks failed", DP_UUID(pool_uuid)); goto out_ranks; } - D_INFO(DF_UUID": creating targets on ranks succeeded\n", DP_UUID(pool_uuid)); + D_INFO(DF_UUID ": creating targets on %d ranks succeeded\n", DP_UUID(pool_uuid), + dummy != NULL ? dummy->rl_nr : targets->rl_nr); - rc = ds_mgmt_pool_svc_create(pool_uuid, targets->rl_nr, group, targets, prop, svcp, - domains_nr, domains); + rc = ds_mgmt_pool_svc_create(pool_uuid, group, targets, prop, svcp, domains_nr, domains); if (rc) { D_ERROR("create pool "DF_UUID" svc failed: rc "DF_RC"\n", DP_UUID(pool_uuid), DP_RC(rc)); @@ -233,7 +251,8 @@ ds_mgmt_create_pool(uuid_t pool_uuid, const char *group, d_rank_list_t *targets, * round of RPCs. */ out_ranks: - rc_cleanup = ds_mgmt_tgt_pool_destroy_ranks(pool_uuid, targets); + rc_cleanup = + ds_mgmt_tgt_pool_destroy_ranks(pool_uuid, dummy != NULL ? 
dummy : targets); if (rc_cleanup) D_ERROR(DF_UUID": failed to clean up failed pool: "DF_RC"\n", DP_UUID(pool_uuid), DP_RC(rc_cleanup)); @@ -247,6 +266,7 @@ ds_mgmt_create_pool(uuid_t pool_uuid, const char *group, d_rank_list_t *targets, out: d_rank_list_free(pg_targets); d_rank_list_free(pg_ranks); + d_rank_list_free(dummy); D_DEBUG(DB_MGMT, "create pool "DF_UUID": "DF_RC"\n", DP_UUID(pool_uuid), DP_RC(rc)); return rc; From 8cd9c5007182d1ba4ac2ee588eac30fd81b55ef3 Mon Sep 17 00:00:00 2001 From: Li Wei Date: Tue, 13 Jan 2026 09:20:23 +0900 Subject: [PATCH 124/253] DAOS-17111 swim: Parse untrustable updates about self (#16519) When an engine starts up, after it gets its rank, but before it gets the latest system map, the engine skips parsing any incoming swim updates, because the current swim code considers all others untrustable. This makes the engine unusually vulnerable to transient suspicions: swim_dump_updates() 2 <= 1: {2 S 2241541897092071424} {1 A 2241415852910968832} swim_updates_parse() 2: skip untrustable update from 1, rc = -1005 Hence, this patch lets swim parse the "untrustable" updates, react to SUSPECT and DEAD updates about the self member, but skip those about other members. Signed-off-by: Li Wei --- src/cart/swim/swim.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/cart/swim/swim.c b/src/cart/swim/swim.c index 4073dcf9ad3..60d077d2ac5 100644 --- a/src/cart/swim/swim.c +++ b/src/cart/swim/swim.c @@ -2,7 +2,7 @@ * Copyright (c) 2016 UChicago Argonne, LLC * (C) Copyright 2018-2024 Intel Corporation. 
* (C) Copyright 2025 Google LLC - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -1093,6 +1093,7 @@ swim_updates_parse(struct swim_context *ctx, swim_id_t from_id, swim_id_t id, struct swim_member_state id_state; swim_id_t self_id = swim_self_get(ctx); swim_id_t upd_id; + bool from_untrustable = false; size_t i; int rc = 0; @@ -1106,10 +1107,9 @@ swim_updates_parse(struct swim_context *ctx, swim_id_t from_id, swim_id_t id, rc = ctx->sc_ops->get_member_state(ctx, from_id, &id_state); if (rc == -DER_NONEXIST || id_state.sms_status == SWIM_MEMBER_DEAD) { - swim_ctx_unlock(ctx); - SWIM_DEBUG("%lu: skip untrustable update from %lu, rc = %d\n", self_id, from_id, - rc); - D_GOTO(out, rc = -DER_NONEXIST); + SWIM_DEBUG("%lu: 'untrustable' updates from %lu: " DF_RC "\n", self_id, from_id, + DP_RC(rc)); + from_untrustable = true; } else if (rc != 0) { swim_ctx_unlock(ctx); SWIM_ERROR("get_member_state(%lu): " DF_RC "\n", from_id, DP_RC(rc)); @@ -1182,6 +1182,13 @@ swim_updates_parse(struct swim_context *ctx, swim_id_t from_id, swim_id_t id, break; } + /* + * If from_id is "untrustable", react to its SUSPECT and DEAD updates about + * me (above), but ignore those about others. + */ + if (from_untrustable) + break; + if (upds[i].smu_state.sms_status == SWIM_MEMBER_SUSPECT) swim_member_suspect(ctx, from_id, upd_id, upds[i].smu_state.sms_incarnation); From faf28dce857acc83e73831432ca7f24ce8fa32d2 Mon Sep 17 00:00:00 2001 From: Kris Jacque Date: Mon, 12 Jan 2026 17:22:34 -0700 Subject: [PATCH 125/253] DAOS-13205 control: Detect stale interactive check reports (#16988) Due to limitations of the checker, the user can't act on unresolved interactive findings from an older checker instance. 
When a new checker instance starts: - Remove unresolved interactive findings that will be re-discovered during the checker run (whole system or requested pool). - For unresolved findings that won't be re-discovered (e.g. checker starts on a different pool), change the action to STALE, but continue displaying the findings in the interface. Signed-off-by: Kris Jacque --- src/chk/chk.pb-c.c | 14 +- src/chk/chk.pb-c.h | 6 +- src/control/common/proto/chk/chk.pb.go | 132 +++++---- src/control/lib/control/check.go | 7 + src/control/lib/control/check_test.go | 34 +++ src/control/server/mgmt_check.go | 70 ++++- src/control/server/mgmt_check_test.go | 121 ++++++-- src/control/system/checker/finding.go | 12 +- src/control/system/checker/finding_test.go | 25 ++ src/control/system/raft/database_checker.go | 36 ++- .../system/raft/database_checker_test.go | 204 +++++++++++++ src/include/daos/tests_lib.h | 27 +- src/proto/chk/chk.proto | 3 + .../ftest/recovery/check_start_options.py | 270 +++++++++++++----- .../ftest/recovery/check_start_options.yaml | 1 - src/tests/suite/daos_cr.c | 31 +- 16 files changed, 789 insertions(+), 204 deletions(-) create mode 100644 src/control/system/raft/database_checker_test.go diff --git a/src/chk/chk.pb-c.c b/src/chk/chk.pb-c.c index aaf178ff90a..318081afc3a 100644 --- a/src/chk/chk.pb-c.c +++ b/src/chk/chk.pb-c.c @@ -379,7 +379,7 @@ const ProtobufCEnumDescriptor chk__check_inconsist_class__descriptor = chk__check_inconsist_class__value_ranges, NULL,NULL,NULL,NULL /* reserved[1234] */ }; -static const ProtobufCEnumValue chk__check_inconsist_action__enum_values_by_number[13] = +static const ProtobufCEnumValue chk__check_inconsist_action__enum_values_by_number[14] = { { "CIA_DEFAULT", "CHK__CHECK_INCONSIST_ACTION__CIA_DEFAULT", 0 }, { "CIA_INTERACT", "CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT", 1 }, @@ -394,17 +394,19 @@ static const ProtobufCEnumValue chk__check_inconsist_action__enum_values_by_numb { "CIA_TRUST_OLDEST", 
"CHK__CHECK_INCONSIST_ACTION__CIA_TRUST_OLDEST", 10 }, { "CIA_TRUST_EC_PARITY", "CHK__CHECK_INCONSIST_ACTION__CIA_TRUST_EC_PARITY", 11 }, { "CIA_TRUST_EC_DATA", "CHK__CHECK_INCONSIST_ACTION__CIA_TRUST_EC_DATA", 12 }, + { "CIA_STALE", "CHK__CHECK_INCONSIST_ACTION__CIA_STALE", 65535 }, }; static const ProtobufCIntRange chk__check_inconsist_action__value_ranges[] = { -{0, 0},{0, 13} +{0, 0},{65535, 13},{0, 14} }; -static const ProtobufCEnumValueIndex chk__check_inconsist_action__enum_values_by_name[13] = +static const ProtobufCEnumValueIndex chk__check_inconsist_action__enum_values_by_name[14] = { { "CIA_DEFAULT", 0 }, { "CIA_DISCARD", 3 }, { "CIA_IGNORE", 2 }, { "CIA_INTERACT", 1 }, { "CIA_READD", 4 }, + { "CIA_STALE", 13 }, { "CIA_TRUST_EC_DATA", 12 }, { "CIA_TRUST_EC_PARITY", 11 }, { "CIA_TRUST_LATEST", 9 }, @@ -421,11 +423,11 @@ const ProtobufCEnumDescriptor chk__check_inconsist_action__descriptor = "CheckInconsistAction", "Chk__CheckInconsistAction", "chk", - 13, + 14, chk__check_inconsist_action__enum_values_by_number, - 13, + 14, chk__check_inconsist_action__enum_values_by_name, - 1, + 2, chk__check_inconsist_action__value_ranges, NULL,NULL,NULL,NULL /* reserved[1234] */ }; diff --git a/src/chk/chk.pb-c.h b/src/chk/chk.pb-c.h index 1f1bdaf0367..0ab35624431 100644 --- a/src/chk/chk.pb-c.h +++ b/src/chk/chk.pb-c.h @@ -196,7 +196,11 @@ typedef enum _Chk__CheckInconsistAction { /* * Trust EC data shard. */ - CHK__CHECK_INCONSIST_ACTION__CIA_TRUST_EC_DATA = 12 + CHK__CHECK_INCONSIST_ACTION__CIA_TRUST_EC_DATA = 12, + /* + * Stale unresolved interaction. The checker can no longer address this report without re-running on affected pool. 
+ */ + CHK__CHECK_INCONSIST_ACTION__CIA_STALE = 65535 PROTOBUF_C__FORCE_ENUM_TO_BE_INT_SIZE(CHK__CHECK_INCONSIST_ACTION) } Chk__CheckInconsistAction; /* diff --git a/src/control/common/proto/chk/chk.pb.go b/src/control/common/proto/chk/chk.pb.go index c53a2c1788e..fbfae461dde 100644 --- a/src/control/common/proto/chk/chk.pb.go +++ b/src/control/common/proto/chk/chk.pb.go @@ -1,5 +1,6 @@ // // (C) Copyright 2022 Intel Corporation. +// (C) Copyright 2025 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -212,24 +213,27 @@ const ( CheckInconsistAction_CIA_TRUST_EC_PARITY CheckInconsistAction = 11 // Trust EC data shard. CheckInconsistAction_CIA_TRUST_EC_DATA CheckInconsistAction = 12 + // Stale unresolved interaction. The checker can no longer address this report without re-running on affected pool. + CheckInconsistAction_CIA_STALE CheckInconsistAction = 65535 ) // Enum value maps for CheckInconsistAction. var ( CheckInconsistAction_name = map[int32]string{ - 0: "CIA_DEFAULT", - 1: "CIA_INTERACT", - 2: "CIA_IGNORE", - 3: "CIA_DISCARD", - 4: "CIA_READD", - 5: "CIA_TRUST_MS", - 6: "CIA_TRUST_PS", - 7: "CIA_TRUST_TARGET", - 8: "CIA_TRUST_MAJORITY", - 9: "CIA_TRUST_LATEST", - 10: "CIA_TRUST_OLDEST", - 11: "CIA_TRUST_EC_PARITY", - 12: "CIA_TRUST_EC_DATA", + 0: "CIA_DEFAULT", + 1: "CIA_INTERACT", + 2: "CIA_IGNORE", + 3: "CIA_DISCARD", + 4: "CIA_READD", + 5: "CIA_TRUST_MS", + 6: "CIA_TRUST_PS", + 7: "CIA_TRUST_TARGET", + 8: "CIA_TRUST_MAJORITY", + 9: "CIA_TRUST_LATEST", + 10: "CIA_TRUST_OLDEST", + 11: "CIA_TRUST_EC_PARITY", + 12: "CIA_TRUST_EC_DATA", + 65535: "CIA_STALE", } CheckInconsistAction_value = map[string]int32{ "CIA_DEFAULT": 0, @@ -245,6 +249,7 @@ var ( "CIA_TRUST_OLDEST": 10, "CIA_TRUST_EC_PARITY": 11, "CIA_TRUST_EC_DATA": 12, + "CIA_STALE": 65535, } ) @@ -870,7 +875,7 @@ var file_chk_chk_proto_rawDesc = []byte{ 0x4f, 0x53, 0x54, 0x5f, 0x45, 0x43, 0x5f, 0x44, 0x41, 0x54, 0x41, 0x10, 0x13, 0x12, 0x1a, 0x0a, 0x16, 
0x43, 0x49, 0x43, 0x5f, 0x4f, 0x42, 0x4a, 0x5f, 0x44, 0x41, 0x54, 0x41, 0x5f, 0x49, 0x4e, 0x43, 0x4f, 0x4e, 0x53, 0x49, 0x53, 0x54, 0x10, 0x14, 0x12, 0x0f, 0x0a, 0x0b, 0x43, 0x49, 0x43, - 0x5f, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, 0x64, 0x2a, 0x97, 0x02, 0x0a, 0x14, 0x43, + 0x5f, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, 0x64, 0x2a, 0xa8, 0x02, 0x0a, 0x14, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x49, 0x6e, 0x63, 0x6f, 0x6e, 0x73, 0x69, 0x73, 0x74, 0x41, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x0f, 0x0a, 0x0b, 0x43, 0x49, 0x41, 0x5f, 0x44, 0x45, 0x46, 0x41, 0x55, 0x4c, 0x54, 0x10, 0x00, 0x12, 0x10, 0x0a, 0x0c, 0x43, 0x49, 0x41, 0x5f, 0x49, 0x4e, 0x54, 0x45, @@ -888,55 +893,56 @@ var file_chk_chk_proto_rawDesc = []byte{ 0x53, 0x54, 0x10, 0x0a, 0x12, 0x17, 0x0a, 0x13, 0x43, 0x49, 0x41, 0x5f, 0x54, 0x52, 0x55, 0x53, 0x54, 0x5f, 0x45, 0x43, 0x5f, 0x50, 0x41, 0x52, 0x49, 0x54, 0x59, 0x10, 0x0b, 0x12, 0x15, 0x0a, 0x11, 0x43, 0x49, 0x41, 0x5f, 0x54, 0x52, 0x55, 0x53, 0x54, 0x5f, 0x45, 0x43, 0x5f, 0x44, 0x41, - 0x54, 0x41, 0x10, 0x0c, 0x2a, 0x89, 0x01, 0x0a, 0x09, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x46, 0x6c, - 0x61, 0x67, 0x12, 0x0b, 0x0a, 0x07, 0x43, 0x46, 0x5f, 0x4e, 0x4f, 0x4e, 0x45, 0x10, 0x00, 0x12, - 0x0d, 0x0a, 0x09, 0x43, 0x46, 0x5f, 0x44, 0x52, 0x59, 0x52, 0x55, 0x4e, 0x10, 0x01, 0x12, 0x0c, - 0x0a, 0x08, 0x43, 0x46, 0x5f, 0x52, 0x45, 0x53, 0x45, 0x54, 0x10, 0x02, 0x12, 0x0e, 0x0a, 0x0a, - 0x43, 0x46, 0x5f, 0x46, 0x41, 0x49, 0x4c, 0x4f, 0x55, 0x54, 0x10, 0x04, 0x12, 0x0b, 0x0a, 0x07, - 0x43, 0x46, 0x5f, 0x41, 0x55, 0x54, 0x4f, 0x10, 0x08, 0x12, 0x12, 0x0a, 0x0e, 0x43, 0x46, 0x5f, - 0x4f, 0x52, 0x50, 0x48, 0x41, 0x4e, 0x5f, 0x50, 0x4f, 0x4f, 0x4c, 0x10, 0x10, 0x12, 0x11, 0x0a, - 0x0d, 0x43, 0x46, 0x5f, 0x4e, 0x4f, 0x5f, 0x46, 0x41, 0x49, 0x4c, 0x4f, 0x55, 0x54, 0x10, 0x20, - 0x12, 0x0e, 0x0a, 0x0a, 0x43, 0x46, 0x5f, 0x4e, 0x4f, 0x5f, 0x41, 0x55, 0x54, 0x4f, 0x10, 0x40, - 0x2a, 0x88, 0x01, 0x0a, 0x0f, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x49, 0x6e, 0x73, 0x74, 0x53, 
0x74, - 0x61, 0x74, 0x75, 0x73, 0x12, 0x0c, 0x0a, 0x08, 0x43, 0x49, 0x53, 0x5f, 0x49, 0x4e, 0x49, 0x54, - 0x10, 0x00, 0x12, 0x0f, 0x0a, 0x0b, 0x43, 0x49, 0x53, 0x5f, 0x52, 0x55, 0x4e, 0x4e, 0x49, 0x4e, - 0x47, 0x10, 0x01, 0x12, 0x11, 0x0a, 0x0d, 0x43, 0x49, 0x53, 0x5f, 0x43, 0x4f, 0x4d, 0x50, 0x4c, - 0x45, 0x54, 0x45, 0x44, 0x10, 0x02, 0x12, 0x0f, 0x0a, 0x0b, 0x43, 0x49, 0x53, 0x5f, 0x53, 0x54, - 0x4f, 0x50, 0x50, 0x45, 0x44, 0x10, 0x03, 0x12, 0x0e, 0x0a, 0x0a, 0x43, 0x49, 0x53, 0x5f, 0x46, - 0x41, 0x49, 0x4c, 0x45, 0x44, 0x10, 0x04, 0x12, 0x0e, 0x0a, 0x0a, 0x43, 0x49, 0x53, 0x5f, 0x50, - 0x41, 0x55, 0x53, 0x45, 0x44, 0x10, 0x05, 0x12, 0x12, 0x0a, 0x0e, 0x43, 0x49, 0x53, 0x5f, 0x49, - 0x4d, 0x50, 0x4c, 0x49, 0x43, 0x41, 0x54, 0x45, 0x44, 0x10, 0x06, 0x2a, 0x9d, 0x01, 0x0a, 0x0f, - 0x43, 0x68, 0x65, 0x63, 0x6b, 0x50, 0x6f, 0x6f, 0x6c, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, - 0x11, 0x0a, 0x0d, 0x43, 0x50, 0x53, 0x5f, 0x55, 0x4e, 0x43, 0x48, 0x45, 0x43, 0x4b, 0x45, 0x44, - 0x10, 0x00, 0x12, 0x10, 0x0a, 0x0c, 0x43, 0x50, 0x53, 0x5f, 0x43, 0x48, 0x45, 0x43, 0x4b, 0x49, - 0x4e, 0x47, 0x10, 0x01, 0x12, 0x0f, 0x0a, 0x0b, 0x43, 0x50, 0x53, 0x5f, 0x43, 0x48, 0x45, 0x43, - 0x4b, 0x45, 0x44, 0x10, 0x02, 0x12, 0x0e, 0x0a, 0x0a, 0x43, 0x50, 0x53, 0x5f, 0x46, 0x41, 0x49, - 0x4c, 0x45, 0x44, 0x10, 0x03, 0x12, 0x0e, 0x0a, 0x0a, 0x43, 0x50, 0x53, 0x5f, 0x50, 0x41, 0x55, - 0x53, 0x45, 0x44, 0x10, 0x04, 0x12, 0x0f, 0x0a, 0x0b, 0x43, 0x50, 0x53, 0x5f, 0x50, 0x45, 0x4e, - 0x44, 0x49, 0x4e, 0x47, 0x10, 0x05, 0x12, 0x0f, 0x0a, 0x0b, 0x43, 0x50, 0x53, 0x5f, 0x53, 0x54, - 0x4f, 0x50, 0x50, 0x45, 0x44, 0x10, 0x06, 0x12, 0x12, 0x0a, 0x0e, 0x43, 0x50, 0x53, 0x5f, 0x49, - 0x4d, 0x50, 0x4c, 0x49, 0x43, 0x41, 0x54, 0x45, 0x44, 0x10, 0x07, 0x2a, 0xe0, 0x01, 0x0a, 0x0e, - 0x43, 0x68, 0x65, 0x63, 0x6b, 0x53, 0x63, 0x61, 0x6e, 0x50, 0x68, 0x61, 0x73, 0x65, 0x12, 0x0f, - 0x0a, 0x0b, 0x43, 0x53, 0x50, 0x5f, 0x50, 0x52, 0x45, 0x50, 0x41, 0x52, 0x45, 0x10, 0x00, 0x12, - 0x11, 0x0a, 0x0d, 0x43, 0x53, 
0x50, 0x5f, 0x50, 0x4f, 0x4f, 0x4c, 0x5f, 0x4c, 0x49, 0x53, 0x54, - 0x10, 0x01, 0x12, 0x10, 0x0a, 0x0c, 0x43, 0x53, 0x50, 0x5f, 0x50, 0x4f, 0x4f, 0x4c, 0x5f, 0x4d, - 0x42, 0x53, 0x10, 0x02, 0x12, 0x14, 0x0a, 0x10, 0x43, 0x53, 0x50, 0x5f, 0x50, 0x4f, 0x4f, 0x4c, - 0x5f, 0x43, 0x4c, 0x45, 0x41, 0x4e, 0x55, 0x50, 0x10, 0x03, 0x12, 0x11, 0x0a, 0x0d, 0x43, 0x53, - 0x50, 0x5f, 0x43, 0x4f, 0x4e, 0x54, 0x5f, 0x4c, 0x49, 0x53, 0x54, 0x10, 0x04, 0x12, 0x14, 0x0a, - 0x10, 0x43, 0x53, 0x50, 0x5f, 0x43, 0x4f, 0x4e, 0x54, 0x5f, 0x43, 0x4c, 0x45, 0x41, 0x4e, 0x55, - 0x50, 0x10, 0x05, 0x12, 0x12, 0x0a, 0x0e, 0x43, 0x53, 0x50, 0x5f, 0x44, 0x54, 0x58, 0x5f, 0x52, - 0x45, 0x53, 0x59, 0x4e, 0x43, 0x10, 0x06, 0x12, 0x11, 0x0a, 0x0d, 0x43, 0x53, 0x50, 0x5f, 0x4f, - 0x42, 0x4a, 0x5f, 0x53, 0x43, 0x52, 0x55, 0x42, 0x10, 0x07, 0x12, 0x0f, 0x0a, 0x0b, 0x43, 0x53, - 0x50, 0x5f, 0x52, 0x45, 0x42, 0x55, 0x49, 0x4c, 0x44, 0x10, 0x08, 0x12, 0x13, 0x0a, 0x0f, 0x43, - 0x53, 0x50, 0x5f, 0x41, 0x47, 0x47, 0x52, 0x45, 0x47, 0x41, 0x54, 0x49, 0x4f, 0x4e, 0x10, 0x09, - 0x12, 0x0c, 0x0a, 0x08, 0x43, 0x53, 0x50, 0x5f, 0x44, 0x4f, 0x4e, 0x45, 0x10, 0x0a, 0x2a, 0x27, - 0x0a, 0x0b, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, 0x0b, 0x0a, - 0x07, 0x53, 0x55, 0x43, 0x43, 0x45, 0x53, 0x53, 0x10, 0x00, 0x12, 0x0b, 0x0a, 0x07, 0x44, 0x52, - 0x59, 0x5f, 0x52, 0x55, 0x4e, 0x10, 0x01, 0x42, 0x39, 0x5a, 0x37, 0x67, 0x69, 0x74, 0x68, 0x75, - 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2d, 0x73, 0x74, 0x61, 0x63, 0x6b, - 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2f, 0x73, 0x72, 0x63, 0x2f, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, - 0x6c, 0x2f, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x63, - 0x68, 0x6b, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x54, 0x41, 0x10, 0x0c, 0x12, 0x0f, 0x0a, 0x09, 0x43, 0x49, 0x41, 0x5f, 0x53, 0x54, 0x41, 0x4c, + 0x45, 0x10, 0xff, 0xff, 0x03, 0x2a, 0x89, 0x01, 0x0a, 0x09, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x46, + 0x6c, 
0x61, 0x67, 0x12, 0x0b, 0x0a, 0x07, 0x43, 0x46, 0x5f, 0x4e, 0x4f, 0x4e, 0x45, 0x10, 0x00, + 0x12, 0x0d, 0x0a, 0x09, 0x43, 0x46, 0x5f, 0x44, 0x52, 0x59, 0x52, 0x55, 0x4e, 0x10, 0x01, 0x12, + 0x0c, 0x0a, 0x08, 0x43, 0x46, 0x5f, 0x52, 0x45, 0x53, 0x45, 0x54, 0x10, 0x02, 0x12, 0x0e, 0x0a, + 0x0a, 0x43, 0x46, 0x5f, 0x46, 0x41, 0x49, 0x4c, 0x4f, 0x55, 0x54, 0x10, 0x04, 0x12, 0x0b, 0x0a, + 0x07, 0x43, 0x46, 0x5f, 0x41, 0x55, 0x54, 0x4f, 0x10, 0x08, 0x12, 0x12, 0x0a, 0x0e, 0x43, 0x46, + 0x5f, 0x4f, 0x52, 0x50, 0x48, 0x41, 0x4e, 0x5f, 0x50, 0x4f, 0x4f, 0x4c, 0x10, 0x10, 0x12, 0x11, + 0x0a, 0x0d, 0x43, 0x46, 0x5f, 0x4e, 0x4f, 0x5f, 0x46, 0x41, 0x49, 0x4c, 0x4f, 0x55, 0x54, 0x10, + 0x20, 0x12, 0x0e, 0x0a, 0x0a, 0x43, 0x46, 0x5f, 0x4e, 0x4f, 0x5f, 0x41, 0x55, 0x54, 0x4f, 0x10, + 0x40, 0x2a, 0x88, 0x01, 0x0a, 0x0f, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x49, 0x6e, 0x73, 0x74, 0x53, + 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x0c, 0x0a, 0x08, 0x43, 0x49, 0x53, 0x5f, 0x49, 0x4e, 0x49, + 0x54, 0x10, 0x00, 0x12, 0x0f, 0x0a, 0x0b, 0x43, 0x49, 0x53, 0x5f, 0x52, 0x55, 0x4e, 0x4e, 0x49, + 0x4e, 0x47, 0x10, 0x01, 0x12, 0x11, 0x0a, 0x0d, 0x43, 0x49, 0x53, 0x5f, 0x43, 0x4f, 0x4d, 0x50, + 0x4c, 0x45, 0x54, 0x45, 0x44, 0x10, 0x02, 0x12, 0x0f, 0x0a, 0x0b, 0x43, 0x49, 0x53, 0x5f, 0x53, + 0x54, 0x4f, 0x50, 0x50, 0x45, 0x44, 0x10, 0x03, 0x12, 0x0e, 0x0a, 0x0a, 0x43, 0x49, 0x53, 0x5f, + 0x46, 0x41, 0x49, 0x4c, 0x45, 0x44, 0x10, 0x04, 0x12, 0x0e, 0x0a, 0x0a, 0x43, 0x49, 0x53, 0x5f, + 0x50, 0x41, 0x55, 0x53, 0x45, 0x44, 0x10, 0x05, 0x12, 0x12, 0x0a, 0x0e, 0x43, 0x49, 0x53, 0x5f, + 0x49, 0x4d, 0x50, 0x4c, 0x49, 0x43, 0x41, 0x54, 0x45, 0x44, 0x10, 0x06, 0x2a, 0x9d, 0x01, 0x0a, + 0x0f, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x50, 0x6f, 0x6f, 0x6c, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, + 0x12, 0x11, 0x0a, 0x0d, 0x43, 0x50, 0x53, 0x5f, 0x55, 0x4e, 0x43, 0x48, 0x45, 0x43, 0x4b, 0x45, + 0x44, 0x10, 0x00, 0x12, 0x10, 0x0a, 0x0c, 0x43, 0x50, 0x53, 0x5f, 0x43, 0x48, 0x45, 0x43, 0x4b, + 0x49, 0x4e, 0x47, 0x10, 0x01, 0x12, 0x0f, 
0x0a, 0x0b, 0x43, 0x50, 0x53, 0x5f, 0x43, 0x48, 0x45, + 0x43, 0x4b, 0x45, 0x44, 0x10, 0x02, 0x12, 0x0e, 0x0a, 0x0a, 0x43, 0x50, 0x53, 0x5f, 0x46, 0x41, + 0x49, 0x4c, 0x45, 0x44, 0x10, 0x03, 0x12, 0x0e, 0x0a, 0x0a, 0x43, 0x50, 0x53, 0x5f, 0x50, 0x41, + 0x55, 0x53, 0x45, 0x44, 0x10, 0x04, 0x12, 0x0f, 0x0a, 0x0b, 0x43, 0x50, 0x53, 0x5f, 0x50, 0x45, + 0x4e, 0x44, 0x49, 0x4e, 0x47, 0x10, 0x05, 0x12, 0x0f, 0x0a, 0x0b, 0x43, 0x50, 0x53, 0x5f, 0x53, + 0x54, 0x4f, 0x50, 0x50, 0x45, 0x44, 0x10, 0x06, 0x12, 0x12, 0x0a, 0x0e, 0x43, 0x50, 0x53, 0x5f, + 0x49, 0x4d, 0x50, 0x4c, 0x49, 0x43, 0x41, 0x54, 0x45, 0x44, 0x10, 0x07, 0x2a, 0xe0, 0x01, 0x0a, + 0x0e, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x53, 0x63, 0x61, 0x6e, 0x50, 0x68, 0x61, 0x73, 0x65, 0x12, + 0x0f, 0x0a, 0x0b, 0x43, 0x53, 0x50, 0x5f, 0x50, 0x52, 0x45, 0x50, 0x41, 0x52, 0x45, 0x10, 0x00, + 0x12, 0x11, 0x0a, 0x0d, 0x43, 0x53, 0x50, 0x5f, 0x50, 0x4f, 0x4f, 0x4c, 0x5f, 0x4c, 0x49, 0x53, + 0x54, 0x10, 0x01, 0x12, 0x10, 0x0a, 0x0c, 0x43, 0x53, 0x50, 0x5f, 0x50, 0x4f, 0x4f, 0x4c, 0x5f, + 0x4d, 0x42, 0x53, 0x10, 0x02, 0x12, 0x14, 0x0a, 0x10, 0x43, 0x53, 0x50, 0x5f, 0x50, 0x4f, 0x4f, + 0x4c, 0x5f, 0x43, 0x4c, 0x45, 0x41, 0x4e, 0x55, 0x50, 0x10, 0x03, 0x12, 0x11, 0x0a, 0x0d, 0x43, + 0x53, 0x50, 0x5f, 0x43, 0x4f, 0x4e, 0x54, 0x5f, 0x4c, 0x49, 0x53, 0x54, 0x10, 0x04, 0x12, 0x14, + 0x0a, 0x10, 0x43, 0x53, 0x50, 0x5f, 0x43, 0x4f, 0x4e, 0x54, 0x5f, 0x43, 0x4c, 0x45, 0x41, 0x4e, + 0x55, 0x50, 0x10, 0x05, 0x12, 0x12, 0x0a, 0x0e, 0x43, 0x53, 0x50, 0x5f, 0x44, 0x54, 0x58, 0x5f, + 0x52, 0x45, 0x53, 0x59, 0x4e, 0x43, 0x10, 0x06, 0x12, 0x11, 0x0a, 0x0d, 0x43, 0x53, 0x50, 0x5f, + 0x4f, 0x42, 0x4a, 0x5f, 0x53, 0x43, 0x52, 0x55, 0x42, 0x10, 0x07, 0x12, 0x0f, 0x0a, 0x0b, 0x43, + 0x53, 0x50, 0x5f, 0x52, 0x45, 0x42, 0x55, 0x49, 0x4c, 0x44, 0x10, 0x08, 0x12, 0x13, 0x0a, 0x0f, + 0x43, 0x53, 0x50, 0x5f, 0x41, 0x47, 0x47, 0x52, 0x45, 0x47, 0x41, 0x54, 0x49, 0x4f, 0x4e, 0x10, + 0x09, 0x12, 0x0c, 0x0a, 0x08, 0x43, 0x53, 0x50, 0x5f, 0x44, 0x4f, 0x4e, 0x45, 
0x10, 0x0a, 0x2a, + 0x27, 0x0a, 0x0b, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, 0x0b, + 0x0a, 0x07, 0x53, 0x55, 0x43, 0x43, 0x45, 0x53, 0x53, 0x10, 0x00, 0x12, 0x0b, 0x0a, 0x07, 0x44, + 0x52, 0x59, 0x5f, 0x52, 0x55, 0x4e, 0x10, 0x01, 0x42, 0x39, 0x5a, 0x37, 0x67, 0x69, 0x74, 0x68, + 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2d, 0x73, 0x74, 0x61, 0x63, + 0x6b, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2f, 0x73, 0x72, 0x63, 0x2f, 0x63, 0x6f, 0x6e, 0x74, 0x72, + 0x6f, 0x6c, 0x2f, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, + 0x63, 0x68, 0x6b, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( diff --git a/src/control/lib/control/check.go b/src/control/lib/control/check.go index a41aea37dcd..c7297e05989 100644 --- a/src/control/lib/control/check.go +++ b/src/control/lib/control/check.go @@ -429,6 +429,13 @@ func (r *SystemCheckReport) IsInteractive() bool { return r.Action == chkpb.CheckInconsistAction_CIA_INTERACT } +// IsStale indicates whether this report was awaiting user interaction when it became stale. Stale +// reports are still valid but can't be repaired without re-running the checker on the affected +// pool. +func (r *SystemCheckReport) IsStale() bool { + return r.Action == chkpb.CheckInconsistAction_CIA_STALE +} + // IsRemovedPool indicates whether the error detected in this report indicates a missing pool. 
func (r *SystemCheckReport) IsRemovedPool() bool { return r.Action == chkpb.CheckInconsistAction_CIA_DISCARD && diff --git a/src/control/lib/control/check_test.go b/src/control/lib/control/check_test.go index eab2c0eb5ee..a25f02c9217 100644 --- a/src/control/lib/control/check_test.go +++ b/src/control/lib/control/check_test.go @@ -146,3 +146,37 @@ func TestControl_SystemCheckReport_IsDryRun(t *testing.T) { }) } } + +func TestControl_SystemCheckReport_IsInteractive(t *testing.T) { + expInteractive := chkpb.CheckInconsistAction_CIA_INTERACT + + for name, actVal := range chkpb.CheckInconsistAction_value { + t.Run(name, func(t *testing.T) { + action := chkpb.CheckInconsistAction(actVal) + report := &SystemCheckReport{ + chkpb.CheckReport{ + Action: action, + }, + } + + test.AssertEqual(t, action == expInteractive, report.IsInteractive(), "") + }) + } +} + +func TestControl_SystemCheckReport_IsStale(t *testing.T) { + expStaleAction := chkpb.CheckInconsistAction_CIA_STALE + + for name, actVal := range chkpb.CheckInconsistAction_value { + t.Run(name, func(t *testing.T) { + action := chkpb.CheckInconsistAction(actVal) + report := &SystemCheckReport{ + chkpb.CheckReport{ + Action: action, + }, + } + + test.AssertEqual(t, action == expStaleAction, report.IsStale(), "") + }) + } +} diff --git a/src/control/server/mgmt_check.go b/src/control/server/mgmt_check.go index ce4ba773a57..a9266d47e78 100644 --- a/src/control/server/mgmt_check.go +++ b/src/control/server/mgmt_check.go @@ -263,24 +263,72 @@ func (svc *mgmtSvc) SystemCheckStart(ctx context.Context, req *mgmtpb.CheckStart } if resp.Status > 0 { - if len(req.Uuids) == 0 { - svc.log.Debug("resetting checker findings DB") - if err := svc.sysdb.ResetCheckerData(); err != nil { - return nil, errors.Wrap(err, "failed to reset checker finding database") - } - } else { - pools := strings.Join(req.Uuids, ", ") - svc.log.Debugf("removing old checker findings for pools: %s", pools) - if err := 
svc.sysdb.RemoveCheckerFindingsForPools(req.Uuids...); err != nil { - return nil, errors.Wrapf(err, "failed to remove old findings for pools: %s", pools) - } + // Checker instance was reset. We can safely clear all findings related to any pools + // requested. + if err := svc.resetFindings(req.Uuids); err != nil { + return nil, err } resp.Status = 0 // reset status to indicate success } + // If either the checker was not reset, or it was only reset against specified pools above, + // there may still be unresolved findings in the DB that need to be marked stale or removed. + if resp.Status == 0 { + svc.handleUnresolvedInteractions(req.Uuids) + } + return resp, nil } +func (svc *mgmtSvc) resetFindings(uuids []string) error { + if len(uuids) == 0 { + svc.log.Debug("resetting checker findings DB") + if err := svc.sysdb.ResetCheckerData(); err != nil { + return errors.Wrap(err, "failed to reset checker finding database") + } + } else { + pools := strings.Join(uuids, ", ") + svc.log.Debugf("removing old checker findings for pools: %s", pools) + if err := svc.sysdb.RemoveCheckerFindingsForPools(uuids...); err != nil { + return errors.Wrapf(err, "failed to remove old findings for pools: %s", pools) + } + } + return nil +} + +// handleUnresolvedInteractions goes through all unresolved (INTERACT/STALE) findings in the database. +// Those that will be rediscovered in the next run can be removed. All others must be marked stale +// as the user will be unable to act on them after the new check instance started. To fix the +// inconsistency, they'll need to re-run the checker on the affected pool. +func (svc *mgmtSvc) handleUnresolvedInteractions(uuids []string) { + findings, err := svc.sysdb.GetCheckerFindings() + if err != nil { + svc.log.Errorf("unable to fetch old checker findings: %s", err.Error()) + return + } + + uuidSet := common.NewStringSet(uuids...) 
+ for _, f := range findings { + switch f.Action { + case chkpb.CheckInconsistAction_CIA_INTERACT, chkpb.CheckInconsistAction_CIA_STALE: + if len(uuids) == 0 || uuidSet.Has(f.PoolUuid) { + // Unresolved interactive and stale findings for pools that will be scanned will be re-discovered. + svc.log.Debugf("removing unresolved %s finding %d for pool %s", f.Action, f.Seq, f.PoolUuid) + if err := svc.sysdb.RemoveCheckerFinding(f); err != nil { + svc.log.Errorf("unable to remove stale checker finding %s: %s", f, err.Error()) + } + } else if f.Action != chkpb.CheckInconsistAction_CIA_STALE { // No need to re-mark stale interactions + // If the pool isn't being re-checked, we should keep the unresolved finding, but the user + // won't be able to act on it anymore. + svc.log.Debugf("marking unresolved interaction %d stale for pool %s", f.Seq, f.PoolUuid) + if err := svc.sysdb.SetCheckerFindingAction(f.Seq, int32(chkpb.CheckInconsistAction_CIA_STALE)); err != nil { + svc.log.Errorf("unable to mark interactive finding %s stale: %s", f, err.Error()) + } + } + } + } +} + func (svc *mgmtSvc) mergePoliciesWithCurrent(policies []*mgmtpb.CheckInconsistPolicy) ([]*mgmtpb.CheckInconsistPolicy, error) { pm, err := svc.getCheckerPolicyMap() if err != nil { diff --git a/src/control/server/mgmt_check_test.go b/src/control/server/mgmt_check_test.go index ba0662e5f2e..b1d381340fd 100644 --- a/src/control/server/mgmt_check_test.go +++ b/src/control/server/mgmt_check_test.go @@ -118,18 +118,54 @@ func TestServer_mgmtSvc_SystemCheckStart(t *testing.T) { testPolicies := testPoliciesWithAction(chkpb.CheckInconsistAction_CIA_INTERACT) uuids := testPoolUUIDs(3) - testFindings := func() []*checker.Finding { + testFindings := func(act chkpb.CheckInconsistAction) []*checker.Finding { findings := []*checker.Finding{} for i, uuid := range uuids { f := &checker.Finding{CheckReport: chkpb.CheckReport{ Seq: uint64(i + 1), PoolUuid: uuid, + Action: act, }} findings = append(findings, f) } return findings 
} + defaultTestFindings := func() []*checker.Finding { + return testFindings(chkpb.CheckInconsistAction_CIA_TRUST_MS) + } + + actionTestFindings := func(act chkpb.CheckInconsistAction, idx ...int) []*checker.Finding { + findings := defaultTestFindings() + for _, i := range idx { + t.Logf("findings[%d].Action: %s -> %s", i, findings[i].Action, act) + findings[i].Action = act + } + t.Logf("findings: %+v", findings) + return findings + } + + interactTestFindings := func(idx ...int) []*checker.Finding { + return actionTestFindings(chkpb.CheckInconsistAction_CIA_INTERACT, idx...) + } + + staleTestFindings := func(idx ...int) []*checker.Finding { + return actionTestFindings(chkpb.CheckInconsistAction_CIA_STALE, idx...) + } + + createMSWithFindings := func(t *testing.T, log logging.Logger, findings []*checker.Finding) *mgmtSvc { + svc := testSvcCheckerEnabled(t, log, system.MemberStateCheckerStarted, uuids) + if err := svc.setCheckerPolicyMap(testPolicies); err != nil { + t.Fatal(err) + } + for _, f := range findings { + if err := svc.sysdb.AddCheckerFinding(f); err != nil { + t.Fatal(err) + } + } + return svc + } + for name, tc := range map[string]struct { createMS func(*testing.T, logging.Logger) *mgmtSvc getMockDrpc func() *mockDrpcClient @@ -178,7 +214,7 @@ func TestServer_mgmtSvc_SystemCheckStart(t *testing.T) { Sys: "daos_server", }, expErr: errors.New("mock dRPC"), - expFindings: testFindings(), + expFindings: defaultTestFindings(), expPolicies: testPolicies, }, "bad resp": { @@ -189,7 +225,7 @@ func TestServer_mgmtSvc_SystemCheckStart(t *testing.T) { Sys: "daos_server", }, expErr: errors.New("unmarshal CheckStart response"), - expFindings: testFindings(), + expFindings: defaultTestFindings(), expPolicies: testPolicies, }, "request failed": { @@ -200,7 +236,7 @@ func TestServer_mgmtSvc_SystemCheckStart(t *testing.T) { Sys: "daos_server", }, expResp: &mgmt.CheckStartResp{Status: int32(daos.MiscError)}, - expFindings: testFindings(), + expFindings: 
defaultTestFindings(), expPolicies: testPolicies, }, "no reset": { @@ -208,7 +244,7 @@ func TestServer_mgmtSvc_SystemCheckStart(t *testing.T) { Sys: "daos_server", }, expResp: &mgmtpb.CheckStartResp{}, - expFindings: testFindings(), + expFindings: defaultTestFindings(), expPolicies: testPolicies, }, "reset": { @@ -236,12 +272,7 @@ func TestServer_mgmtSvc_SystemCheckStart(t *testing.T) { }, expResp: &mgmtpb.CheckStartResp{}, expFindings: []*checker.Finding{ - { - CheckReport: chkpb.CheckReport{ - Seq: 2, - PoolUuid: uuids[1], - }, - }, + defaultTestFindings()[1], }, expPolicies: testPolicies, }, @@ -263,6 +294,63 @@ func TestServer_mgmtSvc_SystemCheckStart(t *testing.T) { expResp: &mgmtpb.CheckStartResp{}, expPolicies: mergeTestPolicies(testPolicies, specificPolicies), }, + "interactive findings removed for all pools": { + createMS: func(t *testing.T, log logging.Logger) *mgmtSvc { + return createMSWithFindings(t, log, interactTestFindings(0, 2)) + }, + req: &mgmtpb.CheckStartReq{ + Sys: "daos_server", + }, + expResp: &mgmtpb.CheckStartResp{}, // non-reset + expPolicies: testPolicies, + expFindings: []*checker.Finding{ + defaultTestFindings()[1], // non-interactive is left alone + }, + }, + "interactive findings stale for unspecified pool": { + createMS: func(t *testing.T, log logging.Logger) *mgmtSvc { + return createMSWithFindings(t, log, interactTestFindings(0, 2)) + }, + req: &mgmtpb.CheckStartReq{ + Sys: "daos_server", + Uuids: []string{uuids[0]}, + }, + expResp: &mgmtpb.CheckStartResp{}, // non-reset + expPolicies: testPolicies, + expFindings: []*checker.Finding{ + defaultTestFindings()[1], // non-interactive is left alone + // interactive for unspecified pool is marked stale and re-annotated + checker.AnnotateFinding(testFindings(chkpb.CheckInconsistAction_CIA_STALE)[2]), + }, + }, + "stale findings removed for all pools": { + createMS: func(t *testing.T, log logging.Logger) *mgmtSvc { + return createMSWithFindings(t, log, staleTestFindings(0, 2)) + }, + req: 
&mgmtpb.CheckStartReq{ + Sys: "daos_server", + }, + expResp: &mgmtpb.CheckStartResp{}, // non-reset + expPolicies: testPolicies, + expFindings: []*checker.Finding{ + defaultTestFindings()[1], // non-stale is left alone + }, + }, + "stale finding ignored for unspecified pool": { + createMS: func(t *testing.T, log logging.Logger) *mgmtSvc { + return createMSWithFindings(t, log, staleTestFindings(0, 2)) + }, + req: &mgmtpb.CheckStartReq{ + Sys: "daos_server", + Uuids: []string{uuids[0]}, + }, + expResp: &mgmtpb.CheckStartResp{}, // non-reset + expPolicies: testPolicies, + expFindings: []*checker.Finding{ + defaultTestFindings()[1], // non-stale is left alone + testFindings(chkpb.CheckInconsistAction_CIA_STALE)[2], // stale for unspecified pool remains + }, + }, } { t.Run(name, func(t *testing.T) { log, buf := logging.NewTestLogger(t.Name()) @@ -270,16 +358,7 @@ func TestServer_mgmtSvc_SystemCheckStart(t *testing.T) { if tc.createMS == nil { tc.createMS = func(t *testing.T, log logging.Logger) *mgmtSvc { - svc := testSvcCheckerEnabled(t, log, system.MemberStateCheckerStarted, uuids) - if err := svc.setCheckerPolicyMap(testPolicies); err != nil { - t.Fatal(err) - } - for _, f := range testFindings() { - if err := svc.sysdb.AddCheckerFinding(f); err != nil { - t.Fatal(err) - } - } - return svc + return createMSWithFindings(t, log, defaultTestFindings()) } } svc := tc.createMS(t, log) diff --git a/src/control/system/checker/finding.go b/src/control/system/checker/finding.go index 770dfcd7132..2be0119987f 100644 --- a/src/control/system/checker/finding.go +++ b/src/control/system/checker/finding.go @@ -1,5 +1,6 @@ // // (C) Copyright 2022 Intel Corporation. 
+// (C) Copyright 2025 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -73,9 +74,9 @@ func NewFinding(report *chkpb.CheckReport) *Finding { return f } -// descAction attempts to generate a human-readable description of the +// GetActionMsg attempts to generate a human-readable description of the // action that may be taken for the given finding. -func descAction(class chkpb.CheckInconsistClass, action chkpb.CheckInconsistAction, details ...string) string { +func GetActionMsg(class chkpb.CheckInconsistClass, action chkpb.CheckInconsistAction, details ...string) string { var ro reportObject switch { case class >= chkpb.CheckInconsistClass_CIC_POOL_LESS_SVC_WITH_QUORUM && class <= chkpb.CheckInconsistClass_CIC_POOL_BAD_LABEL: @@ -151,6 +152,8 @@ func descAction(class chkpb.CheckInconsistClass, action chkpb.CheckInconsistActi return fmt.Sprintf("Trust the parity of the %s results", ro) case chkpb.CheckInconsistAction_CIA_TRUST_EC_DATA: return fmt.Sprintf("Trust the data of the %s results", ro) + case chkpb.CheckInconsistAction_CIA_STALE: + return "Current checker instance cannot act on this finding. Restart checker against the pool to handle the inconsistency." default: return fmt.Sprintf("%s: %s (details: %+v)", ro, action, details) } @@ -174,6 +177,7 @@ func trimProtoSpaces(pm proto.Message) { }) } +// AnnotateFinding updates human-readable action messages. func AnnotateFinding(f *Finding) *Finding { if f == nil { return nil @@ -195,11 +199,11 @@ func AnnotateFinding(f *Finding) *Finding { if len(f.ActChoices) > 0 { f.ActMsgs = make([]string, len(f.ActChoices)) for i, act := range f.ActChoices { - f.ActMsgs[i] = descAction(f.Class, act, append([]string{f.PoolUuid, f.ContUuid}, f.ActDetails...)...) + f.ActMsgs[i] = GetActionMsg(f.Class, act, append([]string{f.PoolUuid, f.ContUuid}, f.ActDetails...)...) 
} } else { f.ActMsgs = make([]string, 1) - f.ActMsgs[0] = descAction(f.Class, f.Action, append([]string{f.PoolUuid, f.ContUuid}, f.ActDetails...)...) + f.ActMsgs[0] = GetActionMsg(f.Class, f.Action, append([]string{f.PoolUuid, f.ContUuid}, f.ActDetails...)...) } } if len(f.Msg) == 0 { diff --git a/src/control/system/checker/finding_test.go b/src/control/system/checker/finding_test.go index 3517ce33315..96c6a819619 100644 --- a/src/control/system/checker/finding_test.go +++ b/src/control/system/checker/finding_test.go @@ -1,5 +1,6 @@ // // (C) Copyright 2022 Intel Corporation. +// (C) Copyright 2025 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -438,6 +439,30 @@ func TestChecker_AnnotateFinding(t *testing.T) { }, }), }, + "stale": { + rpt: &chkpb.CheckReport{ + Seq: 972775323717861377, + Class: chkpb.CheckInconsistClass_CIC_CONT_BAD_LABEL, + Action: chkpb.CheckInconsistAction_CIA_STALE, + PoolUuid: "9614ebfb-cbad-4250-a4e4-d24b7b70d85e", + ContUuid: "18b9b418-211c-455f-aa42-0cc13dedcff9", + Timestamp: "Mon Dec 5 16:27:56 2022\n", + Msg: "Check engine detects inconsistent container label: new-label (CS) vs foo (property).\n", + }, + expFinding: checker.NewFinding( + &chkpb.CheckReport{ + Seq: 972775323717861377, + Class: chkpb.CheckInconsistClass_CIC_CONT_BAD_LABEL, + Action: chkpb.CheckInconsistAction_CIA_STALE, + PoolUuid: "9614ebfb-cbad-4250-a4e4-d24b7b70d85e", + ContUuid: "18b9b418-211c-455f-aa42-0cc13dedcff9", + Timestamp: "Mon Dec 5 16:27:56 2022", + Msg: "Check engine detects inconsistent container label: new-label (CS) vs foo (property).", + ActMsgs: []string{ + "Current checker instance cannot act on this finding. 
Restart checker against the pool to handle the inconsistency.", + }, + }), + }, } { t.Run(name, func(t *testing.T) { f := checker.NewFinding(tc.rpt) diff --git a/src/control/system/raft/database_checker.go b/src/control/system/raft/database_checker.go index 6fa13acead5..4e02d36dc9a 100644 --- a/src/control/system/raft/database_checker.go +++ b/src/control/system/raft/database_checker.go @@ -1,5 +1,6 @@ // // (C) Copyright 2022-2023 Intel Corporation. +// (C) Copyright 2025 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -161,15 +162,36 @@ func (db *Database) SetCheckerFindingAction(seq uint64, action int32) error { return err } - for i, d := range f.ActChoices { - if d != chkAction { - continue - } + if chkAction == chk.CheckInconsistAction_CIA_STALE { f.Action = chkAction - if len(f.ActMsgs) > i { - f.ActMsgs = []string{f.ActMsgs[i]} - } + + // Clear old choices and re-annotate f.ActChoices = nil + f.ActDetails = nil + f.ActMsgs = nil + f = checker.AnnotateFinding(f) + } else { + found := false + + for i, d := range f.ActChoices { + if d != chkAction { + continue + } + f.Action = chkAction + if len(f.ActMsgs) > i { + f.ActMsgs = []string{f.ActMsgs[i]} + } + if len(f.ActDetails) > i { + f.ActDetails = []string{f.ActDetails[i]} + } + f.ActChoices = nil + found = true + break + } + + if !found { + return errors.Errorf("action not available for this finding: %s", chk.CheckInconsistAction_name[action]) + } } return db.submitCheckerUpdate(raftOpUpdateCheckerFinding, f) diff --git a/src/control/system/raft/database_checker_test.go b/src/control/system/raft/database_checker_test.go new file mode 100644 index 00000000000..44ecdfbe19c --- /dev/null +++ b/src/control/system/raft/database_checker_test.go @@ -0,0 +1,204 @@ +// +// (C) Copyright 2025 Hewlett Packard Enterprise Development LP +// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// + +package raft + +import ( + "testing" + + 
"github.com/daos-stack/daos/src/control/common/proto/chk" + "github.com/daos-stack/daos/src/control/common/test" + "github.com/daos-stack/daos/src/control/logging" + "github.com/daos-stack/daos/src/control/system/checker" + "github.com/pkg/errors" +) + +func TestRaft_Database_SetCheckerFindingAction(t *testing.T) { + createDBWithFindings := func(t *testing.T, log logging.Logger, findings ...*checker.Finding) *Database { + db := MockDatabase(t, log) + for _, f := range findings { + if err := db.data.Checker.addFinding(f); err != nil { + t.Fatal(err) + } + } + return db + } + + staleMsg := checker.GetActionMsg(0, chk.CheckInconsistAction_CIA_STALE) + + for name, tc := range map[string]struct { + startFindings []*checker.Finding + seq uint64 + action chk.CheckInconsistAction + expErr error + expActionChoices []chk.CheckInconsistAction + expActionMsg []string + expActionDetails []string + }{ + "invalid action": { + action: chk.CheckInconsistAction(4242), // arbitrary + expErr: errors.New("invalid action"), + }, + "empty db": { + seq: 123, + action: chk.CheckInconsistAction_CIA_IGNORE, + expErr: ErrFindingNotFound(123), + }, + "not found": { + startFindings: []*checker.Finding{ + {CheckReport: chk.CheckReport{Seq: 100}}, + {CheckReport: chk.CheckReport{Seq: 101}}, + {CheckReport: chk.CheckReport{Seq: 102}}, + }, + seq: 123, + action: chk.CheckInconsistAction_CIA_IGNORE, + expErr: ErrFindingNotFound(123), + }, + "stale, no action choices": { + startFindings: []*checker.Finding{ + {CheckReport: chk.CheckReport{Seq: 100}}, + {CheckReport: chk.CheckReport{Seq: 101}}, + {CheckReport: chk.CheckReport{Seq: 102}}, + }, + seq: 101, + action: chk.CheckInconsistAction_CIA_STALE, + expActionMsg: []string{ + staleMsg, + }, + }, + "stale ignores choices": { + startFindings: []*checker.Finding{ + { + CheckReport: chk.CheckReport{ + Seq: 101, + ActChoices: []chk.CheckInconsistAction{ + chk.CheckInconsistAction_CIA_IGNORE, + chk.CheckInconsistAction_CIA_TRUST_MS, + 
chk.CheckInconsistAction_CIA_TRUST_PS, + }, + ActMsgs: []string{ + "one", + "two", + "three", + }, + ActDetails: []string{ + "detail1", + "detail2", + "detail3", + }, + }, + }, + }, + seq: 101, + action: chk.CheckInconsistAction_CIA_STALE, + expActionChoices: nil, // cleared + expActionDetails: nil, // cleared + expActionMsg: []string{ + staleMsg, + }, + }, + "valid choice": { + startFindings: []*checker.Finding{ + { + CheckReport: chk.CheckReport{ + Seq: 101, + ActChoices: []chk.CheckInconsistAction{ + chk.CheckInconsistAction_CIA_IGNORE, + chk.CheckInconsistAction_CIA_TRUST_MS, + chk.CheckInconsistAction_CIA_TRUST_PS, + }, + ActMsgs: []string{ + "one", + "two", + "three", + }, + ActDetails: []string{ + "detail1", + "detail2", + "detail3", + }, + }, + }, + }, + seq: 101, + action: chk.CheckInconsistAction_CIA_TRUST_MS, + expActionChoices: nil, // cleared + expActionMsg: []string{ + "two", + }, + expActionDetails: []string{ + "detail2", + }, + }, + "no messages or details": { + startFindings: []*checker.Finding{ + { + CheckReport: chk.CheckReport{ + Seq: 101, + ActChoices: []chk.CheckInconsistAction{ + chk.CheckInconsistAction_CIA_IGNORE, + chk.CheckInconsistAction_CIA_TRUST_MS, + chk.CheckInconsistAction_CIA_TRUST_PS, + }, + }, + }, + }, + seq: 101, + action: chk.CheckInconsistAction_CIA_TRUST_MS, + expActionChoices: nil, // cleared + }, + "unavailable choice": { + startFindings: []*checker.Finding{ + { + CheckReport: chk.CheckReport{ + Seq: 101, + ActChoices: []chk.CheckInconsistAction{ + chk.CheckInconsistAction_CIA_IGNORE, + chk.CheckInconsistAction_CIA_TRUST_MS, + chk.CheckInconsistAction_CIA_TRUST_PS, + }, + ActMsgs: []string{ + "one", + "two", + "three", + }, + ActDetails: []string{ + "detail1", + "detail2", + "detail3", + }, + }, + }, + }, + seq: 101, + action: chk.CheckInconsistAction_CIA_TRUST_EC_DATA, + expErr: errors.New("action not available"), + }, + } { + t.Run(name, func(t *testing.T) { + ctx := test.MustLogContext(t) + + db := 
createDBWithFindings(t, logging.FromContext(ctx), tc.startFindings...) + + err := db.SetCheckerFindingAction(tc.seq, int32(tc.action)) + + test.CmpErr(t, tc.expErr, err) + + if tc.expErr == nil { + // Check that the action was actually updated + f, err := db.GetCheckerFinding(tc.seq) + if err != nil { + t.Fatal(err) + } + + test.AssertEqual(t, tc.action, f.Action, "verifying action was set") + test.CmpAny(t, "action choices", tc.expActionChoices, f.ActChoices) + test.CmpAny(t, "action messages", tc.expActionMsg, f.ActMsgs) + test.CmpAny(t, "action details", tc.expActionDetails, f.ActDetails) + } + }) + } +} diff --git a/src/include/daos/tests_lib.h b/src/include/daos/tests_lib.h index 72438598297..9bb15883b8c 100644 --- a/src/include/daos/tests_lib.h +++ b/src/include/daos/tests_lib.h @@ -176,19 +176,20 @@ enum test_cr_class { }; enum test_cr_action { - TCA_DEFAULT = 0, - TCA_INTERACT = 1, - TCA_IGNORE = 2, - TCA_DISCARD = 3, - TCA_READD = 4, - TCA_TRUST_MS = 5, - TCA_TRUST_PS = 6, - TCA_TRUST_TARGET = 7, - TCA_TRUST_MAJORITY = 8, - TCA_TRUST_LATEST = 9, - TCA_TRUST_OLDEST = 10, - TCA_TRUST_EC_PARITY = 11, - TCA_TRUST_EC_DATA = 12, + TCA_STALE = 0xffff, + TCA_DEFAULT = 0, + TCA_INTERACT = 1, + TCA_IGNORE = 2, + TCA_DISCARD = 3, + TCA_READD = 4, + TCA_TRUST_MS = 5, + TCA_TRUST_PS = 6, + TCA_TRUST_TARGET = 7, + TCA_TRUST_MAJORITY = 8, + TCA_TRUST_LATEST = 9, + TCA_TRUST_OLDEST = 10, + TCA_TRUST_EC_PARITY = 11, + TCA_TRUST_EC_DATA = 12, }; struct daos_check_pool_info { diff --git a/src/proto/chk/chk.proto b/src/proto/chk/chk.proto index 869243528b3..338f2d5efb5 100644 --- a/src/proto/chk/chk.proto +++ b/src/proto/chk/chk.proto @@ -119,6 +119,9 @@ enum CheckInconsistAction { CIA_TRUST_EC_PARITY = 11; // Trust EC data shard. CIA_TRUST_EC_DATA = 12; + + // Stale unresolved interaction. The checker can no longer address this report without re-running on affected pool. 
+ CIA_STALE = 0xffff; } // The flags to control DAOS check general behavior, not related with any detailed inconsistency. diff --git a/src/tests/ftest/recovery/check_start_options.py b/src/tests/ftest/recovery/check_start_options.py index 2d17edb95b8..e7468d53309 100644 --- a/src/tests/ftest/recovery/check_start_options.py +++ b/src/tests/ftest/recovery/check_start_options.py @@ -10,6 +10,11 @@ from recovery_utils import query_detect from run_utils import command_as_user, run_remote +# Enum values used in this test +ENUM_CIC_POOL_NONEXIST_ON_MS = 4 +ENUM_CIA_INTERACT = 1 +ENUM_CIA_STALE = 0xffff + class DMGCheckStartOptionsTest(TestWithServers): """Test dmg check start options. @@ -25,6 +30,8 @@ class DMGCheckStartOptionsTest(TestWithServers): :avocado: recursive """ + MAX_QUERY_RETRY = 8 # max retries for check query + def test_check_start_reset(self): """Test dmg check start --reset. @@ -36,13 +43,12 @@ def test_check_start_reset(self): from "unchecked". 4. Verify that the orphan pool is detected. 5. Stop the checker. The state is now at "stopped". - 6. Remove the pool directory from the mount point. - 7. Start the checker without --reset. State is back to "checking". - 8. Verify that the action entry is still there. - 9. Stop the checker. State is "stopped". - 10. Start the checker with --reset. The state should have transitioned to + 6. Start the checker without --reset. State is back to "checking". + 7. Verify that the action entry is still there. + 8. Stop the checker. State is "stopped". + 9. Start the checker with --reset. The state should have transitioned to "unchecked", then "checking". - 11. Verify that the action entry is empty and the status is COMPLETED. + 10. Verify that the action entry is empty and the status is COMPLETED. Jira ID: DAOS-17623 @@ -61,10 +67,9 @@ def test_check_start_reset(self): dmg_command.faults_mgmt_svc_pool( pool=pool.identifier, checker_report_class="CIC_POOL_NONEXIST_ON_MS") - # 3. Start the checker with interactive mode. 
- self.log_step("Start the checker with interactive mode.") + # 3. Start the checker. + self.log_step("Start the checker.") dmg_command.check_enable() - dmg_command.check_set_policy(all_interactive=True) dmg_command.check_start() # 4. Verify that the orphan pool is detected. @@ -75,36 +80,18 @@ def test_check_start_reset(self): self.log_step("Stop the checker.") dmg_command.check_stop() - # 6. Remove the pool directory from the mount point. - self.log_step("Remove the pool directory from the mount point.") - pool_path = self.server_managers[0].get_vos_path(pool) - pool_out = check_file_exists( - hosts=self.hostlist_servers, filename=pool_path, sudo=True) - if not pool_out[0]: - msg = ("MD-on-SSD cluster. Contents under mount point are removed by control " - "plane after system stop.") - self.log.info(msg) - dmg_command.system_start() - # return results in PASS. - return - command = command_as_user(command=f"rm -rf {pool_path}", user="root") - remove_result = run_remote( - log=self.log, hosts=self.hostlist_servers, command=command) - if not remove_result.passed: - self.fail(f"Failed to remove {pool_path} from {remove_result.failed_hosts}") - - # 7. Start the checker without --reset. + # 6. Start the checker without --reset. self.log_step("Start the checker without --reset.") dmg_command.check_start() - # 8. Verify that the action entry is still there. - self.log_step("Verify that the action entry is still there.") - # At this point, the status is STOPPED (it will not turn to RUNNING), so just - # check whether msg contains "orphan pool". + # 7. Verify that the action entry is still there. 
+ self.log_step("Verify that the old action entry is still there.") check_query_out = dmg_command.check_query() query_reports = check_query_out["response"]["reports"] if not query_reports: self.fail("Checker didn't detect any inconsistency!") + if len(query_reports) != 1: + self.fail(f"Expected only one report, but multiple reports found: {query_reports}") fault_msg = query_reports[0]["msg"] if "orphan pool" not in fault_msg: msg = (f"Checker didn't detect the orphan pool (2)! Fault msg = " @@ -112,15 +99,15 @@ def test_check_start_reset(self): dmg_command.check_disable() self.fail(msg) - # 9. Stop the checker. + # 8. Stop the checker. self.log_step("Stop the checker.") dmg_command.check_stop() - # 10. Start the checker with --reset. + # 9. Start the checker with --reset. self.log_step("Start the checker with --reset.") dmg_command.check_start(reset=True) - # 11. Verify that the action entry is empty and the status is COMPLETED. + # 10. Verify that the action entry is empty and the status is COMPLETED. self.log_step( "Verify that the action entry is empty and the status is COMPLETED.") repair_reports = None @@ -141,8 +128,152 @@ def test_check_start_reset(self): # Disable the checker to prepare for the tearDown. dmg_command.check_disable() - # The pool is orphan pool, so skip the cleanup. 
- pool.skip_cleanup() + + def get_reports(self, cmd): + """Helper function - get the reports from the check query""" + check_query_out = cmd.check_query() + return check_query_out["response"]["reports"] + + def expect_reports(self, query_reports, exp_reports): + """Helper function - verify expected check reports are found in actual query reports""" + if not query_reports: + self.fail("Checker didn't detect any inconsistency!") + for exp in exp_reports: + found = False + for report in query_reports: + if ( + report["pool_uuid"].lower() == exp["pool_uuid"].lower() + and report["class"] == exp["class"] + and report["action"] == exp["action"] + ): + found = True + break + if not found: + self.fail(f"expected report {exp} not found") + + def test_check_start_interactive(self): + """Test dmg check start's effects on interactive actions. + + 1. Create 2 pools. + 2. Inject faults on both pools. + 3. Start the checker with interactive mode for all. + 4. Verify that the first pool's issue is found. + 5. Stop the checker. + 6. Start the checker on the second pool. + 7. Verify that the first pool's action is now STALE, and the second pool's fault appears. + 8. Stop the checker. + 9. Start the checker on the first pool. + 10. Verify that the first pool's action is not STALE, but the second pool's action is STALE. + 11. Stop the checker. + 12. Start the checker with no pool specified. + 13. Check that both pools have non-stale actions. + + :avocado: tags=all,full_regression + :avocado: tags=hw,medium + :avocado: tags=recovery,cat_recov + :avocado: tags=DMGCheckStartOptionsTest,test_check_start_interactive + """ + # 1. Create a pool. + self.log_step("Create a pool") + pool1 = self.get_pool(connect=False, size="50%") + pool2 = self.get_pool(connect=False, size="50%") + + # 2. Inject pool faults. 
+ self.log_step("Inject pool faults") + dmg_command = self.get_dmg_command() + dmg_command.faults_mgmt_svc_pool( + pool=pool1.identifier, checker_report_class="CIC_POOL_NONEXIST_ON_MS") + dmg_command.faults_mgmt_svc_pool( + pool=pool2.identifier, checker_report_class="CIC_POOL_NONEXIST_ON_MS") + + # 3. Enable the checker with interactive policies. + self.log_step("Enable the checker with interactive policies") + dmg_command.check_enable() + dmg_command.check_set_policy(all_interactive=True) + + # 4. Start the checker on pool 1. + self.log_step("Start the checker on pool1") + dmg_command.check_start(pool=pool1.uuid) + + # 5. Verify the interactive action + self.log_step("Verify the interactive action for pool1") + reports = self.get_reports(dmg_command) + self.expect_reports(reports, [{ + "pool_uuid": pool1.uuid, + "class": ENUM_CIC_POOL_NONEXIST_ON_MS, + "action": ENUM_CIA_INTERACT, + }]) + + # 6. Stop the checker. + self.log_step("Stop the checker") + dmg_command.check_stop() + + # 7. Start the checker on pool2. + self.log_step("Start the checker on pool2") + dmg_command.check_start(pool=pool2.uuid) + + # 8. Verify pool2 action is INTERACT, pool1 is STALE. + self.log_step("Verify the interactive and stale actions") + reports = self.get_reports(dmg_command) + self.expect_reports(reports, [{ + "pool_uuid": pool1.uuid, + "class": ENUM_CIC_POOL_NONEXIST_ON_MS, + "action": ENUM_CIA_STALE, + }, { + "pool_uuid": pool2.uuid, + "class": ENUM_CIC_POOL_NONEXIST_ON_MS, + "action": ENUM_CIA_INTERACT, + }]) + + # 9. Stop the checker. + self.log_step("Stop the checker") + dmg_command.check_stop() + + # 10. Start the checker on pool1. + self.log_step("Start the checker on pool1") + dmg_command.check_start(pool=pool1.uuid) + + # 11. Verify pool1 action is INTERACT, pool2 is STALE. 
+ self.log_step("Verify the interactive and stale actions") + reports = self.get_reports(dmg_command) + self.expect_reports(reports, [{ + "pool_uuid": pool1.uuid, + "class": ENUM_CIC_POOL_NONEXIST_ON_MS, + "action": ENUM_CIA_INTERACT, + }, { + "pool_uuid": pool2.uuid, + "class": ENUM_CIC_POOL_NONEXIST_ON_MS, + "action": ENUM_CIA_STALE, + }]) + + # 12. Stop the checker. + self.log_step("Stop the checker") + dmg_command.check_stop() + + # 13. Start the checker on the whole system. + self.log_step("Start the checker on the whole system") + dmg_command.check_start() + + # 14. Verify both pool actions are INTERACT. + self.log_step("Verify the interactive actions") + reports = self.get_reports(dmg_command) + self.expect_reports(reports, [{ + "pool_uuid": pool1.uuid, + "class": ENUM_CIC_POOL_NONEXIST_ON_MS, + "action": ENUM_CIA_INTERACT, + }, { + "pool_uuid": pool2.uuid, + "class": ENUM_CIC_POOL_NONEXIST_ON_MS, + "action": ENUM_CIA_INTERACT, + }]) + + # 15. Repair both of the injected faults. + self.log_step("Repairing all findings with default option") + for report in reports: + dmg_command.check_repair(seq_num=report["seq"], action=0) + + # Disable the checker to prepare for the tearDown. + dmg_command.check_disable() def test_check_start_failout(self): """Test dmg check start --failout=on. @@ -233,6 +364,36 @@ def test_check_start_failout(self): # The pool is orphan pool, so skip the cleanup. pool.skip_cleanup() + def query_nr_reports(self, dmg_command, nr_exp_reports): + """ + Query until the number of expected reports are found or max retries reached. + """ + query_wait = 10 # initial wait after starting the check, in seconds + query_sleep = 5 # wait period between retries in seconds + + time.sleep(query_wait) + query_reports = None + for _ in range(self.MAX_QUERY_RETRY): + check_query_out = dmg_command.check_query() + # Even if "status" is RUNNING, "reports" may be null/None, so check both. 
+ status = check_query_out["response"]["status"] + query_reports = check_query_out["response"]["reports"] + if query_reports and len(query_reports) > 0: + self.log.debug("found %d reports, need %d", len(query_reports), nr_exp_reports) + if status == "RUNNING" and query_reports and len(query_reports) >= nr_exp_reports: + break + time.sleep(query_sleep) + + if not query_reports: + if nr_exp_reports > 0: + self.fail("Checker didn't detect any inconsistency!") + else: + return query_reports + + if len(query_reports) < nr_exp_reports: + self.fail(f"Expected at least {nr_exp_reports} reports, but found {len(query_reports)}") + return query_reports + def test_check_start_find_orphans(self): """Test dmg check start --find-orphans. @@ -265,7 +426,7 @@ def test_check_start_find_orphans(self): """ # 1. Create a pool and a container. self.log_step("Create a pool and a container.") - pool_1 = self.get_pool(connect=False) + pool_1 = self.get_pool(connect=False, size="45%") container = self.get_container(pool=pool_1) # 2. Inject non orphan pool fault such as orphan container. @@ -286,16 +447,7 @@ def test_check_start_find_orphans(self): # 4. Check that orphan container is detected. self.log_step("Check that orphan container is detected.") - for _ in range(8): - check_query_out = dmg_command.check_query() - # Even if "status" is RUNNING, "reports" may be null/None, so check both. - status = check_query_out["response"]["status"] - query_reports = check_query_out["response"]["reports"] - if status == "RUNNING" and query_reports: - break - time.sleep(5) - if not query_reports: - self.fail("Checker didn't detect any inconsistency!") + query_reports = self.query_nr_reports(dmg_command, 1) fault_msg = query_reports[0]["msg"] orphan_container = "orphan container" if orphan_container not in fault_msg: @@ -310,7 +462,7 @@ def test_check_start_find_orphans(self): # 6. Create an orphan pool. 
self.log_step("Create an orphan pool.") - pool_2 = self.get_pool(connect=False) + pool_2 = self.get_pool(connect=False, size="45%") dmg_command.faults_mgmt_svc_pool( pool=pool_2.identifier, checker_report_class="CIC_POOL_NONEXIST_ON_MS") @@ -321,14 +473,7 @@ def test_check_start_find_orphans(self): # 8. Check that orphan pool isn't detected. self.log_step("Check that orphan pool isn't detected.") - for _ in range(8): - check_query_out = dmg_command.check_query() - if check_query_out["response"]["status"] == "RUNNING": - query_reports = check_query_out["response"]["reports"] - break - time.sleep(5) - if not query_reports: - self.fail("Checker didn't detect any inconsistency!") + query_reports = self.query_nr_reports(dmg_command, 1) orphan_pool = "orphan pool" # Now we have multiple faults, so iterate query_reports. for query_report in query_reports: @@ -343,14 +488,7 @@ def test_check_start_find_orphans(self): # 10. Verify that the orphan pool is detected this time. self.log_step("Verify that the orphan pool is detected this time.") - for _ in range(8): - check_query_out = dmg_command.check_query() - if check_query_out["response"]["status"] == "RUNNING": - query_reports = check_query_out["response"]["reports"] - break - time.sleep(5) - if not query_reports: - self.fail("Checker didn't detect any inconsistency!") + query_reports = self.query_nr_reports(dmg_command, 2) orphan_pool_found = False pool_2_seq_num = None for query_report in query_reports: @@ -370,7 +508,7 @@ def test_check_start_find_orphans(self): dmg_command.check_repair(seq_num=pool_2_seq_num, action="0") repair_phase = None orphan_pool_repaired = False - for _ in range(8): + for _ in range(self.MAX_QUERY_RETRY): check_query_out = dmg_command.check_query() if check_query_out["response"]["status"] == "RUNNING": # Check the "phase" field of pool_2. Look for CSP_DONE. 
diff --git a/src/tests/ftest/recovery/check_start_options.yaml b/src/tests/ftest/recovery/check_start_options.yaml index 857c79dd527..ad603b0d728 100644 --- a/src/tests/ftest/recovery/check_start_options.yaml +++ b/src/tests/ftest/recovery/check_start_options.yaml @@ -12,7 +12,6 @@ server_config: engines_per_host: 1 engines: 0: - log_file: daos_server0.log nr_xs_helpers: 1 storage: auto diff --git a/src/tests/suite/daos_cr.c b/src/tests/suite/daos_cr.c index e8c1459eee1..9821740cf55 100644 --- a/src/tests/suite/daos_cr.c +++ b/src/tests/suite/daos_cr.c @@ -2344,14 +2344,16 @@ cr_engine_resume(void **state) static void cr_reset_specified(void **state) { - test_arg_t *arg = *state; - struct test_pool pools[2] = { 0 }; - struct test_cont conts[2] = { 0 }; - struct daos_check_info dcis[2] = { 0 }; - uint32_t classes[3]; - uint32_t actions[3]; - int rc; - int i; + test_arg_t *arg = *state; + struct test_pool pools[2] = {0}; + struct test_cont conts[2] = {0}; + struct daos_check_info dcis[2] = {0}; + const int NR_REPORTS = 3; + uint32_t classes[NR_REPORTS]; + uint32_t actions[NR_REPORTS]; + uint32_t stale_actions[NR_REPORTS]; + int rc; + int i; FAULT_INJECTION_REQUIRED(); @@ -2368,6 +2370,12 @@ cr_reset_specified(void **state) actions[1] = TCA_INTERACT; actions[2] = TCA_INTERACT; + for (i = 0; i < NR_REPORTS; i++) { + stale_actions[i] = actions[i]; + if (stale_actions[i] == TCA_INTERACT) + stale_actions[i] = TCA_STALE; + } + for (i = 0; i < 2; i++) { rc = cr_pool_create(state, &pools[i], true, classes[0]); assert_rc_equal(rc, 0); @@ -2418,7 +2426,8 @@ cr_reset_specified(void **state) assert_rc_equal(rc, 0); /* Pool2's (old) report should be still there. 
*/ - rc = cr_pool_verify(&dcis[1], pools[1].pool_uuid, TCPS_STOPPED, 2, classes, actions, NULL); + rc = cr_pool_verify(&dcis[1], pools[1].pool_uuid, TCPS_STOPPED, 2, classes, stale_actions, + NULL); assert_rc_equal(rc, 0); rc = cr_check_stop(0, NULL); @@ -2433,8 +2442,8 @@ cr_reset_specified(void **state) rc = cr_ins_verify(&dcis[1], TCIS_RUNNING); assert_rc_equal(rc, 0); - /* There are 3 reports for pool2: two are old (since not reset), another one is new. */ - rc = cr_pool_verify(&dcis[1], pools[1].pool_uuid, TCPS_PENDING, 3, classes, actions, NULL); + /* Pool2's stale report is re-generated */ + rc = cr_pool_verify(&dcis[1], pools[1].pool_uuid, TCPS_PENDING, 2, classes, actions, NULL); assert_rc_equal(rc, 0); rc = cr_check_stop(0, NULL); From d9f7d8386af5d83bd4cbeb66706da74a3a14a852 Mon Sep 17 00:00:00 2001 From: Liu Xuezhao Date: Tue, 13 Jan 2026 16:03:55 +0800 Subject: [PATCH 126/253] DAOS-17444 rebuild: cache obj open handle for rebuild (#16373) * DAOS-17444 rebuild: cache obj open handle for rebuild Only call dsc_obj_open() one time for each object for rebuild puller rather than open it for each dkey migrate, to save layout calculation overhead. 
Signed-off-by: Xuezhao Liu --- src/object/srv_obj_migrate.c | 191 ++++++++++++++++++----------------- 1 file changed, 100 insertions(+), 91 deletions(-) diff --git a/src/object/srv_obj_migrate.c b/src/object/srv_obj_migrate.c index d2a95aa3c79..c3d4074516e 100644 --- a/src/object/srv_obj_migrate.c +++ b/src/object/srv_obj_migrate.c @@ -101,6 +101,7 @@ static struct migr_engine_res migr_eng_res; struct migrate_one { struct migrate_pool_tls *mo_tls; + struct iter_obj_arg *mo_obj_arg; daos_key_t mo_dkey; uint64_t mo_dkey_hash; uuid_t mo_pool_uuid; @@ -179,6 +180,9 @@ struct iter_obj_arg { uuid_t pool_uuid; uuid_t cont_uuid; daos_unit_oid_t oid; + daos_handle_t ioa_oh; + int ioa_obj_ref; + struct daos_oclass_attr ioa_oca; daos_epoch_t epoch; daos_epoch_t punched_epoch; unsigned int shard; @@ -189,6 +193,40 @@ struct iter_obj_arg { uint32_t generation; }; +void +migrate_pool_tls_put(struct migrate_pool_tls *tls); + +static void +migrate_obj_get(struct iter_obj_arg *arg) +{ + arg->ioa_obj_ref++; +} + +static void +obj_iter_arg_free(struct iter_obj_arg *arg) +{ + if (arg->pool_tls) + migrate_pool_tls_put(arg->pool_tls); + if (arg->snaps) + D_FREE(arg->snaps); + D_FREE(arg); +} + +static void +migrate_obj_put(struct iter_obj_arg *arg) +{ + D_ASSERTF(arg->ioa_obj_ref > 0, DF_CONT " obj " DF_UOID " bad ioa_obj_ref %d\n", + DP_CONT(arg->pool_uuid, arg->cont_uuid), DP_UOID(arg->oid), arg->ioa_obj_ref); + arg->ioa_obj_ref--; + if (arg->ioa_obj_ref == 0) { + if (daos_handle_is_valid(arg->ioa_oh)) { + dsc_obj_close(arg->ioa_oh); + arg->ioa_oh = DAOS_HDL_INVAL; + } + obj_iter_arg_free(arg); + } +} + static int obj_tree_destory_cb(daos_handle_t ih, d_iov_t *key_iov, d_iov_t *val_iov, void *data) @@ -1676,47 +1714,28 @@ static int migrate_dkey(struct migrate_pool_tls *tls, struct migrate_one *mrone, daos_size_t data_size) { - struct ds_cont_child *cont = NULL; - struct cont_props props; - daos_handle_t coh = DAOS_HDL_INVAL; + struct ds_cont_child *cont = NULL; daos_handle_t oh = 
DAOS_HDL_INVAL; int rc; D_ASSERT(dss_get_module_info()->dmi_xs_id != 0); rc = migrate_get_cont_child(tls, mrone->mo_cont_uuid, &cont, true); if (rc || cont == NULL) - D_GOTO(cont_put, rc); - - rc = dsc_pool_open(tls->mpt_pool_uuid, tls->mpt_poh_uuid, 0, - NULL, tls->mpt_pool->spc_pool->sp_map, - &tls->mpt_svc_list, &tls->mpt_pool_hdl); - if (rc) - D_GOTO(cont_put, rc); - - /* Open client dc handle used to read the remote object data */ - rc = migrate_cont_open(tls, mrone->mo_cont_uuid, 0, &coh); - if (rc) - D_GOTO(cont_put, rc); + D_GOTO(out, rc); - /* Open the remote object */ - rc = dsc_obj_open(coh, mrone->mo_oid.id_pub, DAOS_OO_RO, &oh); - if (rc) - D_GOTO(cont_put, rc); + D_ASSERTF(mrone->mo_obj_arg->ioa_obj_ref > 0, + DF_RB ": oid " DF_UOID ", bad ioa_obj_ref %d\n", DP_RB_MPT(tls), + DP_UOID(mrone->mo_oid), mrone->mo_obj_arg->ioa_obj_ref); + D_ASSERT(daos_handle_is_valid(mrone->mo_obj_arg->ioa_oh)); + oh = mrone->mo_obj_arg->ioa_oh; + mrone->mo_oca = mrone->mo_obj_arg->ioa_oca; if (DAOS_FAIL_CHECK(DAOS_REBUILD_TGT_NOSPACE)) - D_GOTO(obj_close, rc = -DER_NOSPACE); + D_GOTO(out, rc = -DER_NOSPACE); if (DAOS_FAIL_CHECK(DAOS_REBUILD_NO_REBUILD)) { D_DEBUG(DB_REBUILD, DF_RB ": fault injected, disable rebuild\n", DP_RB_MPT(tls)); - D_GOTO(obj_close, rc); - } - - dsc_cont_get_props(coh, &props); - rc = dsc_obj_id2oc_attr(mrone->mo_oid.id_pub, &props, &mrone->mo_oca); - if (rc) { - D_ERROR(DF_RB ": unknown object class: %u\n", DP_RB_MPT(tls), - daos_obj_id2class(mrone->mo_oid.id_pub)); - D_GOTO(obj_close, rc); + D_GOTO(out, rc); } /* punch the object */ @@ -1728,21 +1747,21 @@ migrate_dkey(struct migrate_pool_tls *tls, struct migrate_one *mrone, if (rc) { DL_ERROR(rc, DF_RB ": " DF_UOID " punch obj failed", DP_RB_MPT(tls), DP_UOID(mrone->mo_oid)); - D_GOTO(obj_close, rc); + D_GOTO(out, rc); } } rc = migrate_punch(tls, mrone, cont); if (rc) - D_GOTO(obj_close, rc); + D_GOTO(out, rc); if (data_size == 0) { D_DEBUG(DB_REBUILD, DF_RB ": empty mrone %p\n", DP_RB_MPT(tls), 
mrone); - D_GOTO(obj_close, rc); + D_GOTO(out, rc); } if (DAOS_FAIL_CHECK(DAOS_REBUILD_UPDATE_FAIL)) - D_GOTO(obj_close, rc = -DER_INVAL); + D_GOTO(out, rc = -DER_INVAL); if (mrone->mo_iods[0].iod_type == DAOS_IOD_SINGLE) rc = migrate_fetch_update_single(mrone, oh, cont); @@ -1758,9 +1777,8 @@ migrate_dkey(struct migrate_pool_tls *tls, struct migrate_one *mrone, tls->mpt_rec_count += mrone->mo_rec_num; tls->mpt_size += mrone->mo_size; -obj_close: - dsc_obj_close(oh); -cont_put: + +out: if (cont != NULL) ds_cont_child_put(cont); return rc; @@ -1810,6 +1828,8 @@ migrate_one_destroy(struct migrate_one *mrone) if (mrone->mo_iods_csums) D_FREE(mrone->mo_iods_csums); + if (mrone->mo_obj_arg) + migrate_obj_put(mrone->mo_obj_arg); if (mrone->mo_tls) migrate_pool_tls_put(mrone->mo_tls); @@ -1940,7 +1960,7 @@ migrate_res_release(struct migrate_pool_tls *tls, int res_type, long units) static void migrate_one_ult(void *arg) { - struct migrate_one *mrone = arg; + struct migrate_one *mrone = arg; struct migrate_pool_tls *tls; daos_size_t data_size; int rc = 0; @@ -2780,6 +2800,8 @@ migrate_start_ult(struct enum_unpack_arg *unpack_arg) break; d_list_del_init(&mrone->mo_list); + migrate_obj_get(arg); + mrone->mo_obj_arg = arg; migrate_pool_tls_get(tls); mrone->mo_tls = tls; @@ -2815,13 +2837,10 @@ migrate_one_epoch_object(daos_epoch_range_t *epr, struct migrate_pool_tls *tls, daos_key_desc_t kds[KDS_NUM] = {0}; d_iov_t csum = {0}; d_iov_t *p_csum; - uint8_t stack_csum_buf[CSUM_BUF_SIZE] = {0}; - struct cont_props props; + uint8_t stack_csum_buf[CSUM_BUF_SIZE] = {0}; struct enum_unpack_arg unpack_arg = { 0 }; d_iov_t iov = { 0 }; - d_sg_list_t sgl = { 0 }; - daos_handle_t coh = DAOS_HDL_INVAL; - daos_handle_t oh = DAOS_HDL_INVAL; + d_sg_list_t sgl = {0}; uint32_t minimum_nr; uint32_t enum_flags; uint32_t num; @@ -2838,44 +2857,15 @@ migrate_one_epoch_object(daos_epoch_range_t *epr, struct migrate_pool_tls *tls, D_ASSERT(dss_get_module_info()->dmi_xs_id != 0); - rc = 
dsc_pool_open(tls->mpt_pool_uuid, tls->mpt_poh_uuid, 0, - NULL, tls->mpt_pool->spc_pool->sp_map, - &tls->mpt_svc_list, &tls->mpt_pool_hdl); - if (rc) { - DL_ERROR(rc, DF_RB ": dsc_pool_open failed", DP_RB_MPT(tls)); - D_GOTO(out, rc); - } - - rc = migrate_cont_open(tls, arg->cont_uuid, 0, &coh); - if (rc) { - DL_ERROR(rc, DF_RB ": migrate_cont_open failed", DP_RB_MPT(tls)); - D_GOTO(out, rc); - } - - /* Only open with RW flag, reintegrating flag will be set, which is needed - * during unpack_cb to check if parity shard alive. - */ - rc = dsc_obj_open(coh, arg->oid.id_pub, DAOS_OO_RO, &oh); - if (rc) { - DL_ERROR(rc, DF_RB ": dsc_obj_open failed", DP_RB_MPT(tls)); - D_GOTO(out, rc); - } - + D_ASSERT(daos_handle_is_valid(arg->ioa_oh)); unpack_arg.arg = arg; unpack_arg.epr = *epr; - unpack_arg.oh = oh; + unpack_arg.oh = arg->ioa_oh; unpack_arg.version = tls->mpt_version; D_INIT_LIST_HEAD(&unpack_arg.merge_list); buf = stack_buf; buf_len = ITER_BUF_SIZE; - - dsc_cont_get_props(coh, &props); - rc = dsc_obj_id2oc_attr(arg->oid.id_pub, &props, &unpack_arg.oc_attr); - if (rc) { - DL_ERROR(rc, DF_RB ": unknown object class: %u", DP_RB_MPT(tls), - daos_obj_id2class(arg->oid.id_pub)); - D_GOTO(out_obj, rc); - } + unpack_arg.oc_attr = arg->ioa_oca; memset(&anchor, 0, sizeof(anchor)); memset(&akey_anchor, 0, sizeof(akey_anchor)); @@ -2927,9 +2917,8 @@ migrate_one_epoch_object(daos_epoch_range_t *epr, struct migrate_pool_tls *tls, daos_anchor_set_flags(&dkey_anchor, enum_flags); num = KDS_NUM; - rc = dsc_obj_list_obj(oh, epr, NULL, NULL, NULL, - &num, kds, &sgl, &anchor, - &dkey_anchor, &akey_anchor, p_csum); + rc = dsc_obj_list_obj(arg->ioa_oh, epr, NULL, NULL, NULL, &num, kds, &sgl, &anchor, + &dkey_anchor, &akey_anchor, p_csum); if (rc == -DER_KEY2BIG) { D_DEBUG(DB_REBUILD, @@ -3068,9 +3057,7 @@ migrate_one_epoch_object(daos_epoch_range_t *epr, struct migrate_pool_tls *tls, if (csum.iov_buf != NULL && csum.iov_buf != stack_csum_buf) D_FREE(csum.iov_buf); -out_obj: - 
dsc_obj_close(oh); -out: + D_DEBUG(DB_REBUILD, DF_RB ": obj " DF_UOID " shard %u eph " DF_U64 "-" DF_U64 ": " DF_RC "\n", DP_RB_MPT(tls), DP_UOID(arg->oid), arg->shard, epr->epr_lo, epr->epr_hi, DP_RC(rc)); @@ -3166,16 +3153,6 @@ migrate_obj_punch(struct iter_obj_arg *arg) arg->tgt_idx, MIGRATE_STACK_SIZE); } -static void -obj_iter_arg_free(struct iter_obj_arg *arg) -{ - if (arg->pool_tls) - migrate_pool_tls_put(arg->pool_tls); - if (arg->snaps) - D_FREE(arg->snaps); - D_FREE(arg); -} - /** * This ULT manages migration one object ID for one container. It does not do * the data migration itself - instead it iterates akeys/dkeys as a client and @@ -3196,9 +3173,12 @@ migrate_obj_ult(void *data) struct migrate_pool_tls *tls = NULL; daos_epoch_range_t epr; daos_epoch_t stable_epoch = 0; + daos_handle_t coh = DAOS_HDL_INVAL; + struct cont_props props; int i; int rc = 0; + migrate_obj_get(arg); tls = arg->pool_tls; if (tls->mpt_fini) { D_WARN("someone aborted the rebuild " DF_UUID "\n", DP_UUID(arg->pool_uuid)); @@ -3250,6 +3230,33 @@ migrate_obj_ult(void *data) ds_cont_child_put(cont_child); } + rc = dsc_pool_open(tls->mpt_pool_uuid, tls->mpt_poh_uuid, 0, NULL, + tls->mpt_pool->spc_pool->sp_map, &tls->mpt_svc_list, &tls->mpt_pool_hdl); + if (rc) { + DL_ERROR(rc, DF_RB ": dsc_pool_open failed", DP_RB_MPT(tls)); + D_GOTO(out, rc); + } + + rc = migrate_cont_open(tls, arg->cont_uuid, 0, &coh); + if (rc) { + DL_ERROR(rc, DF_RB ": migrate_cont_open failed", DP_RB_MPT(tls)); + D_GOTO(out, rc); + } + + rc = dsc_obj_open(coh, arg->oid.id_pub, DAOS_OO_RO, &arg->ioa_oh); + if (rc) { + DL_ERROR(rc, DF_RB ": dsc_obj_open failed", DP_RB_MPT(tls)); + D_GOTO(out, rc); + } + + dsc_cont_get_props(coh, &props); + rc = dsc_obj_id2oc_attr(arg->oid.id_pub, &props, &arg->ioa_oca); + if (rc) { + DL_ERROR(rc, DF_RB ": unknown object class: %u", DP_RB_MPT(tls), + daos_obj_id2class(arg->oid.id_pub)); + D_GOTO(out, rc); + } + for (i = 0; i < arg->snap_cnt; i++) { daos_epoch_t lower_epoch = 0; @@ 
-3325,7 +3332,7 @@ migrate_obj_ult(void *data) tls->mpt_tgt_dkey_ult_cnt, tls->mpt_obj_count, DP_RC(rc)); free_notls: migrate_res_release(tls, MIGR_OBJ, 1); - obj_iter_arg_free(arg); + migrate_obj_put(arg); } struct migrate_obj_val { @@ -3357,6 +3364,8 @@ migrate_one_object(daos_unit_oid_t oid, daos_epoch_t eph, daos_epoch_t punched_e migrate_pool_tls_get(tls); obj_arg->pool_tls = tls; obj_arg->oid = oid; + obj_arg->ioa_oh = DAOS_HDL_INVAL; + obj_arg->ioa_obj_ref = 0; obj_arg->epoch = eph; obj_arg->shard = shard; obj_arg->punched_epoch = punched_eph; From 29279295a5511fcd10b0ee378f6aa2ffd38583c4 Mon Sep 17 00:00:00 2001 From: Nasf-Fan Date: Tue, 13 Jan 2026 21:19:09 +0800 Subject: [PATCH 127/253] DAOS-18381 ddb: properly transfer ddb cmdline options (#17354) The db_path maybe not contained inside the vos_path parameter, especially under md-on-ssd mode. Related go interfaces need to properly transfer cmdline option db_path from control plane to the lower ddb utils. The patch also adds more check for the vos pool open and close status before real VOS operation. Signed-off-by: Fan Yong --- src/control/cmd/ddb/commands_wrapper.go | 13 ++- src/control/cmd/ddb/ddb_commands.go | 9 ++- src/control/cmd/ddb/main.go | 26 +++--- src/utils/ddb/ddb.h | 3 +- src/utils/ddb/ddb_commands.c | 102 ++++++++++++++---------- src/utils/ddb/ddb_parse.h | 4 +- src/utils/ddb/ddb_vos.c | 9 ++- src/utils/ddb/ddb_vos.h | 4 +- src/utils/ddb/tests/ddb_test_driver.c | 4 +- 9 files changed, 104 insertions(+), 70 deletions(-) diff --git a/src/control/cmd/ddb/commands_wrapper.go b/src/control/cmd/ddb/commands_wrapper.go index e393b7b7b47..1ba453d082d 100644 --- a/src/control/cmd/ddb/commands_wrapper.go +++ b/src/control/cmd/ddb/commands_wrapper.go @@ -1,6 +1,6 @@ // // (C) Copyright 2022-2024 Intel Corporation. -// (C) Copyright 2025 Hewlett Packard Enterprise Development LP. +// (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP. // (C) Copyright 2025 Vdura Inc. 
// // SPDX-License-Identifier: BSD-2-Clause-Patent @@ -78,13 +78,12 @@ func ddbLs(ctx *DdbContext, path string, recursive bool, details bool) error { return daosError(C.ddb_run_ls(&ctx.ctx, &options)) } -func ddbOpen(ctx *DdbContext, path string, db_path string, write_mode bool) error { +func ddbOpen(ctx *DdbContext, path string, write_mode bool) error { /* Set up the options */ options := C.struct_open_options{} options.path = C.CString(path) defer freeString(options.path) - options.db_path = C.CString(db_path) - defer freeString(options.db_path) + options.db_path = ctx.ctx.dc_db_path options.write_mode = C.bool(write_mode) /* Run the c code command */ return daosError(C.ddb_run_open(&ctx.ctx, &options)) @@ -232,13 +231,12 @@ func ddbDtxActAbort(ctx *DdbContext, path string, dtx_id string) error { return daosError(C.ddb_run_dtx_act_abort(&ctx.ctx, &options)) } -func ddbFeature(ctx *DdbContext, path, db_path, enable, disable string, show bool) error { +func ddbFeature(ctx *DdbContext, path, enable, disable string, show bool) error { /* Set up the options */ options := C.struct_feature_options{} options.path = C.CString(path) defer freeString(options.path) - options.db_path = C.CString(db_path) - defer freeString(options.db_path) + options.db_path = ctx.ctx.dc_db_path if enable != "" { err := daosError(C.ddb_feature_string2flags(&ctx.ctx, C.CString(enable), &options.set_compat_flags, &options.set_incompat_flags)) @@ -263,6 +261,7 @@ func ddbRmPool(ctx *DdbContext, path string) error { options := C.struct_rm_pool_options{} options.path = C.CString(path) defer freeString(options.path) + options.db_path = ctx.ctx.dc_db_path /* Run the c code command */ return daosError(C.ddb_run_rm_pool(&ctx.ctx, &options)) } diff --git a/src/control/cmd/ddb/ddb_commands.go b/src/control/cmd/ddb/ddb_commands.go index aa8bb7f968a..7273ac94b52 100644 --- a/src/control/cmd/ddb/ddb_commands.go +++ b/src/control/cmd/ddb/ddb_commands.go @@ -1,7 +1,7 @@ // // (C) Copyright 2022-2024 Intel 
Corporation. // (C) Copyright 2025 Vdura Inc. -// (C) Copyright 2025 Hewlett Packard Enterprise Development LP. +// (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP. // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -51,7 +51,7 @@ pool shard. Part of the path is used to determine what the pool uuid is.`, a.String("path", "Path to the vos file to open.") }, Run: func(c *grumble.Context) error { - return ddbOpen(ctx, c.Args.String("path"), c.Flags.String("db_path"), c.Flags.Bool("write_mode")) + return ddbOpen(ctx, c.Args.String("path"), c.Flags.Bool("write_mode")) }, Completer: openCompleter, }) @@ -319,7 +319,7 @@ the path must include the extent, otherwise, it must not.`, a.String("path", "Optional, Path to the vos file", grumble.Default("")) }, Run: func(c *grumble.Context) error { - return ddbFeature(ctx, c.Args.String("path"), c.Flags.String("db_path"), c.Flags.String("enable"), c.Flags.String("disable"), c.Flags.Bool("show")) + return ddbFeature(ctx, c.Args.String("path"), c.Flags.String("enable"), c.Flags.String("disable"), c.Flags.Bool("show")) }, Completer: featureCompleter, }) @@ -330,6 +330,9 @@ the path must include the extent, otherwise, it must not.`, Help: "Remove a vos pool.", LongHelp: "", HelpGroup: "vos", + Flags: func(f *grumble.Flags) { + f.String("p", "db_path", "", "Path to the sys db.") + }, Args: func(a *grumble.Args) { a.String("path", "Optional, Path to the vos file", grumble.Default("")) }, diff --git a/src/control/cmd/ddb/main.go b/src/control/cmd/ddb/main.go index 7eb6a9cc7ce..e608cf6d20c 100644 --- a/src/control/cmd/ddb/main.go +++ b/src/control/cmd/ddb/main.go @@ -260,6 +260,10 @@ ddb "" ls --help return nil } + if opts.Args.RunCmd != "" && opts.CmdFile != "" { + return errors.New("Cannot use both command file and a command string") + } + if opts.Debug { log.WithLogLevel(logging.LogLevelDebug) log.Debug("debug output enabled") @@ -272,26 +276,30 @@ ddb "" ls --help defer cleanup() app := createGrumbleApp(ctx) + 
if opts.SysdbPath != "" { + ctx.ctx.dc_db_path = C.CString(string(opts.SysdbPath)) + defer C.free(unsafe.Pointer(ctx.ctx.dc_db_path)) + } + if opts.Args.VosPath != "" { + ctx.ctx.dc_pool_path = C.CString(string(opts.Args.VosPath)) + defer C.free(unsafe.Pointer(ctx.ctx.dc_pool_path)) + if !strings.HasPrefix(string(opts.Args.RunCmd), "feature") && + !strings.HasPrefix(string(opts.Args.RunCmd), "open") && + !strings.HasPrefix(string(opts.Args.RunCmd), "close") && + !strings.HasPrefix(string(opts.Args.RunCmd), "prov_mem") && + !strings.HasPrefix(string(opts.Args.RunCmd), "smd_sync") && !strings.HasPrefix(string(opts.Args.RunCmd), "rm_pool") && !strings.HasPrefix(string(opts.Args.RunCmd), "dev_list") && !strings.HasPrefix(string(opts.Args.RunCmd), "dev_replace") { log.Debugf("Connect to path: %s\n", opts.Args.VosPath) - if err := ddbOpen(ctx, string(opts.Args.VosPath), string(opts.SysdbPath), opts.WriteMode); err != nil { + if err := ddbOpen(ctx, string(opts.Args.VosPath), bool(opts.WriteMode)); err != nil { return errors.Wrapf(err, "Error opening path: %s", opts.Args.VosPath) } } } - if opts.Args.RunCmd != "" && opts.CmdFile != "" { - return errors.New("Cannot use both command file and a command string") - } - - if opts.Args.VosPath != "" { - ctx.ctx.dc_pool_path = C.CString(string(opts.Args.VosPath)) - defer C.free(unsafe.Pointer(ctx.ctx.dc_pool_path)) - } if opts.Args.RunCmd != "" || opts.CmdFile != "" { // Non-interactive mode if opts.Args.RunCmd != "" { diff --git a/src/utils/ddb/ddb.h b/src/utils/ddb/ddb.h index 3bc63c8f40f..818881cd94a 100644 --- a/src/utils/ddb/ddb.h +++ b/src/utils/ddb/ddb.h @@ -1,6 +1,6 @@ /** * (C) Copyright 2022-2024 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP. + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP. * (C) Copyright 2025 Vdura Inc. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent @@ -204,6 +204,7 @@ struct feature_options { struct rm_pool_options { const char *path; + const char *db_path; }; struct dev_list_options { diff --git a/src/utils/ddb/ddb_commands.c b/src/utils/ddb/ddb_commands.c index 705c0eaabda..ed3edef9121 100644 --- a/src/utils/ddb/ddb_commands.c +++ b/src/utils/ddb/ddb_commands.c @@ -1,7 +1,7 @@ /** * (C) Copyright 2022-2024 Intel Corporation. * (C) Copyright 2025 Vdura Inc. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP. + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -27,6 +27,24 @@ #define ilog_path_required_error_message "Path to object, dkey, or akey required\n" #define error_msg_write_mode_only "Can only modify the VOS tree in 'write mode'\n" +/* clang-format off */ +#define DDB_POOL_SHOULD_OPEN(ctx) \ + do { \ + if (daos_handle_is_inval((ctx)->dc_poh)) { \ + ddb_error(ctx, "Cannot operate on a closed pool. Open it firstly.\n"); \ + return -DER_NO_HDL; \ + } \ + } while (0) + +#define DDB_POOL_SHOULD_CLOSE(ctx) \ + do { \ + if (daos_handle_is_valid((ctx)->dc_poh)) { \ + ddb_error(ctx, "Cannot operate on an opened pool. 
Close it firstly.\n"); \ + return -DER_BUSY; \ + } \ + } while (0) +/* clang-format on */ + int ddb_run_version(struct ddb_ctx *ctx) { @@ -62,10 +80,8 @@ ddb_pool_is_open(struct ddb_ctx *ctx) int ddb_run_open(struct ddb_ctx *ctx, struct open_options *opt) { - if (ddb_pool_is_open(ctx)) { - ddb_error(ctx, "Must close pool before can open another\n"); - return -DER_EXIST; - } + DDB_POOL_SHOULD_CLOSE(ctx); + ctx->dc_write_mode = opt->write_mode; return dv_pool_open(opt->path, opt->db_path, &ctx->dc_poh, 0); } @@ -75,10 +91,8 @@ ddb_run_close(struct ddb_ctx *ctx) { int rc; - if (!ddb_pool_is_open(ctx)) { - ddb_error(ctx, "No pool open to close\n"); + if (!ddb_pool_is_open(ctx)) return 0; - } rc = dv_pool_close(ctx->dc_poh); ctx->dc_poh = DAOS_HDL_INVAL; @@ -217,12 +231,9 @@ ddb_run_ls(struct ddb_ctx *ctx, struct ls_options *opt) struct dv_tree_path vtp; struct ls_ctx lsctx = {0}; - if (daos_handle_is_inval(ctx->dc_poh)) { - ddb_error(ctx, "Not connected to a pool. Use 'open' to connect to a pool.\n"); - return -DER_NONEXIST; - } - rc = init_path(ctx, opt->path, &itp); + DDB_POOL_SHOULD_OPEN(ctx); + rc = init_path(ctx, opt->path, &itp); if (!SUCCESS(rc)) return rc; @@ -266,8 +277,9 @@ ddb_run_superblock_dump(struct ddb_ctx *ctx) { int rc; - rc = dv_superblock(ctx->dc_poh, print_superblock_cb, ctx); + DDB_POOL_SHOULD_OPEN(ctx); + rc = dv_superblock(ctx->dc_poh, print_superblock_cb, ctx); if (rc == -DER_DF_INVAL) ddb_error(ctx, "Error with pool superblock"); @@ -331,6 +343,8 @@ ddb_run_value_dump(struct ddb_ctx *ctx, struct value_dump_options *opt) dv_dump_value_cb cb = NULL; int rc; + DDB_POOL_SHOULD_OPEN(ctx); + if (!opt->path) { ddb_error(ctx, "A VOS path to dump is required.\n"); return -DER_INVAL; @@ -383,6 +397,8 @@ ddb_run_ilog_dump(struct ddb_ctx *ctx, struct ilog_dump_options *opt) daos_handle_t coh; int rc; + DDB_POOL_SHOULD_OPEN(ctx); + if (!opt->path) { ddb_error(ctx, ilog_path_required_error_message); return -DER_INVAL; @@ -460,6 +476,8 @@ 
ddb_run_dtx_dump(struct ddb_ctx *ctx, struct dtx_dump_options *opt) bool both = !(opt->committed ^ opt->active); struct dtx_cb_args args = {.ctx = ctx, .entry_count = 0}; + DDB_POOL_SHOULD_OPEN(ctx); + rc = init_path(ctx, opt->path, &itp); if (!SUCCESS(rc)) return rc; @@ -512,6 +530,8 @@ ddb_run_rm(struct ddb_ctx *ctx, struct rm_options *opt) struct dv_tree_path vtp; int rc; + DDB_POOL_SHOULD_OPEN(ctx); + if (!ctx->dc_write_mode) { ddb_error(ctx, error_msg_write_mode_only); return -DER_INVAL; @@ -549,6 +569,8 @@ ddb_run_value_load(struct ddb_ctx *ctx, struct value_load_options *opt) size_t file_size; int rc; + DDB_POOL_SHOULD_OPEN(ctx); + if (!ctx->dc_write_mode) { ddb_error(ctx, error_msg_write_mode_only); return -DER_INVAL; @@ -616,6 +638,8 @@ process_ilog_op(struct ddb_ctx *ctx, char *path, enum ddb_ilog_op op) daos_handle_t coh = {0}; int rc; + DDB_POOL_SHOULD_OPEN(ctx); + if (!ctx->dc_write_mode) { ddb_error(ctx, error_msg_write_mode_only); return -DER_INVAL; @@ -686,6 +710,8 @@ ddb_run_dtx_cmt_clear(struct ddb_ctx *ctx, struct dtx_cmt_clear_options *opt) daos_handle_t coh = {0}; int rc; + DDB_POOL_SHOULD_OPEN(ctx); + if (!ctx->dc_write_mode) { ddb_error(ctx, error_msg_write_mode_only); return -DER_INVAL; @@ -764,10 +790,7 @@ ddb_run_smd_sync(struct ddb_ctx *ctx, struct smd_sync_options *opt) char db_path[DDB_PATH_MAX] = DEFAULT_DB_PATH; int rc; - if (daos_handle_is_valid(ctx->dc_poh)) { - ddb_print(ctx, "Close pool connection before attempting to sync smd\n"); - return -DER_INVAL; - } + DDB_POOL_SHOULD_CLOSE(ctx); if (opt->nvme_conf != NULL) { if (strlen(opt->nvme_conf) == 0 || strlen(opt->nvme_conf) >= DDB_PATH_MAX) { @@ -816,6 +839,8 @@ ddb_run_vea_dump(struct ddb_ctx *ctx) struct dump_vea_cb_args args = {.dva_ctx = ctx, .dva_count = 0}; int rc; + DDB_POOL_SHOULD_OPEN(ctx); + rc = dv_enumerate_vea(ctx->dc_poh, dump_vea_cb, &args); ddb_printf(ctx, "Total Free Regions: %d\n", args.dva_count); @@ -894,6 +919,8 @@ ddb_run_vea_update(struct ddb_ctx *ctx, struct 
vea_update_options *opt) uint32_t blk_cnt; int rc; + DDB_POOL_SHOULD_OPEN(ctx); + if (!ctx->dc_write_mode) { ddb_error(ctx, error_msg_write_mode_only); return -DER_INVAL; @@ -983,6 +1010,8 @@ ddb_run_dtx_act_commit(struct ddb_ctx *ctx, struct dtx_act_options *opt) struct dtx_modify_args args = {0}; int rc; + DDB_POOL_SHOULD_OPEN(ctx); + if (!ctx->dc_write_mode) { ddb_error(ctx, error_msg_write_mode_only); return -DER_INVAL; @@ -1013,6 +1042,8 @@ ddb_run_dtx_act_abort(struct ddb_ctx *ctx, struct dtx_act_options *opt) struct dtx_modify_args args = {0}; int rc; + DDB_POOL_SHOULD_OPEN(ctx); + if (!ctx->dc_write_mode) { ddb_error(ctx, error_msg_write_mode_only); return -DER_INVAL; @@ -1115,12 +1146,9 @@ ddb_run_feature(struct ddb_ctx *ctx, struct feature_options *opt) int ddb_run_rm_pool(struct ddb_ctx *ctx, struct rm_pool_options *opt) { - if (ddb_pool_is_open(ctx)) { - ddb_error(ctx, "Must close pool before can open another\n"); - return -DER_BUSY; - } + DDB_POOL_SHOULD_CLOSE(ctx); - return dv_pool_destroy(opt->path); + return dv_pool_destroy(opt->path, opt->db_path); } #define DTI_ALL "all" @@ -1161,6 +1189,8 @@ ddb_run_dtx_act_discard_invalid(struct ddb_ctx *ctx, struct dtx_act_options *opt struct dtx_active_entry_discard_invalid_cb_arg bundle = {.ctx = ctx, .args = &args}; int rc; + DDB_POOL_SHOULD_OPEN(ctx); + if (!ctx->dc_write_mode) { ddb_error(ctx, error_msg_write_mode_only); return -DER_INVAL; @@ -1197,10 +1227,7 @@ ddb_run_dev_list(struct ddb_ctx *ctx, struct dev_list_options *opt) d_list_t dev_list; int rc, dev_cnt = 0; - if (daos_handle_is_valid(ctx->dc_poh)) { - ddb_print(ctx, "Close pool connection before attempting to list devices\n"); - return -DER_INVAL; - } + DDB_POOL_SHOULD_CLOSE(ctx); if (opt->db_path != NULL) { if (strlen(opt->db_path) == 0 || strlen(opt->db_path) >= DDB_PATH_MAX) { @@ -1240,10 +1267,7 @@ ddb_run_dev_replace(struct ddb_ctx *ctx, struct dev_replace_options *opt) uuid_t old_devid, new_devid; int rc; - if 
(daos_handle_is_valid(ctx->dc_poh)) { - ddb_print(ctx, "Close pool connection before attempting to replace device\n"); - return -DER_INVAL; - } + DDB_POOL_SHOULD_CLOSE(ctx); if (opt->db_path != NULL) { if (strlen(opt->db_path) == 0 || strlen(opt->db_path) >= DDB_PATH_MAX) { @@ -1591,11 +1615,7 @@ ddb_run_dtx_stat(struct ddb_ctx *ctx, struct dtx_stat_options *opt) struct vos_iter_anchors anchors = {0}; int rc; - if (daos_handle_is_inval(ctx->dc_poh)) { - ddb_error(ctx, "Not connected to a pool. Use 'open' to connect to a pool.\n"); - rc = -DER_NONEXIST; - goto done; - } + DDB_POOL_SHOULD_OPEN(ctx); args.ctx = ctx; args.opt = opt; @@ -1723,18 +1743,14 @@ ddb_run_dtx_aggr(struct ddb_ctx *ctx, struct dtx_aggr_options *opt) struct vos_iter_anchors anchors = {0}; int rc; + DDB_POOL_SHOULD_OPEN(ctx); + if (!ctx->dc_write_mode) { ddb_error(ctx, error_msg_write_mode_only); rc = -DER_INVAL; goto done; } - if (daos_handle_is_inval(ctx->dc_poh)) { - ddb_error(ctx, "Not connected to a pool. Use 'open' to connect to a pool.\n"); - rc = -DER_NONEXIST; - goto done; - } - switch (opt->format) { case DDB_DTX_AGGR_NOW: args.cmt_time = NULL; @@ -1774,6 +1790,8 @@ ddb_run_prov_mem(struct ddb_ctx *ctx, struct prov_mem_options *opt) { int rc = 0; + DDB_POOL_SHOULD_CLOSE(ctx); + if (opt->db_path == NULL || strlen(opt->db_path) == 0 || strlen(opt->db_path) >= DDB_PATH_MAX) { ddb_errorf(ctx, "db_path '%s' either too short (==0) or too long (>=%d).\n", diff --git a/src/utils/ddb/ddb_parse.h b/src/utils/ddb/ddb_parse.h index df5d43771db..439791823a2 100644 --- a/src/utils/ddb/ddb_parse.h +++ b/src/utils/ddb/ddb_parse.h @@ -1,6 +1,6 @@ /** * (C) Copyright 2019-2024 Intel Corporation. 
- * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -23,7 +23,7 @@ struct program_args { bool pa_write_mode; bool pa_get_help; }; -#define DB_PATH_LEN 64 +#define DB_PATH_LEN 256 struct vos_file_parts { char vf_db_path[DB_PATH_LEN]; uuid_t vf_pool_uuid; diff --git a/src/utils/ddb/ddb_vos.c b/src/utils/ddb/ddb_vos.c index 22160ae0e8f..1fd2e9893f7 100644 --- a/src/utils/ddb/ddb_vos.c +++ b/src/utils/ddb/ddb_vos.c @@ -1,7 +1,7 @@ /** * (C) Copyright 2022-2025 Intel Corporation. * (C) Copyright 2025 Vdura Inc. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP. + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -61,7 +61,7 @@ dv_pool_open(const char *path, const char *db_path, daos_handle_t *poh, uint32_t } int -dv_pool_destroy(const char *path) +dv_pool_destroy(const char *path, const char *db_path) { struct vos_file_parts path_parts = {0}; int rc, flags = 0; @@ -70,6 +70,11 @@ dv_pool_destroy(const char *path) if (!SUCCESS(rc)) return rc; + if (db_path != NULL && strnlen(db_path, PATH_MAX) != 0) { + memset(path_parts.vf_db_path, 0, sizeof(path_parts.vf_db_path)); + strncpy(path_parts.vf_db_path, db_path, sizeof(path_parts.vf_db_path) - 1); + } + rc = vos_self_init(path_parts.vf_db_path, true, path_parts.vf_target_idx); if (!SUCCESS(rc)) { D_ERROR("Failed to initialize VOS with path '%s': " DF_RC "\n", diff --git a/src/utils/ddb/ddb_vos.h b/src/utils/ddb/ddb_vos.h index 3303c643340..7d1da8900fc 100644 --- a/src/utils/ddb/ddb_vos.h +++ b/src/utils/ddb/ddb_vos.h @@ -1,6 +1,6 @@ /** * (C) Copyright 2022-2024 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP. + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP. * (C) Copyright 2025 Vdura Inc. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent @@ -55,7 +55,7 @@ int dv_pool_open(const char *path, const char *db_path, daos_handle_t *poh, uint32_t flags); int dv_pool_close(daos_handle_t poh); int -dv_pool_destroy(const char *path); +dv_pool_destroy(const char *path, const char *db_path); /* Update vos pool flags */ int diff --git a/src/utils/ddb/tests/ddb_test_driver.c b/src/utils/ddb/tests/ddb_test_driver.c index 07e0b0c8694..89746bd43c5 100644 --- a/src/utils/ddb/tests/ddb_test_driver.c +++ b/src/utils/ddb/tests/ddb_test_driver.c @@ -1,6 +1,6 @@ /** * (C) Copyright 2022-2024 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -314,7 +314,7 @@ ddb_teardown_vos(void **state) } if (tctx->dvt_special_pool_destroy) { - rc = dv_pool_destroy(tctx->dvt_pmem_file); + rc = dv_pool_destroy(tctx->dvt_pmem_file, NULL); } else { vos_self_init("/mnt/daos", false, 0); assert_success(vos_pool_destroy(tctx->dvt_pmem_file, tctx->dvt_pool_uuid)); From 7285bf7af6dc836a413e45c9ac2be62b7e859c50 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 13 Jan 2026 08:10:41 -0800 Subject: [PATCH 128/253] DAOS-18406 java: Bump log4jfrom 2.17.1 to 2.25.3 (#17298) Bumps org.apache.logging.log4j:log4j-core from 2.17.1 to 2.25.3. 
Signed-off-by: dependabot[bot] --- src/client/java/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/client/java/pom.xml b/src/client/java/pom.xml index d01d4293704..bef30ccc4fc 100644 --- a/src/client/java/pom.xml +++ b/src/client/java/pom.xml @@ -16,7 +16,7 @@ 1.4.0 5.4.0 1.7.25 - 2.17.1 + 2.25.3 true 1.8 1.8 From 80040ac3055ed15ad17f1d21fe97dfd3e9663254 Mon Sep 17 00:00:00 2001 From: Nasf-Fan Date: Wed, 14 Jan 2026 21:48:34 +0800 Subject: [PATCH 129/253] DAOS-18441 chk: destroy check instance after check cleanup (#17364) Move the logic for destroying check instance from CHK sm_cleanup() interface to CHK sm_fini() interface. That will avoid accessing freed memory by RPC race during system stop. Signed-off-by: Fan Yong --- src/chk/chk_common.c | 23 ++++++++--- src/chk/chk_engine.c | 90 +++++++++++++++++++++++++++--------------- src/chk/chk_internal.h | 25 +++++++++--- src/chk/chk_leader.c | 88 ++++++++++++++++++++++++++--------------- src/chk/chk_srv.c | 33 ++++++++++------ src/chk/chk_vos.c | 6 +-- 6 files changed, 176 insertions(+), 89 deletions(-) diff --git a/src/chk/chk_common.c b/src/chk/chk_common.c index 0e270926e79..460c37a50ff 100644 --- a/src/chk/chk_common.c +++ b/src/chk/chk_common.c @@ -1,6 +1,6 @@ /** * (C) Copyright 2022-2024 Intel Corporation. 
- * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -1234,6 +1234,20 @@ chk_ins_merge_info(uint32_t *status_dst, uint32_t status_src, uint32_t *phase_ds *status_dst = status_src; } +void +chk_ins_cleanup(struct chk_instance *ins) +{ + chk_stop_sched(ins); + ins->ci_inited = 0; + + chk_iv_ns_cleanup(&ins->ci_iv_ns); + + if (ins->ci_iv_group != NULL) { + crt_group_secondary_destroy(ins->ci_iv_group); + ins->ci_iv_group = NULL; + } +} + int chk_ins_init(struct chk_instance **p_ins) { @@ -1300,11 +1314,8 @@ chk_ins_fini(struct chk_instance **p_ins) if (ins == NULL) return; - ins->ci_inited = 0; - chk_iv_ns_cleanup(&ins->ci_iv_ns); - - if (ins->ci_iv_group != NULL) - crt_group_secondary_destroy(ins->ci_iv_group); + D_ASSERT(ins->ci_iv_ns == NULL); + D_ASSERT(ins->ci_iv_group == NULL); d_rank_list_free(ins->ci_ranks); D_ASSERT(d_list_empty(&ins->ci_dead_ranks)); diff --git a/src/chk/chk_engine.c b/src/chk/chk_engine.c index 85af34498c5..c301d55a184 100644 --- a/src/chk/chk_engine.c +++ b/src/chk/chk_engine.c @@ -2345,6 +2345,7 @@ chk_engine_start(uint64_t gen, uint32_t rank_nr, d_rank_t *ranks, uint32_t polic if (rc != 0) goto out_stop; + ins->ci_pause = 0; ins->ci_sched_running = 1; rc = dss_ult_create(chk_engine_sched, ins, DSS_XS_SYS, 0, DSS_DEEP_STACK_SZ, @@ -2407,6 +2408,8 @@ chk_engine_stop(uint64_t gen, int pool_nr, uuid_t pools[], uint32_t *flags) int i; int active = false; + CHK_IS_READY(ins); + if (gen != 0 && gen != cbk->cb_gen) D_GOTO(log, rc = -DER_NOTAPPLICABLE); @@ -2596,6 +2599,8 @@ chk_engine_query(uint64_t gen, int pool_nr, uuid_t pools[], uint32_t *ins_status int rc = 0; int i; + CHK_IS_READY(ins); + /* * We will support to check query from new check leader under the case of old leader * crashed, that may have different check generation. 
So do not check "cb_gen" here, @@ -2648,6 +2653,8 @@ chk_engine_mark_rank_dead(uint64_t gen, d_rank_t rank, uint32_t version) d_rank_list_t *rank_list = NULL; int rc = 0; + CHK_IS_READY(ins); + if (cbk->cb_gen != gen) D_GOTO(out, rc = -DER_NOTAPPLICABLE); @@ -2745,6 +2752,8 @@ chk_engine_act(uint64_t gen, uint64_t seq, uint32_t act) struct chk_instance *ins = chk_engine; int rc; + CHK_IS_READY(ins); + if (ins->ci_bk.cb_gen != gen) D_GOTO(out, rc = -DER_NOTAPPLICABLE); @@ -2874,6 +2883,8 @@ chk_engine_cont_list(uint64_t gen, uuid_t pool_uuid, uuid_t **conts, uint32_t *c int i = 0; int rc = 0; + CHK_IS_READY(ins); + if (cbk->cb_gen != gen) D_GOTO(out, rc = -DER_NOTAPPLICABLE); @@ -2929,6 +2940,8 @@ chk_engine_pool_start(uint64_t gen, uuid_t uuid, uint32_t phase, uint32_t flags) d_iov_t kiov; int rc; + CHK_IS_READY(ins); + if (ins->ci_bk.cb_ins_status != CHK__CHECK_INST_STATUS__CIS_RUNNING) D_GOTO(out, rc = -DER_SHUTDOWN); @@ -3047,6 +3060,8 @@ chk_engine_pool_mbs(uint64_t gen, uuid_t uuid, uint32_t phase, const char *label int rc; int i; + CHK_IS_READY(ins); + if (ins->ci_bk.cb_ins_status != CHK__CHECK_INST_STATUS__CIS_RUNNING) D_GOTO(out, rc = -DER_SHUTDOWN); @@ -3158,6 +3173,8 @@ chk_engine_set_policy(uint64_t gen, uint32_t policy_nr, struct chk_policy *polic struct chk_property *prop = &ins->ci_prop; int rc = 0; + CHK_IS_READY(ins); + /* Do nothing if no (engine) check instance is running. 
*/ if (cbk->cb_magic != CHK_BK_MAGIC_ENGINE || cbk->cb_gen != gen || cbk->cb_ins_status != CHK__CHECK_INST_STATUS__CIS_RUNNING) @@ -3295,6 +3312,8 @@ chk_engine_notify(struct chk_iv *iv) struct chk_pool_rec *cpr; int rc = 0; + CHK_IS_READY(ins); + if (cbk->cb_gen != iv->ci_gen) D_GOTO(out, rc = -DER_NOTAPPLICABLE); @@ -3512,31 +3531,16 @@ chk_engine_rejoin(void *args) DF_ENGINE" rejoin on rank %u with iv "DF_UUIDF": "DF_RC"\n", DP_ENGINE(ins), myrank, DP_UUID(cbk->cb_iv_uuid), DP_RC(rc)); ins->ci_rejoining = 0; - ins->ci_starting = 0; - ins->ci_inited = 1; -} - -void -chk_engine_pause(void) -{ - struct chk_instance *ins = chk_engine; - - chk_stop_sched(ins); - D_ASSERT(d_list_empty(&ins->ci_pool_list)); + ins->ci_starting = 0; } int -chk_engine_init(void) +chk_engine_setup(void) { - struct chk_traverse_pools_args ctpa = { 0 }; - struct chk_bookmark *cbk; - int rc; - - rc = chk_ins_init(&chk_engine); - if (rc != 0) - goto fini; - - chk_report_seq_init(chk_engine); + struct chk_instance *ins = chk_engine; + struct chk_bookmark *cbk = &ins->ci_bk; + struct chk_traverse_pools_args ctpa = {0}; + int rc; /* * DAOS global consistency check depends on all related engines' local @@ -3545,7 +3549,6 @@ chk_engine_init(void) * related local inconsistency firstly. 
*/ - cbk = &chk_engine->ci_bk; rc = chk_bk_fetch_engine(cbk); if (rc == -DER_NONEXIST) goto prop; @@ -3569,37 +3572,46 @@ chk_engine_init(void) cbk->cb_time.ct_stop_time = time(NULL); rc = chk_bk_update_engine(cbk); if (rc != 0) { - D_ERROR(DF_ENGINE" failed to reset status as 'PAUSED': "DF_RC"\n", - DP_ENGINE(chk_engine), DP_RC(rc)); + D_ERROR(DF_ENGINE " failed to reset status as 'PAUSED': " DF_RC "\n", + DP_ENGINE(ins), DP_RC(rc)); goto fini; } ctpa.ctpa_gen = cbk->cb_gen; - ctpa.ctpa_ins = chk_engine; + ctpa.ctpa_ins = ins; rc = chk_traverse_pools(chk_pools_pause_cb, &ctpa); /* * Failed to reset pool status will not affect next check start, so it is not fatal, * but related check query result may be confused for user. */ if (rc != 0) - D_WARN(DF_ENGINE" failed to reset pools status as 'PAUSED': "DF_RC"\n", - DP_ENGINE(chk_engine), DP_RC(rc)); + D_WARN(DF_ENGINE " failed to reset pools status as 'PAUSED': " DF_RC "\n", + DP_ENGINE(ins), DP_RC(rc)); } prop: - rc = chk_prop_fetch(&chk_engine->ci_prop, &chk_engine->ci_ranks); + rc = chk_prop_fetch(&ins->ci_prop, &ins->ci_ranks); if (rc == -DER_NONEXIST) rc = 0; fini: - if (rc != 0) - chk_ins_fini(&chk_engine); + if (rc != 0) { + chk_ins_fini(&ins); + } else { + chk_report_seq_init(ins); + ins->ci_inited = 1; + ins->ci_pause = 0; + } + return rc; } void -chk_engine_fini(void) +chk_engine_cleanup(void) { - chk_ins_fini(&chk_engine); + struct chk_instance *ins = chk_engine; + + chk_ins_cleanup(ins); + D_ASSERT(d_list_empty(&ins->ci_pool_list)); } int @@ -3609,6 +3621,8 @@ chk_engine_pool_stop(uuid_t pool_uuid, bool destroy) uint32_t phase; int rc = 0; + CHK_IS_READY(chk_engine); + if (destroy) { status = CHK__CHECK_POOL_STATUS__CPS_CHECKED; phase = CHK__CHECK_SCAN_PHASE__CSP_DONE; @@ -3624,3 +3638,15 @@ chk_engine_pool_stop(uuid_t pool_uuid, bool destroy) return rc; } + +int +chk_engine_init(void) +{ + return chk_ins_init(&chk_engine); +} + +void +chk_engine_fini(void) +{ + chk_ins_fini(&chk_engine); +} diff --git 
a/src/chk/chk_internal.h b/src/chk/chk_internal.h index eb60039cb7d..6c1d5508260 100644 --- a/src/chk/chk_internal.h +++ b/src/chk/chk_internal.h @@ -771,6 +771,8 @@ uint32_t chk_pool_merge_status(uint32_t status_a, uint32_t status_b); void chk_ins_merge_info(uint32_t *status_dst, uint32_t status_src, uint32_t *phase_dst, uint32_t phase_src, uint64_t *gen_dst, uint64_t gen_src); +void chk_ins_cleanup(struct chk_instance *ins); + int chk_ins_init(struct chk_instance **p_ins); void chk_ins_fini(struct chk_instance **p_ins); @@ -806,7 +808,9 @@ int chk_engine_notify(struct chk_iv *iv); void chk_engine_rejoin(void *args); -void chk_engine_pause(void); +int chk_engine_setup(void); + +void chk_engine_cleanup(void); int chk_engine_init(void); @@ -833,7 +837,9 @@ int chk_leader_notify(struct chk_iv *iv); int chk_leader_rejoin(uint64_t gen, d_rank_t rank, uuid_t iv_uuid, uint32_t *flags, int *pool_nr, uuid_t **pools); -void chk_leader_pause(void); +int chk_leader_setup(void); + +void chk_leader_cleanup(void); int chk_leader_init(void); @@ -912,9 +918,16 @@ int chk_prop_update(struct chk_property *cpp, d_rank_list_t *rank_list); int chk_traverse_pools(sys_db_trav_cb_t cb, void *args); -void chk_vos_init(void); +void chk_vos_setup(void); + +void chk_vos_cleanup(void); + +#define CHK_IS_READY(ins) \ + do { \ + if (unlikely((ins)->ci_inited == 0)) \ + return -DER_UNINIT; \ + } while (0) -void chk_vos_fini(void); /* clang-format on */ static inline bool @@ -1228,7 +1241,9 @@ chk_stop_sched(struct chk_instance *ins) static inline int chk_ins_can_start(struct chk_instance *ins) { - if (unlikely(!ins->ci_inited)) + CHK_IS_READY(ins); + + if (!ins->ci_is_leader && ins->ci_rejoining) return -DER_AGAIN; if (ins->ci_starting) diff --git a/src/chk/chk_leader.c b/src/chk/chk_leader.c index a2cf5ca8995..3f9d54b0d25 100644 --- a/src/chk/chk_leader.c +++ b/src/chk/chk_leader.c @@ -3013,6 +3013,7 @@ chk_leader_start(uint32_t rank_nr, d_rank_t *ranks, uint32_t policy_nr, struct c if (rc != 0) 
goto out_stop_remote; + ins->ci_pause = 0; ins->ci_sched_running = 1; rc = dss_ult_create(chk_leader_sched, ins, DSS_XS_SYS, 0, DSS_DEEP_STACK_SZ, @@ -3114,6 +3115,8 @@ chk_leader_stop(int pool_nr, uuid_t pools[]) int rc = 0; int i; + CHK_IS_READY(ins); + if (ins->ci_starting) D_GOTO(log, rc = -DER_BUSY); @@ -3329,6 +3332,8 @@ chk_leader_query(int pool_nr, uuid_t pools[], chk_query_head_cb_t head_cb, int i; bool skip; + CHK_IS_READY(ins); + /* * NOTE: Similar as stop case, we need the ability to query check information from * new leader if the old one dead. But the information from new leader may be @@ -3473,6 +3478,8 @@ chk_leader_prop(chk_prop_cb_t prop_cb, void *buf) { struct chk_property *prop = &chk_leader->ci_prop; + CHK_IS_READY(chk_leader); + return prop_cb(buf, prop->cp_policies, CHK_POLICY_MAX - 1, prop->cp_flags); } @@ -3531,6 +3538,8 @@ chk_leader_act(uint64_t seq, uint32_t act) struct chk_bookmark *cbk = &ins->ci_bk; int rc; + CHK_IS_READY(ins); + if (cbk->cb_magic != CHK_BK_MAGIC_LEADER) D_GOTO(out, rc = -DER_NOTLEADER); @@ -3564,6 +3573,8 @@ chk_leader_set_policy(uint32_t policy_nr, struct chk_policy *policies) struct chk_pending_rec *tmp; int rc; + CHK_IS_READY(ins); + /* Do nothing if no (leader) check instance is running. */ if (cbk->cb_magic != CHK_BK_MAGIC_LEADER || cbk->cb_ins_status != CHK__CHECK_INST_STATUS__CIS_RUNNING) @@ -3612,6 +3623,8 @@ chk_leader_report(struct chk_report_unit *cru, uint64_t *seq, int *decision) d_iov_t riov; int rc; + CHK_IS_READY(ins); + if (cbk->cb_magic != CHK_BK_MAGIC_LEADER) D_GOTO(out, rc = -DER_NOTLEADER); @@ -3739,6 +3752,8 @@ chk_leader_notify(struct chk_iv *iv) d_iov_t riov; int rc = 0; + CHK_IS_READY(ins); + /* Ignore the notification that is not applicable to current rank. 
*/ if (cbk->cb_magic != CHK_BK_MAGIC_LEADER) @@ -3818,6 +3833,8 @@ chk_leader_rejoin(uint64_t gen, d_rank_t rank, uuid_t iv_uuid, uint32_t *flags, struct chk_bookmark *cbk = &ins->ci_bk; int rc = 0; + CHK_IS_READY(ins); + if (cbk->cb_magic != CHK_BK_MAGIC_LEADER) D_GOTO(out, rc = -DER_NOTLEADER); @@ -3847,15 +3864,6 @@ chk_leader_rejoin(uint64_t gen, d_rank_t rank, uuid_t iv_uuid, uint32_t *flags, return rc; } -void -chk_leader_pause(void) -{ - struct chk_instance *ins = chk_leader; - - chk_stop_sched(ins); - D_ASSERT(d_list_empty(&ins->ci_rank_list)); -} - static void chk_rank_event_cb(d_rank_t rank, uint64_t incarnation, enum crt_event_source src, enum crt_event_type type, void *arg) @@ -3910,18 +3918,12 @@ chk_rank_event_cb(d_rank_t rank, uint64_t incarnation, enum crt_event_source src } int -chk_leader_init(void) +chk_leader_setup(void) { - struct chk_traverse_pools_args ctpa = { 0 }; - struct chk_bookmark *cbk; - int rc; - - rc = chk_ins_init(&chk_leader); - if (rc != 0) - goto fini; - - chk_leader->ci_is_leader = 1; - chk_report_seq_init(chk_leader); + struct chk_instance *ins = chk_leader; + struct chk_bookmark *cbk = &ins->ci_bk; + struct chk_traverse_pools_args ctpa = {0}; + int rc; /* * DAOS global consistency check depends on all related engines' local @@ -3930,7 +3932,6 @@ chk_leader_init(void) * related local inconsistency firstly. 
*/ - cbk = &chk_leader->ci_bk; rc = chk_bk_fetch_leader(cbk); if (rc == -DER_NONEXIST) goto prop; @@ -3965,32 +3966,57 @@ chk_leader_init(void) cbk->cb_time.ct_stop_time = time(NULL); rc = chk_bk_update_leader(cbk); if (rc != 0) { - D_ERROR(DF_LEADER" failed to reset ins status as 'PAUSED': "DF_RC"\n", - DP_LEADER(chk_leader), DP_RC(rc)); + D_ERROR(DF_LEADER " failed to reset ins status as 'PAUSED': " DF_RC "\n", + DP_LEADER(ins), DP_RC(rc)); goto fini; } ctpa.ctpa_gen = cbk->cb_gen; - ctpa.ctpa_ins = chk_leader; + ctpa.ctpa_ins = ins; rc = chk_traverse_pools(chk_pools_pause_cb, &ctpa); /* * Failed to reset pool status will not affect next check start, so it is not fatal, * but related check query result may be confused for user. */ if (rc != 0) - D_WARN(DF_LEADER" failed to reset pools status as 'PAUSED': "DF_RC"\n", - DP_LEADER(chk_leader), DP_RC(rc)); + D_WARN(DF_LEADER " failed to reset pools status as 'PAUSED': " DF_RC "\n", + DP_LEADER(ins), DP_RC(rc)); } prop: - rc = chk_prop_fetch(&chk_leader->ci_prop, &chk_leader->ci_ranks); + rc = chk_prop_fetch(&ins->ci_prop, &ins->ci_ranks); if (rc == 0 || rc == -DER_NONEXIST) rc = crt_register_event_cb(chk_rank_event_cb, NULL); fini: - if (rc != 0) - chk_ins_fini(&chk_leader); - else - chk_leader->ci_inited = 1; + if (rc != 0) { + chk_ins_fini(&ins); + } else { + chk_report_seq_init(ins); + ins->ci_inited = 1; + ins->ci_pause = 0; + } + + return rc; +} + +void +chk_leader_cleanup(void) +{ + struct chk_instance *ins = chk_leader; + + chk_ins_cleanup(ins); + D_ASSERT(d_list_empty(&ins->ci_rank_list)); +} + +int +chk_leader_init(void) +{ + int rc; + + rc = chk_ins_init(&chk_leader); + if (rc == 0) + chk_leader->ci_is_leader = 1; + return rc; } diff --git a/src/chk/chk_srv.c b/src/chk/chk_srv.c index 48543de0f96..84d6f3a21bc 100644 --- a/src/chk/chk_srv.c +++ b/src/chk/chk_srv.c @@ -1,6 +1,6 @@ /** * (C) Copyright 2022-2023 Intel Corporation. 
- * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -307,6 +307,14 @@ ds_chk_init(void) goto out; rc = chk_iv_init(); + if (rc != 0) + goto out; + + rc = chk_leader_init(); + if (rc != 0) + goto out; + + rc = chk_engine_init(); out: return rc; @@ -315,6 +323,9 @@ ds_chk_init(void) static int ds_chk_fini(void) { + chk_engine_fini(); + chk_leader_fini(); + return chk_iv_fini(); } @@ -323,14 +334,14 @@ ds_chk_setup(void) { int rc; - /* Do NOT move chk_vos_init into ds_chk_init, because sys_db is not ready at that time. */ - chk_vos_init(); + /* Do NOT move chk_vos_setup into ds_chk_init, because sys_db is not ready at that time. */ + chk_vos_setup(); - rc = chk_leader_init(); + rc = chk_leader_setup(); if (rc != 0) goto out_vos; - rc = chk_engine_init(); + rc = chk_engine_setup(); if (rc != 0) goto out_leader; @@ -347,9 +358,9 @@ ds_chk_setup(void) goto out_done; out_leader: - chk_leader_fini(); + chk_leader_cleanup(); out_vos: - chk_vos_fini(); + chk_vos_cleanup(); out_done: return rc; } @@ -357,11 +368,9 @@ ds_chk_setup(void) static int ds_chk_cleanup(void) { - chk_engine_pause(); - chk_leader_pause(); - chk_engine_fini(); - chk_leader_fini(); - chk_vos_fini(); + chk_engine_cleanup(); + chk_leader_cleanup(); + chk_vos_cleanup(); return 0; } diff --git a/src/chk/chk_vos.c b/src/chk/chk_vos.c index 4cd7356e7ae..5970f3207db 100644 --- a/src/chk/chk_vos.c +++ b/src/chk/chk_vos.c @@ -1,6 +1,6 @@ /** * (C) Copyright 2022 Intel Corporation. 
- * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -303,13 +303,13 @@ chk_traverse_pools(sys_db_trav_cb_t cb, void *args) } void -chk_vos_init(void) +chk_vos_setup(void) { chk_db = vos_db_get(); } void -chk_vos_fini(void) +chk_vos_cleanup(void) { chk_db = NULL; } From 8a24f84703dd499a3912af52da69e5c75841b3c1 Mon Sep 17 00:00:00 2001 From: Li Wei Date: Wed, 14 Jan 2026 22:55:35 +0900 Subject: [PATCH 130/253] DAOS-18296 rdb: Call vos_pool_create/open in ULTs (#17302) Call vos_pool_create and vos_pool_open in new deep-stack ULTs on the xstream to avoid pmemobj_create and pmemobj_open from potentially overflowing caller stacks. Signed-off-by: Li Wei --- src/engine/ult.c | 80 +++++++++++++++++++++++++++++- src/include/daos_srv/daos_engine.h | 9 +++- src/rdb/rdb.c | 18 +++---- 3 files changed, 95 insertions(+), 12 deletions(-) diff --git a/src/engine/ult.c b/src/engine/ult.c index 5e39533c202..bd21eae4908 100644 --- a/src/engine/ult.c +++ b/src/engine/ult.c @@ -1,6 +1,6 @@ /** * (C) Copyright 2016-2024 Intel Corporation. 
- * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -9,6 +9,7 @@ #include #include +#include #include #include "srv_internal.h" @@ -931,3 +932,80 @@ dss_chore_queue_fini(struct dss_xstream *dx) ABT_cond_free(&queue->chq_cond); ABT_mutex_free(&queue->chq_mutex); } + +struct dss_vos_pool_create_args { + const char *spc_path; + unsigned char *spc_uuid; + daos_size_t spc_scm_size; + daos_size_t spc_data_sz; + daos_size_t spc_meta_sz; + unsigned int spc_flags; + uint32_t spc_version; + daos_handle_t *spc_pool; +}; + +static int +dss_vos_pool_create_ult(void *varg) +{ + struct dss_vos_pool_create_args *arg = varg; + + return vos_pool_create(arg->spc_path, arg->spc_uuid, arg->spc_scm_size, arg->spc_data_sz, + arg->spc_meta_sz, arg->spc_flags, arg->spc_version, arg->spc_pool); +} + +/** + * Call vos_pool_create in a new deep-stack ULT on the same xstream. This is to + * avoid pmemobj_create or SPDK from overflowing the stack of the calling ULT. 
+ */ +int +dss_vos_pool_create(const char *path, unsigned char *uuid, daos_size_t scm_size, + daos_size_t data_sz, daos_size_t meta_sz, unsigned int flags, uint32_t version, + daos_handle_t *pool) +{ + struct dss_vos_pool_create_args args; + + args.spc_path = path; + args.spc_uuid = uuid; + args.spc_scm_size = scm_size; + args.spc_data_sz = data_sz; + args.spc_meta_sz = meta_sz; + args.spc_flags = flags; + args.spc_version = version; + args.spc_pool = pool; + + return dss_ult_execute(dss_vos_pool_create_ult, &args, NULL /* user_cb */, + NULL /* cb_args */, DSS_XS_SELF, 0 /* tgt_id */, DSS_DEEP_STACK_SZ); +} + +struct dss_vos_pool_open_args { + const char *spo_path; + unsigned char *spo_uuid; + unsigned int spo_flags; + daos_handle_t *spo_pool; +}; + +static int +dss_vos_pool_open_ult(void *varg) +{ + struct dss_vos_pool_open_args *arg = varg; + + return vos_pool_open(arg->spo_path, arg->spo_uuid, arg->spo_flags, arg->spo_pool); +} + +/** + * Call vos_pool_open in a new deep-stack ULT on the same xstream. This is to + * avoid pmemobj_open or SPDK from overflowing the stack of the calling ULT. + */ +int +dss_vos_pool_open(const char *path, unsigned char *uuid, unsigned int flags, daos_handle_t *pool) +{ + struct dss_vos_pool_open_args args; + + args.spo_path = path; + args.spo_uuid = uuid; + args.spo_flags = flags; + args.spo_pool = pool; + + return dss_ult_execute(dss_vos_pool_open_ult, &args, NULL /* user_cb */, NULL /* cb_args */, + DSS_XS_SELF, 0 /* tgt_id */, DSS_DEEP_STACK_SZ); +} diff --git a/src/include/daos_srv/daos_engine.h b/src/include/daos_srv/daos_engine.h index 94573b38bc7..c32b580db47 100644 --- a/src/include/daos_srv/daos_engine.h +++ b/src/include/daos_srv/daos_engine.h @@ -1,6 +1,6 @@ /** * (C) Copyright 2016-2024 Intel Corporation. 
- * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -846,4 +846,11 @@ dss_select_module_version(int module_id, uint8_t *module_ver) return dss_select_module_version(module_id, version); \ } +int +dss_vos_pool_create(const char *path, unsigned char *uuid, daos_size_t scm_size, + daos_size_t data_sz, daos_size_t meta_sz, unsigned int flags, uint32_t version, + daos_handle_t *pool); +int +dss_vos_pool_open(const char *path, unsigned char *uuid, unsigned int flags, daos_handle_t *pool); + #endif /* __DSS_API_H__ */ diff --git a/src/rdb/rdb.c b/src/rdb/rdb.c index c92adbe7b71..d40fb39d758 100644 --- a/src/rdb/rdb.c +++ b/src/rdb/rdb.c @@ -1,6 +1,6 @@ /** * (C) Copyright 2017-2023 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -57,13 +57,12 @@ rdb_create(const char *path, const uuid_t uuid, uint64_t caller_term, * basic system memory reservation and VOS_POF_EXCL for concurrent * access protection. */ - rc = vos_pool_create(path, (unsigned char *)uuid, params->rcp_size, 0 /* data_sz */, - 0 /* meta_sz */, - VOS_POF_SMALL | VOS_POF_EXCL | VOS_POF_RDB | VOS_POF_EXTERNAL_CHKPT, - params->rcp_vos_df_version, &pool); + rc = dss_vos_pool_create( + path, (unsigned char *)uuid, params->rcp_size, 0 /* data_sz */, 0 /* meta_sz */, + VOS_POF_SMALL | VOS_POF_EXCL | VOS_POF_RDB | VOS_POF_EXTERNAL_CHKPT, + params->rcp_vos_df_version, &pool); if (rc != 0) goto out; - ABT_thread_yield(); /* Create and open the metadata container. 
*/ rc = vos_cont_create(pool, (unsigned char *)uuid); @@ -427,9 +426,9 @@ rdb_open(const char *path, const uuid_t uuid, uint64_t caller_term, struct rdb_c * RDB pools specify VOS_POF_SMALL for basic system memory reservation * and VOS_POF_EXCL for concurrent access protection. */ - rc = vos_pool_open(path, (unsigned char *)uuid, - VOS_POF_SMALL | VOS_POF_EXCL | VOS_POF_RDB | VOS_POF_EXTERNAL_CHKPT, - &pool); + rc = dss_vos_pool_open(path, (unsigned char *)uuid, + VOS_POF_SMALL | VOS_POF_EXCL | VOS_POF_RDB | VOS_POF_EXTERNAL_CHKPT, + &pool); if (rc == -DER_ID_MISMATCH) { ds_notify_ras_eventf(RAS_RDB_DF_INCOMPAT, RAS_TYPE_INFO, RAS_SEV_ERROR, NULL /* hwid */, NULL /* rank */, NULL /* inc */, @@ -442,7 +441,6 @@ rdb_open(const char *path, const uuid_t uuid, uint64_t caller_term, struct rdb_c path, DP_RC(rc)); goto err; } - ABT_thread_yield(); rc = vos_cont_open(pool, (unsigned char *)uuid, &mc); if (rc != 0) { From e717e1fdf4a2396e4ea55a1f8c42c1dfaff0da5a Mon Sep 17 00:00:00 2001 From: Liu Xuezhao Date: Fri, 16 Jan 2026 16:35:59 +0800 Subject: [PATCH 131/253] DAOS-18368 rebuild: fix bug of ec_agg_boundary and agg peer update (#17324) 1. fix a bug of using ec_agg_boundary before checking its valid 2. add some more logs for rebuild fetch getting zero iod_size, to provide some hints for layout information. 3. fix a bug of EC agg peer update, some failed update need to be retried to avoid data corruption. 4. refine some detailed process of dtx_resync wating for rebuild scan. 
Signed-off-by: Xuezhao Liu --- src/include/daos_srv/pool.h | 3 +- src/object/obj_internal.h | 4 +- src/object/obj_layout.c | 34 ++++++++++++++ src/object/srv_ec_aggregate.c | 73 ++++++++++++++++++++++++++---- src/object/srv_obj.c | 80 +++++++++++++++++++++++++-------- src/object/srv_obj_migrate.c | 64 ++++++++++++++++++++------ src/rebuild/scan.c | 49 +++++++++++--------- src/rebuild/srv.c | 85 +++++++++++++++++++++++------------ 8 files changed, 298 insertions(+), 94 deletions(-) diff --git a/src/include/daos_srv/pool.h b/src/include/daos_srv/pool.h index e27f0bd89a4..9ad67d3e170 100644 --- a/src/include/daos_srv/pool.h +++ b/src/include/daos_srv/pool.h @@ -1,6 +1,6 @@ /* * (C) Copyright 2016-2024 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -80,6 +80,7 @@ struct ds_pool { struct sched_request *sp_ec_ephs_req; uint32_t sp_dtx_resync_version; + uint32_t sp_gl_dtx_resync_version; /* global DTX resync version */ /* Special pool/container handle uuid, which are * created on the pool leader step up, and propagated * to all servers by IV. Then they will be used by server diff --git a/src/object/obj_internal.h b/src/object/obj_internal.h index 598c37644ee..ba3191e761b 100644 --- a/src/object/obj_internal.h +++ b/src/object/obj_internal.h @@ -1,6 +1,6 @@ /** * (C) Copyright 2016-2024 Intel Corporation. 
- * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -1181,6 +1181,8 @@ iov_alloc_for_csum_info(d_iov_t *iov, struct dcs_csum_info *csum_info); /* obj_layout.c */ int obj_pl_grp_idx(uint32_t layout_gl_ver, uint64_t hash, uint32_t grp_nr); +void +obj_dump_grp_layout(daos_handle_t oh, uint32_t shard); int obj_pl_place(struct pl_map *map, uint16_t layout_ver, struct daos_obj_md *md, diff --git a/src/object/obj_layout.c b/src/object/obj_layout.c index 189261ad31e..87958b70a11 100644 --- a/src/object/obj_layout.c +++ b/src/object/obj_layout.c @@ -1,5 +1,6 @@ /* * (C) Copyright 2016-2023 Intel Corporation. + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -95,3 +96,36 @@ obj_layout_diff(struct pl_map *map, daos_unit_oid_t oid, uint32_t new_ver, uint3 return rc; } + +void +obj_dump_grp_layout(daos_handle_t oh, uint32_t shard) +{ + struct dc_object *obj; + struct dc_obj_shard *obj_shard; + uint32_t grp_idx, i, nr; + + obj = obj_hdl2ptr(oh); + if (obj == NULL) { + D_INFO("invalid oh"); + return; + } + if (shard >= obj->cob_shards_nr) { + D_ERROR("bad shard %d, cob_shards_nr %d", shard, obj->cob_shards_nr); + goto out; + } + + grp_idx = shard / obj->cob_grp_size; + D_INFO(DF_OID " shard %d, grp_idx %d, grp_size %d", DP_OID(obj->cob_md.omd_id), shard, + grp_idx, obj->cob_grp_size); + for (i = grp_idx * obj->cob_grp_size, nr = 0; nr < obj->cob_grp_size; i++, nr++) { + obj_shard = &obj->cob_shards->do_shards[i]; + D_INFO("shard %d/%d/%d, tgt_id %d, rank %d, tgt_idx %d, " + "rebuilding %d, reintegrating %d, fseq %d", + i, obj_shard->do_shard_idx, obj_shard->do_shard, obj_shard->do_target_id, + obj_shard->do_target_rank, obj_shard->do_target_idx, + obj_shard->do_rebuilding, obj_shard->do_reintegrating, obj_shard->do_fseq); + } + +out: + obj_decref(obj); +} diff --git 
a/src/object/srv_ec_aggregate.c b/src/object/srv_ec_aggregate.c index 96abd078284..708f77540a8 100644 --- a/src/object/srv_ec_aggregate.c +++ b/src/object/srv_ec_aggregate.c @@ -1,7 +1,7 @@ /** * (C) Copyright 2020-2024 Intel Corporation. * (C) Copyright 2025 Google LLC - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -1278,6 +1278,42 @@ agg_process_partial_stripe(struct ec_agg_entry *entry) return rc; } +static bool +agg_peer_failed(struct ec_agg_param *agg_param, struct daos_shard_loc *peer_loc) +{ + struct pool_target *targets = NULL; + uint32_t failed_tgts_cnt = 0; + int i; + int rc; + + rc = pool_map_find_failed_tgts(agg_param->ap_pool_info.api_pool->sp_map, &targets, + &failed_tgts_cnt); + if (rc) { + DL_ERROR(rc, DF_CONT " pool_map_find_failed_tgts failed.", + DP_CONT(agg_param->ap_pool_info.api_pool_uuid, + agg_param->ap_pool_info.api_cont_uuid)); + return false; + } + + if (targets == NULL || failed_tgts_cnt == 0) + return false; + + for (i = 0; i < failed_tgts_cnt; i++) { + if (targets[i].ta_comp.co_rank == peer_loc->sd_rank && + targets[i].ta_comp.co_index == peer_loc->sd_tgt_idx) { + D_DEBUG(DB_EPC, DF_CONT " peer parity tgt failed rank %d, tgt_idx %d.\n", + DP_CONT(agg_param->ap_pool_info.api_pool_uuid, + agg_param->ap_pool_info.api_cont_uuid), + peer_loc->sd_rank, peer_loc->sd_tgt_idx); + D_FREE(targets); + return true; + } + } + + D_FREE(targets); + return false; +} + int agg_peer_check_avail(struct ec_agg_param *agg_param, struct ec_agg_entry *entry) { @@ -1334,6 +1370,12 @@ agg_peer_check_avail(struct ec_agg_param *agg_param, struct ec_agg_entry *entry) return rc; } +static bool +agg_peer_retryable_err(int err) +{ + return err == -DER_STALE || err == -DER_TIMEDOUT || daos_crt_network_error(err); +} + /* Sends the generated parity and the stripe number to the peer * parity target. 
Handler writes the parity and deletes the replicas * for the stripe. @@ -1382,7 +1424,7 @@ agg_peer_update_ult(void *arg) obj = obj_hdl2ptr(entry->ae_obj_hdl); for (peer = 0; peer < p; peer++) { uint64_t enqueue_id = 0; - bool overloaded; + bool peer_retry; if (peer == pidx) continue; @@ -1390,7 +1432,7 @@ agg_peer_update_ult(void *arg) tgt_ep.ep_rank = entry->ae_peer_pshards[peer].sd_rank; tgt_ep.ep_tag = entry->ae_peer_pshards[peer].sd_tgt_idx; retry: - overloaded = false; + peer_retry = false; rc = ds_obj_req_create(dss_get_module_info()->dmi_ctx, &tgt_ep, DAOS_OBJ_RPC_EC_AGGREGATE, &rpc); if (rc) { @@ -1470,13 +1512,20 @@ agg_peer_update_ult(void *arg) rc = ec_agg_out->ea_status; if (rc == -DER_OVERLOAD_RETRY) { enqueue_id = ec_agg_out->ea_comm_out.req_out_enqueue_id; - overloaded = true; + peer_retry = true; } D_CDEBUG(rc == 0, DB_TRACE, DLOG_ERR, "update parity[%d] to %d:%d, status = " DF_RC "\n", peer, tgt_ep.ep_rank, tgt_ep.ep_tag, DP_RC(rc)); peer_updated += rc == 0; } + if (rc != 0 && peer_updated && agg_peer_retryable_err(rc) && + !agg_peer_failed(agg_param, &entry->ae_peer_pshards[peer])) { + DL_INFO(rc, DF_UOID " pidx %d to parity[%d] will retry.", + DP_UOID(entry->ae_oid), pidx, peer); + peer_retry = true; + } + next: if (bulk_hdl) crt_bulk_free(bulk_hdl); @@ -1487,7 +1536,7 @@ agg_peer_update_ult(void *arg) rpc = NULL; bulk_hdl = NULL; iod_csums = NULL; - if (overloaded) { + if (peer_retry) { dss_sleep(daos_rpc_rand_delay(max_delay) << 10); goto retry; } @@ -1665,13 +1714,13 @@ agg_process_holes_ult(void *arg) for (peer = 0; peer < p; peer++) { uint64_t enqueue_id = 0; uint32_t peer_shard; - bool overloaded; + bool peer_retry; if (pidx == peer) continue; retry: - overloaded = false; + peer_retry = false; D_ASSERT(entry->ae_peer_pshards[peer].sd_rank != DAOS_TGT_IGNORE); tgt_ep.ep_rank = entry->ae_peer_pshards[peer].sd_rank; tgt_ep.ep_tag = entry->ae_peer_pshards[peer].sd_tgt_idx; @@ -1719,7 +1768,7 @@ agg_process_holes_ult(void *arg) rc = 
ec_rep_out->er_status; if (rc == -DER_OVERLOAD_RETRY) { enqueue_id = ec_rep_out->er_comm_out.req_out_enqueue_id; - overloaded = true; + peer_retry = true; } D_CDEBUG(rc == 0, DB_TRACE, DLOG_ERR, DF_UOID " parity[%d] er_status = " DF_RC "\n", @@ -1728,7 +1777,13 @@ agg_process_holes_ult(void *arg) } crt_req_decref(rpc); rpc = NULL; - if (overloaded) { + if (rc != 0 && peer_updated && agg_peer_retryable_err(rc) && + !agg_peer_failed(agg_param, &entry->ae_peer_pshards[peer])) { + DL_INFO(rc, DF_UOID " pidx %d to parity[%d] will retry.", + DP_UOID(entry->ae_oid), pidx, peer); + peer_retry = true; + } + if (peer_retry) { dss_sleep(daos_rpc_rand_delay(max_delay) << 10); goto retry; } diff --git a/src/object/srv_obj.c b/src/object/srv_obj.c index 6ea4bb63ab6..b08b8981dee 100644 --- a/src/object/srv_obj.c +++ b/src/object/srv_obj.c @@ -1,7 +1,7 @@ /** * (C) Copyright 2016-2024 Intel Corporation. * (C) Copyright 2025 Google LLC - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -701,6 +701,22 @@ obj_set_reply_sizes(crt_rpc_t *rpc, daos_iod_t *iods, int iod_nr, uint8_t *skips sizes[i] = iods[idx].iod_size; D_DEBUG(DB_IO, DF_UOID" %d:"DF_U64"\n", DP_UOID(orw->orw_oid), i, iods[idx].iod_size); + if ((orw->orw_flags & ORF_FOR_MIGRATION) && sizes[i] == 0) { + D_DEBUG(DB_REBUILD, + DF_CONT " obj " DF_UOID "rebuild fetch zero iod_size, " + "i:%d/idx:%d, iod_nr %d, orw_epoch " DF_X64 + ", orw_epoch_first " DF_X64 " may cause DER_DATA_LOSS", + DP_CONT(orw->orw_pool_uuid, orw->orw_co_uuid), + DP_UOID(orw->orw_oid), i, idx, iods[idx].iod_nr, orw->orw_epoch, + orw->orw_epoch_first); + if (iods[idx].iod_type == DAOS_IOD_ARRAY) { + int j; + + for (j = 0; j < min(8, iods[idx].iod_nr); j++) + D_DEBUG(DB_REBUILD, "recx[%d] - " DF_RECX, j, + DP_RECX(iods[idx].iod_recxs[j])); + } + } idx++; } @@ -1368,7 +1384,7 @@ struct ec_agg_boundary_arg { }; static int 
-obj_fetch_ec_agg_boundary(void *data) +obj_fetch_ec_agg_boundary_ult(void *data) { struct ec_agg_boundary_arg *arg = data; int rc; @@ -1381,6 +1397,33 @@ obj_fetch_ec_agg_boundary(void *data) return rc; } +static int +obj_fetch_ec_agg_boundary(struct obj_io_context *ioc, daos_unit_oid_t *uoid) +{ + struct ec_agg_boundary_arg arg; + int rc; + + arg.eab_pool = ioc->ioc_coc->sc_pool->spc_pool; + uuid_copy(arg.eab_co_uuid, ioc->ioc_coc->sc_uuid); + rc = dss_ult_execute(obj_fetch_ec_agg_boundary_ult, &arg, NULL, NULL, DSS_XS_SYS, 0, 0); + if (rc) { + DL_ERROR(rc, DF_CONT ", " DF_UOID " fetch ec_agg_boundary failed.", + DP_CONT(ioc->ioc_coc->sc_pool_uuid, ioc->ioc_coc->sc_uuid), + DP_UOID(*uoid)); + return rc; + } + if (ioc->ioc_coc->sc_ec_agg_eph_valid == 0) { + rc = -DER_FETCH_AGAIN; + DL_INFO(rc, DF_CONT ", " DF_UOID " zero ec_agg_boundary.", + DP_CONT(ioc->ioc_coc->sc_pool_uuid, ioc->ioc_coc->sc_uuid), DP_UOID(*uoid)); + return rc; + } + D_DEBUG(DB_IO, DF_CONT ", " DF_UOID " fetched ec_agg_eph_boundary " DF_X64 "\n", + DP_CONT(ioc->ioc_coc->sc_pool_uuid, ioc->ioc_coc->sc_uuid), DP_UOID(*uoid), + ioc->ioc_coc->sc_ec_agg_eph_boundary); + return 0; +} + static int obj_local_rw_internal(crt_rpc_t *rpc, struct obj_io_context *ioc, daos_iod_t *iods, struct dcs_iod_csums *iod_csums, uint64_t *offs, uint8_t *skips, @@ -1503,29 +1546,14 @@ obj_local_rw_internal(crt_rpc_t *rpc, struct obj_io_context *ioc, daos_iod_t *io } if ((ec_deg_fetch || (ec_recov && get_parity_list)) && ioc->ioc_coc->sc_ec_agg_eph_valid == 0) { - struct ec_agg_boundary_arg arg; - - arg.eab_pool = ioc->ioc_coc->sc_pool->spc_pool; - uuid_copy(arg.eab_co_uuid, ioc->ioc_coc->sc_uuid); - rc = dss_ult_execute(obj_fetch_ec_agg_boundary, &arg, NULL, NULL, - DSS_XS_SYS, 0, 0); + rc = obj_fetch_ec_agg_boundary(ioc, &orw->orw_oid); if (rc) { DL_ERROR(rc, DF_CONT ", " DF_UOID " fetch ec_agg_boundary failed.", DP_CONT(ioc->ioc_coc->sc_pool_uuid, ioc->ioc_coc->sc_uuid), DP_UOID(orw->orw_oid)); goto out; } - if 
(ioc->ioc_coc->sc_ec_agg_eph_valid == 0) { - rc = -DER_FETCH_AGAIN; - DL_INFO(rc, DF_CONT ", " DF_UOID " zero ec_agg_boundary.", - DP_CONT(ioc->ioc_coc->sc_pool_uuid, ioc->ioc_coc->sc_uuid), - DP_UOID(orw->orw_oid)); - goto out; - } - D_DEBUG(DB_IO, - DF_CONT ", " DF_UOID " fetched ec_agg_eph_boundary " DF_X64 "\n", - DP_CONT(ioc->ioc_coc->sc_pool_uuid, ioc->ioc_coc->sc_uuid), - DP_UOID(orw->orw_oid), ioc->ioc_coc->sc_ec_agg_eph_boundary); + D_ASSERT(ioc->ioc_coc->sc_ec_agg_eph_valid); } if (get_parity_list) { D_ASSERT(!ec_deg_fetch); @@ -3030,6 +3058,20 @@ ds_obj_rw_handler(crt_rpc_t *rpc) if (orw->orw_flags & ORF_FETCH_EPOCH_EC_AGG_BOUNDARY) { uint64_t rebuild_epoch; + if (ioc.ioc_coc->sc_ec_agg_eph_valid == 0) { + rc = obj_fetch_ec_agg_boundary(&ioc, &orw->orw_oid); + if (rc) { + DL_ERROR(rc, + DF_CONT ", " DF_UOID " fetch ec_agg_boundary " + "failed.", + DP_CONT(ioc.ioc_coc->sc_pool_uuid, + ioc.ioc_coc->sc_uuid), + DP_UOID(orw->orw_oid)); + goto out; + } + D_ASSERT(ioc.ioc_coc->sc_ec_agg_eph_valid); + } + D_ASSERTF(orw->orw_epoch <= orw->orw_epoch_first, "bad orw_epoch " DF_X64 ", orw_epoch_first " DF_X64 "\n", orw->orw_epoch, orw->orw_epoch_first); diff --git a/src/object/srv_obj_migrate.c b/src/object/srv_obj_migrate.c index c3d4074516e..1c4655c099c 100644 --- a/src/object/srv_obj_migrate.c +++ b/src/object/srv_obj_migrate.c @@ -828,7 +828,7 @@ migrate_fetch_update_inline(struct migrate_one *mrone, daos_handle_t oh, struct dcs_iod_csums *iod_csums = NULL; int iod_cnt = 0; int start; - char iov_buf[OBJ_ENUM_UNPACK_MAX_IODS][MAX_BUF_SIZE]; + char iov_buf[OBJ_ENUM_UNPACK_MAX_IODS][MAX_BUF_SIZE]; bool fetch = false; int i; int rc = 0; @@ -1194,6 +1194,28 @@ migrate_fetch_update_parity(struct migrate_one *mrone, daos_handle_t oh, return rc; } +static void +mrone_dump_info(struct migrate_one *mrone, daos_handle_t oh, daos_iod_t *iod) +{ + int i; + + if (daos_is_dkey_uint64(mrone->mo_oid.id_pub) && mrone->mo_dkey.iov_len == 8) + D_INFO(DF_RB ": " DF_UOID " int dkey 
" DF_U64 ", akey " DF_KEY ", iod_type %d, " + " iod_nr %d, iod_size " DF_U64, + DP_RB_MPT(mrone->mo_tls), DP_UOID(mrone->mo_oid), + *(uint64_t *)mrone->mo_dkey.iov_buf, DP_KEY(&iod->iod_name), iod->iod_type, + iod->iod_nr, iod->iod_size); + else + D_INFO(DF_RB ": " DF_UOID " dkey " DF_KEY ", akey " DF_KEY ", iod_type %d, " + " iod_nr %d, iod_size " DF_U64, + DP_RB_MPT(mrone->mo_tls), DP_UOID(mrone->mo_oid), DP_KEY(&mrone->mo_dkey), + DP_KEY(&iod->iod_name), iod->iod_type, iod->iod_nr, iod->iod_size); + if (iod->iod_type == DAOS_IOD_ARRAY) + for (i = 0; i < min(8, iod->iod_nr); i++) + D_INFO("recxs[%d] - " DF_RECX, i, DP_RECX(iod->iod_recxs[i])); + obj_dump_grp_layout(oh, mrone->mo_oid.id_shard); +} + static int migrate_fetch_update_single(struct migrate_one *mrone, daos_handle_t oh, struct ds_cont_child *ds_cont) @@ -1262,6 +1284,8 @@ migrate_fetch_update_single(struct migrate_one *mrone, daos_handle_t oh, daos_iod_t *iod = &mrone->mo_iods[i]; if (mrone->mo_iods[i].iod_size == 0) { + static __thread int log_nr; + /* zero size iod will cause assertion failure * in VOS, so let's check here. * So the object is being destroyed between @@ -1273,12 +1297,17 @@ migrate_fetch_update_single(struct migrate_one *mrone, daos_handle_t oh, * the rebuild and retry. 
*/ rc = -DER_DATA_LOSS; - D_DEBUG(DB_REBUILD, - DF_RB ": " DF_UOID " %p dkey " DF_KEY " " DF_KEY - " nr %d/%d eph " DF_U64 " " DF_RC "\n", - DP_RB_MRO(mrone), DP_UOID(mrone->mo_oid), mrone, - DP_KEY(&mrone->mo_dkey), DP_KEY(&mrone->mo_iods[i].iod_name), - mrone->mo_iod_num, i, mrone->mo_epoch, DP_RC(rc)); + DL_INFO(rc, + DF_RB ": cont " DF_UUID " obj " DF_UOID " dkey " DF_KEY " " DF_KEY + " nr %d/%d eph " DF_X64, + DP_RB_MRO(mrone), DP_UUID(mrone->mo_cont_uuid), + DP_UOID(mrone->mo_oid), DP_KEY(&mrone->mo_dkey), + DP_KEY(&mrone->mo_iods[i].iod_name), mrone->mo_iod_num, i, + mrone->mo_epoch); + if (log_nr <= 128) { + mrone_dump_info(mrone, oh, &mrone->mo_iods[i]); + log_nr++; + } D_GOTO(out, rc); } @@ -1445,6 +1474,8 @@ __migrate_fetch_update_bulk(struct migrate_one *mrone, daos_handle_t oh, for (i = 0; rc == 0 && i < iod_num; i++) { if (iods[i].iod_size == 0) { + static __thread int log_nr; + /* zero size iod will cause assertion failure * in VOS, so let's check here. * So the object is being destroyed between @@ -1456,11 +1487,16 @@ __migrate_fetch_update_bulk(struct migrate_one *mrone, daos_handle_t oh, * the rebuild and retry. 
*/ rc = -DER_DATA_LOSS; - D_INFO(DF_RB ": " DF_UOID " %p dkey " DF_KEY " " DF_KEY - " nr %d/%d eph " DF_U64 " " DF_RC "\n", - DP_RB_MRO(mrone), DP_UOID(mrone->mo_oid), mrone, - DP_KEY(&mrone->mo_dkey), DP_KEY(&iods[i].iod_name), iod_num, i, - mrone->mo_epoch, DP_RC(rc)); + DL_INFO(rc, + DF_RB ": cont " DF_UUID " obj " DF_UOID " dkey " DF_KEY " " DF_KEY + " nr %d/%d mo_epoch " DF_X64 " fetch_eph " DF_X64, + DP_RB_MRO(mrone), DP_UUID(mrone->mo_cont_uuid), + DP_UOID(mrone->mo_oid), DP_KEY(&mrone->mo_dkey), + DP_KEY(&iods[i].iod_name), iod_num, i, mrone->mo_epoch, fetch_eph); + if (log_nr <= 128) { + mrone_dump_info(mrone, oh, &mrone->mo_iods[i]); + log_nr++; + } D_GOTO(end, rc); } } @@ -3023,8 +3059,8 @@ migrate_one_epoch_object(daos_epoch_range_t *epr, struct migrate_pool_tls *tls, /* Each object enumeration RPC will at least one OID */ if (num < minimum_nr && (enum_flags & DIOF_TO_SPEC_GROUP)) { - D_DEBUG(DB_REBUILD, DF_RB ": enumeration buffer %u empty" DF_UOID "\n", - DP_RB_MPT(tls), num, DP_UOID(arg->oid)); + D_INFO(DF_RB ": enumeration buffer %u empty" DF_UOID, DP_RB_MPT(tls), num, + DP_UOID(arg->oid)); break; } diff --git a/src/rebuild/scan.c b/src/rebuild/scan.c index e38e9d73e00..61f8d86680c 100644 --- a/src/rebuild/scan.c +++ b/src/rebuild/scan.c @@ -1,6 +1,6 @@ /** * (C) Copyright 2017-2024 Intel Corporation. 
- * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -68,9 +68,9 @@ rebuild_obj_fill_buf(daos_handle_t ih, d_iov_t *key_iov, shards[count] = obj_val->shard; arg->count++; - D_DEBUG(DB_REBUILD, "send oid/con "DF_UOID"/"DF_UUID" ephs "DF_U64 - "shard %d cnt %d tgt_id %d\n", DP_UOID(oids[count]), - DP_UUID(arg->cont_uuid), obj_val->eph, shards[count], + D_DEBUG(DB_REBUILD, + "send oid/con " DF_UOID "/" DF_UUID " ephs " DF_X64 " shard %d cnt %d tgt_id %d\n", + DP_UOID(oids[count]), DP_UUID(arg->cont_uuid), obj_val->eph, shards[count], arg->count, arg->tgt_id); rc = dbtree_iter_delete(ih, NULL); @@ -1078,13 +1078,21 @@ static void rebuild_scan_leader(void *data) { struct rebuild_tgt_pool_tracker *rpt = data; - struct rebuild_pool_tls *tls; - int rc; - bool wait = false; - - D_DEBUG(DB_REBUILD, DF_RB " check resync %u/%u < %u\n", DP_RB_RPT(rpt), - rpt->rt_pool->sp_dtx_resync_version, rpt->rt_global_dtx_resync_version, - rpt->rt_rebuild_ver); + struct rebuild_pool_tls *tls; + int rc; + + if (rpt->rt_pool->sp_gl_dtx_resync_version >= rpt->rt_rebuild_ver) { + D_DEBUG(DB_REBUILD, DF_RB " sp_gl_dtx_resync_version %d exceed rt_rebuild_ver %d.", + DP_RB_RPT(rpt), rpt->rt_pool->sp_gl_dtx_resync_version, + rpt->rt_rebuild_ver); + if (rpt->rt_global_dtx_resync_version < rpt->rt_pool->sp_gl_dtx_resync_version) + rpt->rt_global_dtx_resync_version = rpt->rt_pool->sp_gl_dtx_resync_version; + goto do_scan; + } else { + D_DEBUG(DB_REBUILD, DF_RB " check resync %u/%u < %u\n", DP_RB_RPT(rpt), + rpt->rt_pool->sp_dtx_resync_version, rpt->rt_global_dtx_resync_version, + rpt->rt_rebuild_ver); + } /* Wait for dtx resync to finish */ while (rpt->rt_global_dtx_resync_version < rpt->rt_rebuild_ver) { @@ -1093,7 +1101,6 @@ rebuild_scan_leader(void *data) if (rpt->rt_global_dtx_resync_version < rpt->rt_rebuild_ver) { D_INFO(DF_RB " wait for global dtx %u\n", 
DP_RB_RPT(rpt), rpt->rt_global_dtx_resync_version); - wait = true; ABT_cond_wait(rpt->rt_global_dtx_wait_cond, rpt->rt_lock); } ABT_mutex_unlock(rpt->rt_lock); @@ -1103,23 +1110,21 @@ rebuild_scan_leader(void *data) D_GOTO(out, rc = -DER_SHUTDOWN); } } + if (rpt->rt_pool->sp_gl_dtx_resync_version < rpt->rt_global_dtx_resync_version) { + rpt->rt_pool->sp_gl_dtx_resync_version = rpt->rt_global_dtx_resync_version; + D_INFO(DF_RB " update sp_gl_dtx_resync_version to %d", DP_RB_RPT(rpt), + rpt->rt_pool->sp_gl_dtx_resync_version); + } - if (wait) - D_INFO(DF_RB " scan collective begin\n", DP_RB_RPT(rpt)); - else - D_DEBUG(DB_REBUILD, DF_RB " scan collective begin\n", DP_RB_RPT(rpt)); - +do_scan: + D_INFO(DF_RB " scan collective begin\n", DP_RB_RPT(rpt)); rc = ds_pool_thread_collective(rpt->rt_pool_uuid, PO_COMP_ST_NEW | PO_COMP_ST_DOWN | PO_COMP_ST_DOWNOUT, rebuild_scanner, rpt, DSS_ULT_DEEP_STACK); if (rc) D_GOTO(out, rc); - if (wait) - D_INFO(DF_RB " rebuild scan collective done\n", DP_RB_RPT(rpt)); - else - D_DEBUG(DB_REBUILD, DF_RB "rebuild scan collective done\n", DP_RB_RPT(rpt)); - + D_INFO(DF_RB " rebuild scan collective done\n", DP_RB_RPT(rpt)); ABT_mutex_lock(rpt->rt_lock); rc = ds_pool_task_collective(rpt->rt_pool_uuid, PO_COMP_ST_NEW | PO_COMP_ST_DOWN | PO_COMP_ST_DOWNOUT, rebuild_scan_done, rpt, 0); diff --git a/src/rebuild/srv.c b/src/rebuild/srv.c index 88e6b53c851..5dad600030e 100644 --- a/src/rebuild/srv.c +++ b/src/rebuild/srv.c @@ -1,6 +1,6 @@ /** * (C) Copyright 2016-2024 Intel Corporation. 
- * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -288,6 +288,9 @@ rebuild_leader_set_status(struct rebuild_global_pool_tracker *rgt, return; } + if (status->dtx_resync_version != resync_ver) + D_INFO(DF_RB " rank %d, update dtx_resync_version from %d to %d", DP_RB_RGT(rgt), + rank, status->dtx_resync_version, resync_ver); status->dtx_resync_version = resync_ver; if (flags & SCAN_DONE) status->scan_done = 1; @@ -309,6 +312,7 @@ rebuild_leader_set_update_time(struct rebuild_global_pool_tracker *rgt, d_rank_t D_INFO("rank %u is not included in this rebuild.\n", rank); } +#define RB_DTX_RESYNC_VER_SKIP ((uint32_t)-1) static uint32_t rebuild_get_global_dtx_resync_ver(struct rebuild_global_pool_tracker *rgt) { @@ -318,7 +322,7 @@ rebuild_get_global_dtx_resync_ver(struct rebuild_global_pool_tracker *rgt) D_ASSERT(rgt->rgt_servers_number > 0); D_ASSERT(rgt->rgt_servers != NULL); for (i = 0; i < rgt->rgt_servers_number; i++) { - if (rgt->rgt_servers[i].dtx_resync_version == (uint32_t)(-1)) + if (rgt->rgt_servers[i].dtx_resync_version == RB_DTX_RESYNC_VER_SKIP) continue; if (min > rgt->rgt_servers[i].dtx_resync_version) @@ -958,53 +962,69 @@ rebuild_leader_status_check(struct ds_pool *pool, uint32_t op, char sbuf[RBLD_SBUF_LEN]; double now; char *str; - d_rank_list_t excluded = {0}; + d_rank_list_t rank_list = {0}; bool rebuild_abort = false; int i; + now = ABT_get_wtime(); ABT_rwlock_rdlock(pool->sp_lock); rc = map_ranks_init(pool->sp_map, - PO_COMP_ST_UP | PO_COMP_ST_DOWN | - PO_COMP_ST_DOWNOUT | PO_COMP_ST_NEW, - &excluded); + PO_COMP_ST_UP | PO_COMP_ST_DOWN | PO_COMP_ST_DOWNOUT | + PO_COMP_ST_NEW, + &rank_list); if (rc != 0) { D_INFO(DF_RB ": get rank list: %d\n", DP_RB_RGT(rgt), rc); ABT_rwlock_unlock(pool->sp_lock); goto sleep; } - for (i = 0; i < excluded.rl_nr; i++) { + for (i = 0; i < rank_list.rl_nr; i++) { struct pool_domain 
*dom; - dom = pool_map_find_dom_by_rank(pool->sp_map, excluded.rl_ranks[i]); + dom = pool_map_find_dom_by_rank(pool->sp_map, rank_list.rl_ranks[i]); D_ASSERT(dom != NULL); if (rgt->rgt_opc == RB_OP_REBUILD) { if (dom->do_comp.co_status == PO_COMP_ST_UP) { if (dom->do_comp.co_in_ver > rgt->rgt_rebuild_ver) { - D_INFO(DF_RB ": cancel rebuild co_in_ver=%u\n", - DP_RB_RGT(rgt), dom->do_comp.co_in_ver); + D_INFO(DF_RB ": cancel rebuild due to new REINT, " + "co_rank %d, co_in_ver %u\n", + DP_RB_RGT(rgt), dom->do_comp.co_rank, + dom->do_comp.co_in_ver); rebuild_abort = true; break; - } else { - continue; } } else if (dom->do_comp.co_status == PO_COMP_ST_DOWN) { if (dom->do_comp.co_fseq > rgt->rgt_rebuild_ver) { - D_INFO(DF_RB ": cancel rebuild co_fseq=%u\n", - DP_RB_RGT(rgt), dom->do_comp.co_fseq); + D_INFO(DF_RB ": cancel rebuild due to new DOWN, " + "co_rank %d, co_fseq %u\n", + DP_RB_RGT(rgt), dom->do_comp.co_rank, + dom->do_comp.co_fseq); rebuild_abort = true; break; } } } - D_INFO(DF_RB " exclude rank %d/%x.\n", DP_RB_RGT(rgt), dom->do_comp.co_rank, - dom->do_comp.co_status); - rebuild_leader_set_status(rgt, dom->do_comp.co_rank, - -1, SCAN_DONE | PULL_DONE); + + if (now - last_print > 20) + D_INFO(DF_RB " rank %d, status 0x%x.\n", DP_RB_RGT(rgt), + dom->do_comp.co_rank, dom->do_comp.co_status); + + /* Some engines don't participate the rebuild that will not report + * progress/completion or dtx resync version through IV, mark the complete/ + * skip. + * 1) PO_COMP_ST_DOWN | PO_COMP_ST_DOWNOUT | PO_COMP_ST_NEW ranks + * 2) PO_COMP_ST_UP but co_in_ver > rebuild_ver also will be excluded from + * rebuild request, see rebuild_scan_broadcast(). 
+ */ + if (dom->do_comp.co_status != PO_COMP_ST_UP || + dom->do_comp.co_in_ver > rgt->rgt_rebuild_ver) + rebuild_leader_set_status(rgt, dom->do_comp.co_rank, + RB_DTX_RESYNC_VER_SKIP, + SCAN_DONE | PULL_DONE); } ABT_rwlock_unlock(pool->sp_lock); - map_ranks_fini(&excluded); + map_ranks_fini(&rank_list); if (rebuild_abort) { rgt->rgt_abort = 1; @@ -1048,7 +1068,6 @@ rebuild_leader_status_check(struct ds_pool *pool, uint32_t op, break; } - now = ABT_get_wtime(); /* print something at least for each 10 seconds */ if (now - last_print > 10) { last_print = now; @@ -1304,11 +1323,15 @@ rebuild_scan_broadcast(struct ds_pool *pool, struct rebuild_global_pool_tracker dom = pool_map_find_dom_by_rank(pool->sp_map, up_ranks.rl_ranks[i]); D_ASSERT(dom != NULL); - D_DEBUG(DB_REBUILD, DF_RB " rank %u co_in_ver %u\n", DP_RB_RGT(rgt), - up_ranks.rl_ranks[i], dom->do_comp.co_in_ver); - if (dom->do_comp.co_in_ver < rgt->rgt_rebuild_ver) + D_DEBUG(DB_REBUILD, DF_RB " rank %u co_in_ver %u, rebuild_ver %u.\n", + DP_RB_RGT(rgt), up_ranks.rl_ranks[i], dom->do_comp.co_in_ver, + rgt->rgt_rebuild_ver); + if (dom->do_comp.co_in_ver <= rgt->rgt_rebuild_ver) continue; + D_INFO(DF_RB " bypass UP rank %u co_in_ver %u exceed rebuild_ver %u\n", + DP_RB_RGT(rgt), up_ranks.rl_ranks[i], dom->do_comp.co_in_ver, + rgt->rgt_rebuild_ver); excluded->rl_ranks[nr++] = up_ranks.rl_ranks[i]; } excluded->rl_nr = nr; @@ -1318,13 +1341,11 @@ rebuild_scan_broadcast(struct ds_pool *pool, struct rebuild_global_pool_tracker rc = ds_pool_bcast_create(dss_get_module_info()->dmi_ctx, pool, DAOS_REBUILD_MODULE, REBUILD_OBJECTS_SCAN, rebuild_ver, &rpc, NULL, excluded, NULL); if (rc != 0) { - DL_ERROR(rc, DF_RB " pool map broadcast failed", DP_RB_RGT(rgt)); + DL_ERROR(rc, DF_RB " failed to create scan broadcast request", DP_RB_RGT(rgt)); D_GOTO(out, rc); } rsi = crt_req_get(rpc); - D_DEBUG(DB_REBUILD, DF_RB " scan broadcast\n", DP_RB_RGT(rgt)); - uuid_copy(rsi->rsi_pool_uuid, pool->sp_uuid); rsi->rsi_ns_id = 
pool->sp_iv_ns->iv_ns_id; rsi->rsi_leader_term = rgt->rgt_leader_term; @@ -1343,11 +1364,13 @@ rebuild_scan_broadcast(struct ds_pool *pool, struct rebuild_global_pool_tracker rso = crt_reply_get(rpc); if (rc == 0) rc = rso->rso_status; + else + DL_ERROR(rc, DF_RB " scan broadcast send failed.", DP_RB_RGT(rgt)); rgt->rgt_init_scan = 1; rgt->rgt_stable_epoch = rso->rso_stable_epoch; - D_DEBUG(DB_REBUILD, DF_RB " " DF_RC " got stable/reclaim epoch " DF_X64 "/" DF_X64 "\n", - DP_RB_RGT(rgt), DP_RC(rc), rgt->rgt_stable_epoch, rgt->rgt_reclaim_epoch); + DL_INFO(rc, DF_RB " got stable/reclaim epoch " DF_X64 "/" DF_X64, DP_RB_RGT(rgt), + rgt->rgt_stable_epoch, rgt->rgt_reclaim_epoch); crt_req_decref(rpc); out: if (excluded) @@ -2778,6 +2801,7 @@ rebuild_tgt_status_check_ult(void *arg) { struct rebuild_tgt_pool_tracker *rpt = arg; struct sched_req_attr attr = { 0 }; + uint32_t reported_dtx_resyc_ver = 0; D_ASSERT(rpt != NULL); sched_req_attr_init(&attr, SCHED_REQ_MIGRATE, &rpt->rt_pool_uuid); @@ -2881,6 +2905,11 @@ rebuild_tgt_status_check_ult(void *arg) rpt->rt_reported_obj_cnt = status.obj_count; rpt->rt_reported_rec_cnt = status.rec_count; rpt->rt_reported_size = status.size; + if (iv.riv_dtx_resyc_version > reported_dtx_resyc_ver) { + D_INFO(DF_RB "reported riv_dtx_resyc_version %d", + DP_RB_RPT(rpt), iv.riv_dtx_resyc_version); + reported_dtx_resyc_ver = iv.riv_dtx_resyc_version; + } } else { DL_WARN(rc, DF_RB " rebuild iv update failed", DP_RB_RPT(rpt)); /* Already finished rebuild, cannot find rebuild status on leader From de05ea9c82830c192d5c44ecfcaff70ce9724fee Mon Sep 17 00:00:00 2001 From: Niu Yawei Date: Mon, 19 Jan 2026 11:08:02 +0800 Subject: [PATCH 132/253] DAOS-18164 bio: bump default cluster_sz (#17334) Bump default BS cluster size from 32MB to 128MB in md-on-ssd mode, make the cluster size configurable through DAOS_BS_CLUSTER_MB. 
Signed-off-by: Niu Yawei --- src/bio/bio_xstream.c | 20 +++++++++++----- src/tests/suite/daos_md_replication.c | 19 ++++++++------- src/tests/suite/daos_mgmt.c | 34 ++++++++++++--------------- src/vos/tests/wal_ut.c | 13 +++++----- 4 files changed, 47 insertions(+), 39 deletions(-) diff --git a/src/bio/bio_xstream.c b/src/bio/bio_xstream.c index 059556ba89a..e72223be2fd 100644 --- a/src/bio/bio_xstream.c +++ b/src/bio/bio_xstream.c @@ -1,7 +1,7 @@ /** * (C) Copyright 2018-2024 Intel Corporation. * (C) Copyright 2025 Google LLC - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -31,8 +31,8 @@ /* These Macros should be turned into DAOS configuration in the future */ #define DAOS_MSG_RING_SZ 4096 -/* SPDK blob parameters */ -#define DAOS_BS_CLUSTER_SZ (1ULL << 25) /* 32MB */ +/* Default cluster size in MB */ +#define DAOS_DEFAULT_CLUSTER_MB 128 /* DMA buffer parameters */ #define DAOS_DMA_CHUNK_INIT_PCT 50 /* Default per-xstream init chunks, in percentage */ #define DAOS_DMA_CHUNK_CNT_MAX 128 /* Default per-xstream max chunks, 1GB */ @@ -224,6 +224,7 @@ bio_nvme_init_ext(const char *nvme_conf, int numa_node, unsigned int mem_size, char *env; int rc, fd; unsigned int size_mb = BIO_DMA_CHUNK_MB, io_timeout_secs = 0; + unsigned int cluster_mb = DAOS_DEFAULT_CLUSTER_MB; if (tgt_nr <= 0) { D_ERROR("tgt_nr: %u should be > 0\n", tgt_nr); @@ -323,8 +324,14 @@ bio_nvme_init_ext(const char *nvme_conf, int numa_node, unsigned int mem_size, D_INFO("Set per-xstream DMA buffer upper bound to %u %uMB chunks, prealloc %u chunks\n", bio_chk_cnt_max, size_mb, init_chk_cnt()); + d_getenv_uint("DAOS_BS_CLUSTER_MB", &cluster_mb); + if (cluster_mb < 32 || cluster_mb > 1024) { + D_WARN("DAOS_BS_CLUSTER_MB %u is invalid, default %u is used\n", cluster_mb, + DAOS_DEFAULT_CLUSTER_MB); + cluster_mb = DAOS_DEFAULT_CLUSTER_MB; + } 
spdk_bs_opts_init(&nvme_glb.bd_bs_opts, sizeof(nvme_glb.bd_bs_opts)); - nvme_glb.bd_bs_opts.cluster_sz = DAOS_BS_CLUSTER_SZ; + nvme_glb.bd_bs_opts.cluster_sz = (cluster_mb << 20); nvme_glb.bd_bs_opts.max_channel_ops = BIO_BS_MAX_CHANNEL_OPS; d_agetenv_str(&env, "VOS_BDEV_CLASS"); @@ -368,8 +375,9 @@ bio_nvme_init_ext(const char *nvme_conf, int numa_node, unsigned int mem_size, if (!bio_nvme_configured(SMD_DEV_TYPE_META)) nvme_glb.bd_bs_opts.cluster_sz = (1UL << 30); /* 1GB */ - D_INFO("MD on SSD is %s\n", - bio_nvme_configured(SMD_DEV_TYPE_META) ? "enabled" : "disabled"); + D_INFO("MD on SSD is %s, %u cluster size is used\n", + bio_nvme_configured(SMD_DEV_TYPE_META) ? "enabled" : "disabled", + nvme_glb.bd_bs_opts.cluster_sz); bio_spdk_inited = true; diff --git a/src/tests/suite/daos_md_replication.c b/src/tests/suite/daos_md_replication.c index 1c10bae3d4b..a1d1ceb6692 100644 --- a/src/tests/suite/daos_md_replication.c +++ b/src/tests/suite/daos_md_replication.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2017-2022 Intel Corporation. + * (C) Copyright 2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -11,6 +12,12 @@ #include #include "daos_test.h" +/* + * Given the 128MB default blobstore cluster size, the minimal pool scm_size for + * an 8 targets engine would be 128MB * 8 = 1GB. + */ +#define MIN_SCM_SIZE (1ULL << 30) + static void mdr_stop_pool_svc(void **argv) { @@ -24,10 +31,8 @@ mdr_stop_pool_svc(void **argv) /* Create the pool. 
*/ if (arg->myrank == 0) { print_message("creating pool\n"); - rc = dmg_pool_create(dmg_config_file, - geteuid(), getegid(), arg->group, - NULL, 256 * 1024 * 1024, 0, - NULL, arg->pool.svc, uuid); + rc = dmg_pool_create(dmg_config_file, geteuid(), getegid(), arg->group, NULL, + MIN_SCM_SIZE, 0, NULL, arg->pool.svc, uuid); } par_bcast(PAR_COMM_WORLD, &rc, 1, PAR_INT, 0); assert_rc_equal(rc, 0); @@ -134,10 +139,8 @@ mdr_stop_cont_svc(void **argv) int rc; print_message("creating pool\n"); - rc = dmg_pool_create(dmg_config_file, - geteuid(), getegid(), arg->group, - NULL, 256 * 1024 * 1024, 0, - NULL, arg->pool.svc, pool_uuid); + rc = dmg_pool_create(dmg_config_file, geteuid(), getegid(), arg->group, NULL, MIN_SCM_SIZE, + 0, NULL, arg->pool.svc, pool_uuid); assert_rc_equal(rc, 0); if (arg->pool.svc->rl_nr < 3) { diff --git a/src/tests/suite/daos_mgmt.c b/src/tests/suite/daos_mgmt.c index 49c520fa53b..6866a8d5376 100644 --- a/src/tests/suite/daos_mgmt.c +++ b/src/tests/suite/daos_mgmt.c @@ -1,6 +1,6 @@ /** * (C) Copyright 2016-2024 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -20,6 +20,12 @@ #include #include +/* + * Given the 128MB default blobstore cluster size, the minimal pool scm_size for + * an 8 targets engine would be 128MB * 8 = 1GB. + */ +#define MIN_SCM_SIZE (1ULL << 30) + /** create/destroy pool on all tgts */ static void pool_create_all(void **state) @@ -36,11 +42,8 @@ pool_create_all(void **state) /** create container */ print_message("creating pool synchronously ... 
"); - rc = dmg_pool_create(dmg_config_file, - geteuid(), getegid(), - arg->group, NULL /* tgts */, - 256 * 1024 * 1024 /* minimal size */, - 0 /* nvme size */, NULL /* prop */, + rc = dmg_pool_create(dmg_config_file, geteuid(), getegid(), arg->group, NULL /* tgts */, + MIN_SCM_SIZE, 0 /* nvme size */, NULL /* prop */, arg->pool.svc /* svc */, uuid); assert_rc_equal(rc, 0); @@ -341,11 +344,8 @@ pool_create_and_destroy_retry(void **state) test_set_engine_fail_loc(arg, CRT_NO_RANK, DAOS_POOL_CREATE_FAIL_CORPC | DAOS_FAIL_ONCE); print_message("creating pool synchronously ... "); - rc = dmg_pool_create(dmg_config_file, - geteuid(), getegid(), - arg->group, NULL /* tgts */, - 256 * 1024 * 1024 /* minimal size */, - 0 /* nvme size */, NULL /* prop */, + rc = dmg_pool_create(dmg_config_file, geteuid(), getegid(), arg->group, NULL /* tgts */, + MIN_SCM_SIZE, 0 /* nvme size */, NULL /* prop */, arg->pool.svc /* svc */, uuid); assert_rc_equal(rc, 0); print_message("success uuid = "DF_UUIDF"\n", DP_UUID(uuid)); @@ -435,8 +435,7 @@ pool_create_steps_down_from_up_empty(void **state) svc.rl_ranks = &rank; svc.rl_nr = 1; rc = dmg_pool_create(dmg_config_file, geteuid(), getegid(), arg->group, NULL /* tgts */, - 256 * 1024 * 1024 /* minimal size */, 0 /* nvme size */, - NULL /* prop */, &svc, uuid); + MIN_SCM_SIZE, 0 /* nvme size */, NULL /* prop */, &svc, uuid); assert_rc_equal(rc, 0); print_message("success uuid = "DF_UUIDF"\n", DP_UUID(uuid)); @@ -466,8 +465,7 @@ pool_destroy_disconnect_all(void **state) print_message("creating pool synchronously ... 
"); rc = dmg_pool_create(dmg_config_file, geteuid(), getegid(), arg->group, NULL /* tgts */, - 256 * 1024 * 1024 /* minimal size */, 0 /* nvme size */, - NULL /* prop */, arg->pool.svc, uuid); + MIN_SCM_SIZE, 0 /* nvme size */, NULL /* prop */, arg->pool.svc, uuid); assert_rc_equal(rc, 0); print_message("success uuid = "DF_UUIDF"\n", DP_UUID(uuid)); @@ -515,8 +513,7 @@ pool_destroy_cancel_rfcheck(void **state) print_message("creating pool synchronously ... "); rc = dmg_pool_create(dmg_config_file, geteuid(), getegid(), arg->group, NULL /* tgts */, - 256 * 1024 * 1024 /* minimal size */, 0 /* nvme size */, - NULL /* prop */, arg->pool.svc, uuid); + MIN_SCM_SIZE, 0 /* nvme size */, NULL /* prop */, arg->pool.svc, uuid); assert_rc_equal(rc, 0); print_message("success uuid = "DF_UUIDF"\n", DP_UUID(uuid)); @@ -544,8 +541,7 @@ pool_create_query_fail(void **state) print_message("creating pool synchronously ... "); rc = dmg_pool_create(dmg_config_file, geteuid(), getegid(), arg->group, NULL /* tgts */, - 256 * 1024 * 1024 /* minimal size */, 0 /* nvme size */, - NULL /* prop */, arg->pool.svc, uuid); + MIN_SCM_SIZE, 0 /* nvme size */, NULL /* prop */, arg->pool.svc, uuid); assert_rc_equal(rc, 0); print_message("success uuid = " DF_UUIDF "\n", DP_UUID(uuid)); diff --git a/src/vos/tests/wal_ut.c b/src/vos/tests/wal_ut.c index 32b4b4c9957..0bdc85a38d7 100644 --- a/src/vos/tests/wal_ut.c +++ b/src/vos/tests/wal_ut.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2023-2024 Intel Corporation. + * (C) Copyright 2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -669,8 +670,8 @@ ut_fill_wal(struct bio_ut_args *args, int tx_nr, struct ut_tx_array **txa_ptr) tx = txa->ta_tx_ptrs[0]; /* - * Each tx is roughly 800k, 40 txs will consume 32000k, which is more than - * half of 50MB WAL size. + * Each tx is roughly 800k, 100 txs will consume 80MB, which is more than + * half of 128MB WAL size. 
*/ for (i = 0; i < tx_nr; i++) { tx = txa->ta_tx_ptrs[i]; @@ -705,11 +706,11 @@ static void wal_ut_wrap(void **state) { struct bio_ut_args *args = *state; - uint64_t meta_sz = (50ULL << 20); /* 50 MB */ + uint64_t meta_sz = (128ULL << 20); /* 128 MB */ struct ut_tx_array *txa; struct umem_wal_tx *tx; struct ut_fake_tx *fake_tx; - int tx_nr = 40, rc; + int tx_nr = 100, rc; rc = ut_mc_init(args, meta_sz, meta_sz, meta_sz); assert_rc_equal(rc, 0); @@ -745,11 +746,11 @@ static void wal_ut_wrap_many(void **state) { struct bio_ut_args *args = *state; - uint64_t meta_sz = (50ULL << 20); /* 50 MB */ + uint64_t meta_sz = (128ULL << 20); /* 128 MB */ struct ut_tx_array *txa; struct umem_wal_tx *tx; struct ut_fake_tx *fake_tx; - int tx_nr = 40, rc; + int tx_nr = 100, rc; rc = ut_mc_init(args, meta_sz, meta_sz, meta_sz); assert_rc_equal(rc, 0); From f0587a1a88ad4efbf3aab0966e1be47b710a8f24 Mon Sep 17 00:00:00 2001 From: Niu Yawei Date: Tue, 20 Jan 2026 14:16:53 +0800 Subject: [PATCH 133/253] DAOS-18418 ddb: misc fixes (#17369) 1. Parse rdb pool filename to get proper target ID for rdb. 2. Recreate vos file based on the remaining target IDs. 3. Delete the scm_size stored in TABLE_POOLS_EX only when last pool target is deleted. Signed-off-by: Niu Yawei --- src/bio/bio_context.c | 40 +------------------------- src/bio/smd/smd_pool.c | 15 ++++++---- src/include/daos_srv/bio.h | 16 +---------- src/include/daos_srv/mgmt_tgt_common.h | 11 +++---- src/mgmt/mgmt_common.c | 31 +++++--------------- src/mgmt/srv_target.c | 5 ++-- src/utils/ddb/ddb_mgmt.c | 5 ++-- src/utils/ddb/ddb_parse.c | 8 +++++- 8 files changed, 37 insertions(+), 94 deletions(-) diff --git a/src/bio/bio_context.c b/src/bio/bio_context.c index a404915705c..0a93de41696 100644 --- a/src/bio/bio_context.c +++ b/src/bio/bio_context.c @@ -1,6 +1,6 @@ /** * (C) Copyright 2018-2025 Intel Corporation. 
- * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -1365,41 +1365,3 @@ bio_mc2ioc(struct bio_meta_context *mc, enum smd_dev_type type) return NULL; } } - -/* - * Check if any blob (WAL, meta or data) is created for a pool target, - * return true if any blob is created, otherwise return false. - */ -bool -bio_pool_tgt_created(uuid_t pool_id, int tgt_id, enum bio_mc_flags flags) -{ - enum smd_dev_type st; - spdk_blob_id blob_id; - int rc; - - /* Always return true for pmem mode */ - if (!bio_nvme_configured(SMD_DEV_TYPE_META)) - return true; - - for (st = SMD_DEV_TYPE_DATA; st < SMD_DEV_TYPE_MAX; st++) { - if (flags & BIO_MC_FL_RDB) { - if (st == SMD_DEV_TYPE_DATA) - continue; - rc = smd_rdb_get_blob(pool_id, tgt_id, st, &blob_id); - } else { - rc = smd_pool_get_blob(pool_id, tgt_id, st, &blob_id); - } - - if (rc == 0) { - return true; - } else if (rc == -DER_NONEXIST) { - continue; - } else if (rc) { - DL_ERROR(rc, "Failed to query pool " DF_UUID " tgt:%d", DP_UUID(pool_id), - tgt_id); - continue; - } - } - - return false; -} diff --git a/src/bio/smd/smd_pool.c b/src/bio/smd/smd_pool.c index 169e548b1c8..a2d8c86faf0 100644 --- a/src/bio/smd/smd_pool.c +++ b/src/bio/smd/smd_pool.c @@ -1,6 +1,6 @@ /** * (C) Copyright 2018-2025 Intel Corporation. 
- * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -179,7 +179,7 @@ smd_rdb_add_tgt(uuid_t pool_id, uint32_t tgt_id, uint64_t blob_id, enum smd_dev_ } static int -pool_del_tgt(uuid_t pool_id, uint32_t tgt_id, char *table_name) +pool_del_tgt(uuid_t pool_id, uint32_t tgt_id, char *table_name, int *tgt_cnt) { struct smd_pool pool; struct d_uuid id; @@ -226,6 +226,9 @@ pool_del_tgt(uuid_t pool_id, uint32_t tgt_id, char *table_name) rc = 1; /* Inform caller that last target is deleted */ } + if (tgt_cnt) + *tgt_cnt = pool.sp_tgt_cnt; + return rc; } @@ -234,15 +237,15 @@ smd_pool_del_tgt(uuid_t pool_id, uint32_t tgt_id, enum smd_dev_type st) { struct smd_pool_meta meta = { 0 }; struct d_uuid id; - int rc; + int rc, remaining = 0; smd_db_lock(); - rc = pool_del_tgt(pool_id, tgt_id, TABLE_POOLS[st]); + rc = pool_del_tgt(pool_id, tgt_id, TABLE_POOLS[st], &remaining); if (rc <= 0) goto out; rc = 0; - if (st == SMD_DEV_TYPE_META) { + if (st == SMD_DEV_TYPE_META && !remaining) { uuid_copy(id.uuid, pool_id); rc = smd_db_fetch(TABLE_POOLS_EX[st], &id, sizeof(id), &meta, sizeof(meta)); @@ -269,7 +272,7 @@ smd_rdb_del_tgt(uuid_t pool_id, uint32_t tgt_id, enum smd_dev_type st) int rc; smd_db_lock(); - rc = pool_del_tgt(pool_id, tgt_id, TABLE_RDBS[st]); + rc = pool_del_tgt(pool_id, tgt_id, TABLE_RDBS[st], NULL); smd_db_unlock(); return rc < 0 ? rc : 0; diff --git a/src/include/daos_srv/bio.h b/src/include/daos_srv/bio.h index 3b9ecddadcc..f6ae1c4a177 100644 --- a/src/include/daos_srv/bio.h +++ b/src/include/daos_srv/bio.h @@ -1,6 +1,6 @@ /** * (C) Copyright 2018-2025 Intel Corporation. 
- * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -1202,18 +1202,4 @@ bool bio_meta_is_empty(struct bio_meta_context *mc); */ int bio_meta_clear_empty(struct bio_meta_context *mc); -/* - * Check if any blob is created for a pool target. This function is supposed to - * be called in md-on-ssd mode. - * - * \param[in] pool_id pool UUID - * \param[in] tgt_id VOS target ID - * \param[in] flags bio_mc_flags - * - * \return true, when any blob is created for the pool target - * false, when no blob is created - */ -bool -bio_pool_tgt_created(uuid_t pool_id, int tgt_id, enum bio_mc_flags flags); - #endif /* __BIO_API_H__ */ diff --git a/src/include/daos_srv/mgmt_tgt_common.h b/src/include/daos_srv/mgmt_tgt_common.h index f7c551d39f8..8268fc6bff3 100644 --- a/src/include/daos_srv/mgmt_tgt_common.h +++ b/src/include/daos_srv/mgmt_tgt_common.h @@ -1,5 +1,5 @@ /** - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * (C) Copyright 2025 Vdura Inc. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent @@ -57,13 +57,14 @@ typedef void (*bind_cpu_fn_t)(int tgt_id); * \param[in] pool_uuid Pool uuid * \param[in] scm_size Per vos file size * \param[in] tgt_nr Vos files number + * \param[in] tgts Target ID array * \param[in] rdb_blob_sz rdb file size (rdb file will not be recreated if size is zero) * \param[in] storage_path Base path to store vos and rdb files * \param[in] bind_cpu_fn Bind a separate cpu to each vos file allocation */ int -ds_mgmt_tgt_recreate(uuid_t pool_uuid, daos_size_t scm_size, int tgt_nr, daos_size_t rdb_blob_sz, - const char *storage_path, bind_cpu_fn_t bind_cpu_fn); +ds_mgmt_tgt_recreate(uuid_t pool_uuid, daos_size_t scm_size, int tgt_nr, int *tgts, + daos_size_t rdb_blob_sz, const char *storage_path, bind_cpu_fn_t bind_cpu_fn); /** * Parallel recreate vos files. @@ -74,12 +75,12 @@ ds_mgmt_tgt_recreate(uuid_t pool_uuid, daos_size_t scm_size, int tgt_nr, daos_si * \param[in] cancel_pending If true, preallocate will abort * \param[in] newborns_path Base path for store vos/rdb files * \param[in] bind_cpu_fn e.g. `dss_bind_to_xstream_cpuset` - * \param[in] skip_bitmap Bitmap for the targets being skipped + * \param[in] tgts Target ID array */ int ds_mgmt_tgt_preallocate_parallel(uuid_t uuid, daos_size_t scm_size, int tgt_nr, bool *cancel_pending, const char *newborns_path, - bind_cpu_fn_t bind_cpu_fn, uint8_t *skip_bitmap); + bind_cpu_fn_t bind_cpu_fn, int *tgts); /** * Sequential recreate vos files. diff --git a/src/mgmt/mgmt_common.c b/src/mgmt/mgmt_common.c index 6cf29102fa9..a58cb4ecec7 100644 --- a/src/mgmt/mgmt_common.c +++ b/src/mgmt/mgmt_common.c @@ -1,5 +1,5 @@ /* - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * (C) Copyright 2025 Vdura Inc. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent @@ -82,8 +82,8 @@ ds_mgmt_dir_fsync(const char *dir) } int -ds_mgmt_tgt_recreate(uuid_t pool_uuid, daos_size_t scm_size, int tgt_nr, daos_size_t rdb_blob_sz, - const char *storage_path, bind_cpu_fn_t bind_cpu_fn) +ds_mgmt_tgt_recreate(uuid_t pool_uuid, daos_size_t scm_size, int tgt_nr, int *tgts, + daos_size_t rdb_blob_sz, const char *storage_path, bind_cpu_fn_t bind_cpu_fn) { char *newborns_path = NULL; char *pool_newborns_path = NULL; @@ -91,9 +91,8 @@ ds_mgmt_tgt_recreate(uuid_t pool_uuid, daos_size_t scm_size, int tgt_nr, daos_si char *rdb_path = NULL; bool dummy_cancel_state = false; int rc; - int fd, tgt_id; + int fd; struct stat statbuf; - uint8_t *skip_bitmap = NULL; D_ASSERT(bio_nvme_configured(SMD_DEV_TYPE_META)); @@ -133,22 +132,9 @@ ds_mgmt_tgt_recreate(uuid_t pool_uuid, daos_size_t scm_size, int tgt_nr, daos_si goto out; } - D_ASSERT(tgt_nr > 0); - D_ALLOC(skip_bitmap, (tgt_nr + 7) / 8); - if (skip_bitmap == NULL) { - rc = -DER_NOMEM; - D_ERROR("Failed to allocate target bitmap.\n"); - goto out; - } - - for (tgt_id = 0; tgt_id < tgt_nr; tgt_id++) { - if (!bio_pool_tgt_created(pool_uuid, tgt_id, 0)) - setbit(skip_bitmap, tgt_id); - } - /** create VOS files */ rc = ds_mgmt_tgt_preallocate_parallel(pool_uuid, scm_size, tgt_nr, &dummy_cancel_state, - newborns_path, bind_cpu_fn, skip_bitmap); + newborns_path, bind_cpu_fn, tgts); if (rc) { D_ERROR(DF_UUID ": failed to create tgt vos files: " DF_RC "\n", DP_UUID(pool_uuid), DP_RC(rc)); @@ -196,7 +182,6 @@ ds_mgmt_tgt_recreate(uuid_t pool_uuid, daos_size_t scm_size, int tgt_nr, daos_si D_FREE(newborns_path); D_FREE(pool_newborns_path); D_FREE(pool_path); - D_FREE(skip_bitmap); return rc; } @@ -322,7 +307,7 @@ ds_mgmt_tgt_preallocate_sequential(uuid_t uuid, daos_size_t scm_size, int tgt_nr int ds_mgmt_tgt_preallocate_parallel(uuid_t uuid, daos_size_t scm_size, int tgt_nr, bool *cancel_pending, const char *newborns_path, - bind_cpu_fn_t bind_cpu_fn, uint8_t 
*skip_bitmap) + bind_cpu_fn_t bind_cpu_fn, int *tgts) { int i; int rc; @@ -341,12 +326,10 @@ ds_mgmt_tgt_preallocate_parallel(uuid_t uuid, daos_size_t scm_size, int tgt_nr, pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &old_cancelstate); for (i = 0; i < tgt_nr; i++) { - if (skip_bitmap && isset(skip_bitmap, i)) - continue; entry = &thrds_list[i]; uuid_copy(entry->tvt_args.tvpa_uuid, uuid); entry->tvt_args.tvpa_scm_size = scm_size; - entry->tvt_args.tvpa_tgt_id = i; + entry->tvt_args.tvpa_tgt_id = (tgts != NULL) ? tgts[i] : i; entry->tvt_args.tvpa_newborns_path = newborns_path; entry->tvt_args.tvpa_bind_cpu_fn = bind_cpu_fn; rc = pthread_create(&entry->tvt_tid, NULL, tgt_preallocate_thrd_func, diff --git a/src/mgmt/srv_target.c b/src/mgmt/srv_target.c index 3080d92f430..08db055e5ac 100644 --- a/src/mgmt/srv_target.c +++ b/src/mgmt/srv_target.c @@ -1,6 +1,6 @@ /* * (C) Copyright 2016-2024 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -418,7 +418,8 @@ recreate_pooltgts() D_ASSERT(pool_info->spi_scm_sz > 0); rc = ds_mgmt_tgt_recreate(pool_info->spi_id, pool_info->spi_scm_sz, - pool_info->spi_tgt_cnt[SMD_DEV_TYPE_META], rdb_blob_sz, + pool_info->spi_tgt_cnt[SMD_DEV_TYPE_META], + pool_info->spi_tgts[SMD_DEV_TYPE_META], rdb_blob_sz, dss_storage_path, dss_bind_to_xstream_cpuset); if (rc) goto out; diff --git a/src/utils/ddb/ddb_mgmt.c b/src/utils/ddb/ddb_mgmt.c index 12387df6444..7bcf06605fc 100644 --- a/src/utils/ddb/ddb_mgmt.c +++ b/src/utils/ddb/ddb_mgmt.c @@ -1,6 +1,6 @@ /** * (C) Copyright 2025 Vdura Inc. 
- * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -185,7 +185,8 @@ ddb_recreate_pooltgts(const char *storage_path) D_ASSERT(pool_info->spi_scm_sz > 0); rc = ds_mgmt_tgt_recreate(pool_info->spi_id, pool_info->spi_scm_sz, - pool_info->spi_tgt_cnt[SMD_DEV_TYPE_META], rdb_size, + pool_info->spi_tgt_cnt[SMD_DEV_TYPE_META], + pool_info->spi_tgts[SMD_DEV_TYPE_META], rdb_size, storage_path, NULL); if (rc != 0) break; diff --git a/src/utils/ddb/ddb_parse.c b/src/utils/ddb/ddb_parse.c index e1bf64272a7..2d493a18ec6 100644 --- a/src/utils/ddb/ddb_parse.c +++ b/src/utils/ddb/ddb_parse.c @@ -1,6 +1,6 @@ /** * (C) Copyright 2019-2024 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -8,6 +8,7 @@ #include #include #include +#include #include "daos_errno.h" #include "ddb_common.h" #include "ddb_parse.h" @@ -52,6 +53,11 @@ vos_path_parse(const char *path, struct vos_file_parts *vos_file_parts) strncpy(vos_file_parts->vf_vos_file, tok, ARRAY_SIZE(vos_file_parts->vf_vos_file) - 1); + if (strcmp(vos_file_parts->vf_vos_file, "rdb-pool") == 0) { + vos_file_parts->vf_target_idx = BIO_SYS_TGT_ID; + goto done; + } + /* * file name should be vos-N ... 
split on "-" * If not, might be test, just assume target of 0 From 7955a1caa3c2f082dd3308c66c8c836507998cda Mon Sep 17 00:00:00 2001 From: Tom Nabarro Date: Tue, 20 Jan 2026 13:47:41 +0000 Subject: [PATCH 134/253] DAOS-18366 control: Scale dmg pool timeout with ranks (#17375) Signed-off-by: Tom Nabarro --- src/control/lib/control/pool.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/control/lib/control/pool.go b/src/control/lib/control/pool.go index d1dee8907aa..bd9a3504a2e 100644 --- a/src/control/lib/control/pool.go +++ b/src/control/lib/control/pool.go @@ -1,6 +1,6 @@ // // (C) Copyright 2020-2024 Intel Corporation. -// (C) Copyright 2025 Hewlett Packard Enterprise Development LP +// (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -863,6 +863,9 @@ func getPoolRanksResp(ctx context.Context, rpcClient UnaryInvoker, req *PoolRank return nil, errors.New("no ranks in request") } + // Set timeout to 5 minutes per rank to allow sufficient time for operation + req.SetTimeout(time.Duration(len(req.Ranks)) * DefaultPoolTimeout) + results := []*PoolRankResult{} for _, rank := range req.Ranks { result, err := poolRankOp(ctx, rpcClient, req, rank) From 1d9174f3b9acdadbc94820d868132e6e7a2b68e5 Mon Sep 17 00:00:00 2001 From: Nasf-Fan Date: Wed, 21 Jan 2026 00:55:57 +0800 Subject: [PATCH 135/253] DAOS-18458 vos: correctly count the DTX entries in committed tree (#17379) Originally, when commit DTX entries, we count the new added entries into DTX committed tree via vos_dtx_post_handle(), and save it in the vos_container::vp_dtx_committed_count. But there may be yield between insert DTX entries into the tree and refrehing vp_dtx_committed_count. Then some others may see some inconsistency between the DTX committed tree and vp_dtx_committed_count. 
The patch changes the logic to guarantee that update (insert or delete) DTX entries in the committed tree will be together with refreshing the vp_dtx_committed_count without yield. Signed-off-by: Fan Yong --- src/dtx/tests/dts_aggregate.c | 36 +++++++----- src/vos/vos_dtx.c | 107 +++++++++++++++++++++------------- src/vos/vos_internal.h | 4 +- 3 files changed, 90 insertions(+), 57 deletions(-) diff --git a/src/dtx/tests/dts_aggregate.c b/src/dtx/tests/dts_aggregate.c index 0f341cf9f5b..4ef3a1f653c 100644 --- a/src/dtx/tests/dts_aggregate.c +++ b/src/dtx/tests/dts_aggregate.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP. + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -191,7 +191,7 @@ prep_dtx_entries(void) } static void -check_rollback(void) +check_rollback(uint32_t count) { int i; uint64_t cmt_time; @@ -214,8 +214,8 @@ check_rollback(void) umem_ptr2off(&mock_pool.vp_umm, mock_dbds[0])); assert_int_equal(mock_cont_df.cd_dtx_committed_tail, umem_ptr2off(&mock_pool.vp_umm, mock_dbds[DBD_BLOBS_CAP - 1])); - assert_int_equal(mock_cont.vc_dtx_committed_count, DBD_BLOBS_CAP * DBD_BLOB_DF_CAP); - assert_int_equal(mock_pool.vp_dtx_committed_count, DBD_BLOBS_CAP * DBD_BLOB_DF_CAP); + assert_int_equal(mock_cont.vc_dtx_committed_count, DBD_BLOBS_CAP * DBD_BLOB_DF_CAP - count); + assert_int_equal(mock_pool.vp_dtx_committed_count, DBD_BLOBS_CAP * DBD_BLOB_DF_CAP - count); assert_int_equal(mock_cont.vc_cmt_dtx_reindex_pos, umem_ptr2off(&mock_pool.vp_umm, mock_dbds[0])); } @@ -348,7 +348,7 @@ test_tx_begin_error(void **unused) rc = vos_dtx_aggregate(mock_coh, NULL); assert_rc_equal(rc, -DER_UNKNOWN); - check_rollback(); + check_rollback(0); } /* DAOS B-tree delete failure */ @@ -366,10 +366,12 @@ test_dbtree_delete_error(void **unused) will_return(__wrap_dbtree_delete, 0); will_return(__wrap_dbtree_delete, -DER_UNKNOWN); expect_value(tx_abort, error, 
-DER_UNKNOWN); + expect_value(__wrap_d_tm_dec_gauge, metric, mock_tls.vtl_committed); + expect_value(__wrap_d_tm_dec_gauge, value, 3); rc = vos_dtx_aggregate(mock_coh, NULL); assert_rc_equal(rc, -DER_UNKNOWN); - check_rollback(); + check_rollback(3); } /* Update of newest aggregated epoch failure */ @@ -389,10 +391,12 @@ test_newest_aggregated_error(void **unused) expect_value(tx_add_ptr, ptr_size, sizeof(mock_cont_df.cd_newest_aggregated)); will_return(tx_add_ptr, -DER_UNKNOWN); expect_value(tx_abort, error, -DER_UNKNOWN); + expect_value(__wrap_d_tm_dec_gauge, metric, mock_tls.vtl_committed); + expect_value(__wrap_d_tm_dec_gauge, value, DBD_BLOB_DF_CAP); rc = vos_dtx_aggregate(mock_coh, NULL); assert_rc_equal(rc, -DER_UNKNOWN); - check_rollback(); + check_rollback(DBD_BLOB_DF_CAP); } /* Update of DTX blob list failure */ @@ -415,10 +419,12 @@ test_committed_head_error(void **unused) expect_value(tx_add_ptr, ptr_size, sizeof(umem_off_t)); will_return(tx_add_ptr, -DER_UNKNOWN); expect_value(tx_abort, error, -DER_UNKNOWN); + expect_value(__wrap_d_tm_dec_gauge, metric, mock_tls.vtl_committed); + expect_value(__wrap_d_tm_dec_gauge, value, DBD_BLOB_DF_CAP); rc = vos_dtx_aggregate(mock_coh, NULL); assert_rc_equal(rc, -DER_UNKNOWN); - check_rollback(); + check_rollback(DBD_BLOB_DF_CAP); } /* Update of DTX blob list failure */ @@ -444,10 +450,12 @@ test_committed_prev_error(void **unused) expect_value(tx_add_ptr, ptr_size, sizeof(umem_off_t)); will_return(tx_add_ptr, -DER_UNKNOWN); expect_value(tx_abort, error, -DER_UNKNOWN); + expect_value(__wrap_d_tm_dec_gauge, metric, mock_tls.vtl_committed); + expect_value(__wrap_d_tm_dec_gauge, value, DBD_BLOB_DF_CAP); rc = vos_dtx_aggregate(mock_coh, NULL); assert_rc_equal(rc, -DER_UNKNOWN); - check_rollback(); + check_rollback(DBD_BLOB_DF_CAP); } /* Pmem free failure */ @@ -475,10 +483,12 @@ test_umm_free_error(void **unused) expect_value(tx_free, umoff, mock_dbds_off[0]); will_return(tx_free, -DER_UNKNOWN); expect_value(tx_abort, 
error, -DER_UNKNOWN); + expect_value(__wrap_d_tm_dec_gauge, metric, mock_tls.vtl_committed); + expect_value(__wrap_d_tm_dec_gauge, value, DBD_BLOB_DF_CAP); rc = vos_dtx_aggregate(mock_coh, NULL); assert_rc_equal(rc, -DER_UNKNOWN); - check_rollback(); + check_rollback(DBD_BLOB_DF_CAP); } /* Update of committed DTX entries failure */ @@ -507,7 +517,7 @@ test_committed_data_error(void **unused) cmt_time = CMT_TIME_START + (dtx_count - 1) * CMT_TIME_STEP; rc = vos_dtx_aggregate(mock_coh, &cmt_time); assert_rc_equal(rc, -DER_UNKNOWN); - check_rollback(); + check_rollback(0); } /* Update of committed DTX entries count failure */ @@ -539,7 +549,7 @@ test_dbd_count_error(void **unused) cmt_time = CMT_TIME_START + (dtx_count - 1) * CMT_TIME_STEP; rc = vos_dtx_aggregate(mock_coh, &cmt_time); assert_rc_equal(rc, -DER_UNKNOWN); - check_rollback(); + check_rollback(0); } /* Pmem commit transaction failure */ @@ -571,7 +581,7 @@ test_umm_commit_error(void **unused) cmt_time = CMT_TIME_START + (dtx_count - 1) * CMT_TIME_STEP; rc = vos_dtx_aggregate(mock_coh, &cmt_time); assert_rc_equal(rc, -DER_UNKNOWN); - check_rollback(); + check_rollback(0); } /* Pool without DTX committed transaction */ diff --git a/src/vos/vos_dtx.c b/src/vos/vos_dtx.c index 3391781adfa..e572d4be489 100644 --- a/src/vos/vos_dtx.c +++ b/src/vos/vos_dtx.c @@ -2242,17 +2242,16 @@ vos_dtx_commit_internal(struct vos_container *cont, struct dtx_id dtis[], rc = vos_dtx_commit_one(cont, &dtis[i], epoch, cmt_time, keep_act, &dces[i], daes != NULL ? &daes[i] : NULL, rm_cos != NULL ? 
&rm_cos[i] : NULL); - if (rc == 0 && (daes == NULL || daes[i] != NULL)) - committed++; - if (rc == -DER_ALREADY || rc == -DER_NONEXIST) rc = 0; if (rc != 0) goto out; - if (dces[i] != NULL) + if (dces[i] != NULL) { + committed++; j++; + } } if (j > dbd->dbd_count) { @@ -2333,6 +2332,11 @@ vos_dtx_commit_internal(struct vos_container *cont, struct dtx_id dtis[], goto again; out: + if (committed > 0) { + cont->vc_dtx_committed_count += committed; + cont->vc_pool->vp_dtx_committed_count += committed; + } + return rc < 0 ? rc : committed; } @@ -2342,9 +2346,11 @@ vos_dtx_post_handle(struct vos_container *cont, struct vos_dtx_cmt_ent **dces, int count, bool abort, bool rollback, bool keep_act) { - d_iov_t kiov; - int rc; - int i; + struct vos_tls *tls = vos_tls_get(false); + d_iov_t kiov; + int rc; + int i; + int j; D_ASSERT(daes != NULL); @@ -2359,7 +2365,7 @@ vos_dtx_post_handle(struct vos_container *cont, if (dces == NULL) return; - for (i = 0; i < count; i++) { + for (i = 0, j = 0; i < count; i++) { if (dces[i] == NULL) continue; @@ -2367,32 +2373,39 @@ vos_dtx_post_handle(struct vos_container *cont, sizeof(DCE_XID(dces[i]))); rc = dbtree_delete(cont->vc_dtx_committed_hdl, BTR_PROBE_EQ, &kiov, NULL); - if (rc != 0 && rc != -DER_NONEXIST) { + if (rc != 0) { D_WARN("Failed to rollback cmt DTX entry " DF_DTI": "DF_RC"\n", DP_DTI(&DCE_XID(dces[i])), DP_RC(rc)); dces[i]->dce_invalid = 1; + } else { + j++; } } + if (j > 0) { + D_ASSERTF( + cont->vc_dtx_committed_count >= j, + "Unexpected committed DTX entries count when rollback for " DF_UUID + ": %u vs %u\n", + DP_UUID(cont->vc_id), cont->vc_dtx_committed_count, j); + + cont->vc_dtx_committed_count -= j; + cont->vc_pool->vp_dtx_committed_count -= j; + d_tm_dec_gauge(tls->vtl_committed, j); + } + return; } if (!abort && dces != NULL) { - struct vos_tls *tls = vos_tls_get(false); - int j = 0; - - D_ASSERT(cont->vc_pool->vp_sysdb == false); - for (i = 0; i < count; i++) { + for (i = 0, j = 0; i < count; i++) { if (dces[i] 
!= NULL) j++; } - if (j > 0) { - cont->vc_dtx_committed_count += j; - cont->vc_pool->vp_dtx_committed_count += j; + if (j > 0) d_tm_inc_gauge(tls->vtl_committed, j); - } } for (i = 0; i < count; i++) { @@ -3152,6 +3165,16 @@ dtx_blob_aggregate(struct umem_instance *umm, struct vos_tls *tls, struct vos_co } out_tx_end: + if (cached_count > 0) { + D_ASSERTF(cont->vc_dtx_committed_count >= cached_count, + "Unexpected committed DTX entries count during aggregation for " DF_UUID + ": %u vs %u\n", + DP_UUID(cont->vc_id), cont->vc_dtx_committed_count, cached_count); + + cont->vc_dtx_committed_count -= cached_count; + cont->vc_pool->vp_dtx_committed_count -= cached_count; + } + rc = umem_tx_end(umm, rc); if (likely(rc != 0)) { DL_ERROR(rc, @@ -3161,16 +3184,6 @@ dtx_blob_aggregate(struct umem_instance *umm, struct vos_tls *tls, struct vos_co goto out; } - if (cached_count > 0) { - D_ASSERTF(cont->vc_dtx_committed_count >= cached_count, - "Unexpected committed DTX entries count during aggregation: %u vs %u\n", - cont->vc_dtx_committed_count, cached_count); - - cont->vc_dtx_committed_count -= cached_count; - cont->vc_pool->vp_dtx_committed_count -= cached_count; - d_tm_dec_gauge(tls->vtl_committed, cached_count); - } - D_DEBUG(DB_TRACE, "Release %d/%d DTX committed entries of blob %p (" UMOFF_PF ") of cont " DF_UUID, cached_count, dtx_aggr_count, dbd, UMOFF_P(dbd_off), DP_UUID(cont->vc_id)); @@ -3184,6 +3197,9 @@ dtx_blob_aggregate(struct umem_instance *umm, struct vos_tls *tls, struct vos_co } out: + if (cached_count > 0) + d_tm_dec_gauge(tls->vtl_committed, cached_count); + return rc; } @@ -3204,6 +3220,9 @@ vos_dtx_aggregate(daos_handle_t coh, const uint64_t *cmt_time) D_ASSERT(cont != NULL); D_ASSERT(cont->vc_pool->vp_sysdb == false); + if (unlikely(cont->vc_dtx_reset == 1)) + return 0; + umm = vos_cont2umm(cont); cont_df = cont->vc_cont_df; dbd_off = cont_df->cd_dtx_committed_head; @@ -3927,25 +3946,31 @@ vos_dtx_cache_reset(daos_handle_t coh, bool force) cmt: if 
(daos_handle_is_valid(cont->vc_dtx_committed_hdl)) { - rc = dbtree_destroy(cont->vc_dtx_committed_hdl, NULL); - if (rc != 0) { - D_ERROR("Failed to destroy committed DTX tree for "DF_UUID": "DF_RC"\n", - DP_UUID(cont->vc_id), DP_RC(rc)); - return rc; - } + uint32_t count = cont->vc_dtx_committed_count; - D_ASSERTF(cont->vc_pool->vp_dtx_committed_count >= cont->vc_dtx_committed_count, - "Unexpected committed DTX entries count: %u vs %u\n", - cont->vc_pool->vp_dtx_committed_count, cont->vc_dtx_committed_count); + cont->vc_dtx_reset = 1; + rc = dbtree_destroy(cont->vc_dtx_committed_hdl, NULL); + /* + * If dbtree_destroy() failed, then the count of DTX entries in the committed index + * tree may not match cont->vc_dtx_committed_count any more and not easy to recover. + * Let's assert here. + */ + D_ASSERTF(rc == 0, + "Failed to destroy committed DTX tree for " DF_UUID ": " DF_RC "\n", + DP_UUID(cont->vc_id), DP_RC(rc)); - cont->vc_pool->vp_dtx_committed_count -= cont->vc_dtx_committed_count; D_ASSERT(cont->vc_pool->vp_sysdb == false); - d_tm_dec_gauge(vos_tls_get(false)->vtl_committed, cont->vc_dtx_committed_count); + D_ASSERTF(cont->vc_pool->vp_dtx_committed_count >= count, + "Unexpected committed DTX entries count for " DF_UUID ": %u vs %u\n", + DP_UUID(cont->vc_id), cont->vc_pool->vp_dtx_committed_count, count); - cont->vc_dtx_committed_hdl = DAOS_HDL_INVAL; + cont->vc_dtx_committed_hdl = DAOS_HDL_INVAL; cont->vc_dtx_committed_count = 0; - cont->vc_cmt_dtx_indexed = 0; + cont->vc_cmt_dtx_indexed = 0; cont->vc_cmt_dtx_reindex_pos = cont->vc_cont_df->cd_dtx_committed_head; + cont->vc_dtx_reset = 0; + cont->vc_pool->vp_dtx_committed_count -= count; + d_tm_dec_gauge(vos_tls_get(false)->vtl_committed, count); } rc = dbtree_create_inplace_ex(VOS_BTR_DTX_CMT_TABLE, 0, DTX_BTREE_ORDER, &uma, diff --git a/src/vos/vos_internal.h b/src/vos/vos_internal.h index 428051203ed..28d7ae765f7 100644 --- a/src/vos/vos_internal.h +++ b/src/vos/vos_internal.h @@ -439,9 +439,7 @@ struct 
vos_container { /* GC runtime for container */ struct vos_gc_info vc_gc_info; /* Various flags */ - unsigned int vc_in_aggregation:1, - vc_in_discard:1, - vc_cmt_dtx_indexed:1; + uint32_t vc_in_aggregation : 1, vc_in_discard : 1, vc_cmt_dtx_indexed : 1, vc_dtx_reset : 1; unsigned int vc_obj_discard_count; unsigned int vc_open_count; /* The latest pool map version that DTX resync has been done. */ From b4ab849fdbf9207216e32a2f8eeb8d2d9099f852 Mon Sep 17 00:00:00 2001 From: Liang Zhen Date: Wed, 21 Jan 2026 20:25:17 +0800 Subject: [PATCH 136/253] DAOS-18326 rebuild: indefinitely retry for NOMEM error (#17412) Rebuild data fetch indefinitely retry for NOMEM error Signed-off-by: Liang Zhen --- src/object/srv_obj_migrate.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/object/srv_obj_migrate.c b/src/object/srv_obj_migrate.c index 1c4655c099c..854d03f5ead 100644 --- a/src/object/srv_obj_migrate.c +++ b/src/object/srv_obj_migrate.c @@ -691,6 +691,7 @@ mrone_obj_fetch_internal(struct migrate_one *mrone, daos_handle_t oh, d_sg_list_ d_iov_t *csum_iov_fetch, struct migrate_pool_tls *tls) { uint32_t *extra_arg = NULL; + int waited = 0; int rc; /* pass rebuild epoch by extra_arg */ @@ -699,11 +700,10 @@ mrone_obj_fetch_internal(struct migrate_one *mrone, daos_handle_t oh, d_sg_list_ mrone->mo_epoch); extra_arg = (uint32_t *)mrone->mo_epoch; } - retry: rc = dsc_obj_fetch(oh, eph, &mrone->mo_dkey, iod_num, iods, sgls, NULL, flags, extra_arg, csum_iov_fetch); - if ((rc == -DER_TIMEDOUT || rc == -DER_FETCH_AGAIN) && + if ((rc == -DER_TIMEDOUT || rc == -DER_FETCH_AGAIN || rc == -DER_NOMEM) && tls->mpt_version + 1 >= tls->mpt_pool->spc_map_version) { if (tls->mpt_fini) { DL_ERROR(rc, DF_RB ": dsc_obj_fetch " DF_UOID "failed when mpt_fini", @@ -714,6 +714,17 @@ mrone_obj_fetch_internal(struct migrate_one *mrone, daos_handle_t oh, d_sg_list_ * fail out. 
*/ DL_WARN(rc, DF_RB ": retry " DF_UOID, DP_RB_MPT(tls), DP_UOID(mrone->mo_oid)); + if (rc == -DER_NOMEM) { + /* sleep 10 seconds before retry, give other layers a chance to + * release resources. + */ + dss_sleep(10 * 1000); + if (waited != 0 && waited % 3600 == 0) { + DL_ERROR(rc, DF_RB ": waited memory for %d hour(s)", + DP_RB_MRO(mrone), waited / 3600); + } + } + waited += 10; D_GOTO(retry, rc); } From 64d17e09c3849a6f08382400e5426d9ef2d84321 Mon Sep 17 00:00:00 2001 From: Tom Nabarro Date: Wed, 21 Jan 2026 14:19:15 +0000 Subject: [PATCH 137/253] DAOS-18347 control: Add rebuild states to pool query (#17322) Add intermediate "derived" rebuild state field to indicate temporal pool rebuild conditions. Preserve rebuild state value (idle/done/busy) whilst adding intermediate states in derived_state field (stopped/stopping/failed/failing) to better inform administrator. Signed-off-by: Tom Nabarro --- src/control/cmd/daos/pretty/pool.go | 5 +- src/control/cmd/daos/pretty/pool_test.go | 153 ++++++- src/control/common/proto/mgmt/pool.pb.go | 418 ++++++++++-------- src/control/lib/control/pool.go | 4 + src/control/lib/control/pool_test.go | 209 ++++++++- src/control/lib/daos/api/pool.go | 4 + src/control/lib/daos/pool.go | 45 +- src/control/lib/daos/pool_test.go | 225 +++++++++- src/control/lib/daos/status.go | 3 + src/mgmt/pool.pb-c.c | 37 +- src/mgmt/pool.pb-c.h | 18 +- src/mgmt/tests/srv_drpc_tests.c | 92 +++- src/proto/mgmt/pool.proto | 13 +- .../ftest/control/dmg_pool_query_test.py | 3 +- 14 files changed, 973 insertions(+), 256 deletions(-) diff --git a/src/control/cmd/daos/pretty/pool.go b/src/control/cmd/daos/pretty/pool.go index 9126d51f145..e6665cedc4d 100644 --- a/src/control/cmd/daos/pretty/pool.go +++ b/src/control/cmd/daos/pretty/pool.go @@ -1,6 +1,6 @@ // // (C) Copyright 2020-2024 Intel Corporation. 
-// (C) Copyright 2025 Hewlett Packard Enterprise Development LP +// (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP // (C) Copyright 2025 Google LLC // // SPDX-License-Identifier: BSD-2-Clause-Patent @@ -136,7 +136,8 @@ func PrintPoolInfo(pi *daos.PoolInfo, out io.Writer) error { fmt.Fprintf(w, "- Rebuild %s, %d objs, %d recs\n", pi.Rebuild.State, pi.Rebuild.Objects, pi.Rebuild.Records) } else { - fmt.Fprintf(w, "- Rebuild failed, status=%d\n", pi.Rebuild.Status) + fmt.Fprintf(w, "- Rebuild %s (state=%s, status=%d)\n", + pi.Rebuild.DerivedState, pi.Rebuild.State, pi.Rebuild.Status) } } else { fmt.Fprintln(w, "- No rebuild status available.") diff --git a/src/control/cmd/daos/pretty/pool_test.go b/src/control/cmd/daos/pretty/pool_test.go index d1364805212..8bb07a787f1 100644 --- a/src/control/cmd/daos/pretty/pool_test.go +++ b/src/control/cmd/daos/pretty/pool_test.go @@ -1,6 +1,6 @@ // // (C) Copyright 2020-2024 Intel Corporation. -// (C) Copyright 2025 Hewlett Packard Enterprise Development LP +// (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -262,7 +262,7 @@ Pool space info: Free: 1 B, min:0 B, max:0 B, mean:0 B `, poolUUID.String()), }, - "rebuild failed": { + "rebuild failing": { pi: &daos.PoolInfo{ QueryMask: daos.DefaultPoolQueryMask, State: daos.PoolServiceStateTargetsExcluded, @@ -275,10 +275,11 @@ Pool space info: PoolLayoutVer: 1, UpgradeLayoutVer: 2, Rebuild: &daos.PoolRebuildStatus{ - Status: 2, - State: daos.PoolRebuildStateBusy, - Objects: 42, - Records: 21, + Status: -2, + State: daos.PoolRebuildStateBusy, + DerivedState: daos.PoolRebuildStateFailing, + Objects: 42, + Records: 21, }, TierStats: []*daos.StorageUsageStats{ { @@ -298,7 +299,7 @@ Pool space info: Pool %s, ntarget=2, disabled=1, leader=42, version=100, state=TargetsExcluded Pool layout out of date (1 < 2) -- see `+backtickStr+` for details. 
Pool health info: -- Rebuild failed, status=2 +- Rebuild failing (state=busy, status=-2) Pool space info: - Target count:1 - Storage tier 0 (SCM): @@ -355,6 +356,144 @@ Pool space info: - Data storage: Total size: 4 B Free: 2 B, min:0 B, max:0 B, mean:0 B +`, poolUUID.String()), + }, + "rebuild state idle": { + pi: &daos.PoolInfo{ + UUID: poolUUID, + TotalTargets: 8, + ActiveTargets: 8, + State: daos.PoolServiceStateReady, + Rebuild: &daos.PoolRebuildStatus{ + State: daos.PoolRebuildStateIdle, + DerivedState: daos.PoolRebuildStateIdle, + Status: 0, + Objects: 0, + Records: 0, + }, + }, + expPrintStr: fmt.Sprintf(` +Pool %s, ntarget=8, disabled=0, leader=0, version=0, state=Ready +Pool health info: +- Rebuild idle, 0 objs, 0 recs +`, poolUUID.String()), + }, + "rebuild state stopped": { + pi: &daos.PoolInfo{ + UUID: poolUUID, + TotalTargets: 8, + ActiveTargets: 8, + State: daos.PoolServiceStateReady, + Rebuild: &daos.PoolRebuildStatus{ + State: daos.PoolRebuildStateDone, + DerivedState: daos.PoolRebuildStateStopped, + Status: int32(daos.OpCanceled), + Objects: 0, + Records: 0, + }, + }, + expPrintStr: fmt.Sprintf(` +Pool %s, ntarget=8, disabled=0, leader=0, version=0, state=Ready +Pool health info: +- Rebuild stopped (state=done, status=-2027) +`, poolUUID.String()), + }, + "rebuild state done": { + pi: &daos.PoolInfo{ + UUID: poolUUID, + TotalTargets: 8, + ActiveTargets: 8, + State: daos.PoolServiceStateReady, + Rebuild: &daos.PoolRebuildStatus{ + State: daos.PoolRebuildStateDone, + DerivedState: daos.PoolRebuildStateDone, + Status: 0, + Objects: 200, + Records: 1000, + }, + }, + expPrintStr: fmt.Sprintf(` +Pool %s, ntarget=8, disabled=0, leader=0, version=0, state=Ready +Pool health info: +- Rebuild done, 200 objs, 1000 recs +`, poolUUID.String()), + }, + "rebuild state failed": { + pi: &daos.PoolInfo{ + UUID: poolUUID, + TotalTargets: 8, + ActiveTargets: 8, + State: daos.PoolServiceStateReady, + Rebuild: &daos.PoolRebuildStatus{ + State: 
daos.PoolRebuildStateDone, + DerivedState: daos.PoolRebuildStateFailed, + Status: -1, + }, + }, + expPrintStr: fmt.Sprintf(` +Pool %s, ntarget=8, disabled=0, leader=0, version=0, state=Ready +Pool health info: +- Rebuild failed (state=done, status=-1) +`, poolUUID.String()), + }, + "rebuild state busy": { + pi: &daos.PoolInfo{ + UUID: poolUUID, + TotalTargets: 8, + ActiveTargets: 8, + State: daos.PoolServiceStateReady, + Rebuild: &daos.PoolRebuildStatus{ + State: daos.PoolRebuildStateBusy, + DerivedState: daos.PoolRebuildStateBusy, + Status: 0, + Objects: 150, + Records: 750, + }, + }, + expPrintStr: fmt.Sprintf(` +Pool %s, ntarget=8, disabled=0, leader=0, version=0, state=Ready +Pool health info: +- Rebuild busy, 150 objs, 750 recs +`, poolUUID.String()), + }, + "rebuild state stopping": { + pi: &daos.PoolInfo{ + UUID: poolUUID, + TotalTargets: 8, + ActiveTargets: 8, + State: daos.PoolServiceStateReady, + Rebuild: &daos.PoolRebuildStatus{ + State: daos.PoolRebuildStateBusy, + DerivedState: daos.PoolRebuildStateStopping, + Status: int32(daos.OpCanceled), + Objects: 100, + Records: 500, + }, + }, + expPrintStr: fmt.Sprintf(` +Pool %s, ntarget=8, disabled=0, leader=0, version=0, state=Ready +Pool health info: +- Rebuild stopping (state=busy, status=-2027) +`, poolUUID.String()), + }, + "rebuild state failing": { + pi: &daos.PoolInfo{ + UUID: poolUUID, + TotalTargets: 8, + ActiveTargets: 8, + State: daos.PoolServiceStateReady, + Rebuild: &daos.PoolRebuildStatus{ + State: daos.PoolRebuildStateBusy, + DerivedState: daos.PoolRebuildStateFailing, + Status: -1, + Objects: 75, + Records: 300, + }, + }, + expPrintStr: fmt.Sprintf(` +Pool %s, ntarget=8, disabled=0, leader=0, version=0, state=Ready +Pool health info: +- Rebuild failing (state=busy, status=-1) `, poolUUID.String()), }, } { diff --git a/src/control/common/proto/mgmt/pool.pb.go b/src/control/common/proto/mgmt/pool.pb.go index 74767f17fd7..d3f73fda719 100644 --- a/src/control/common/proto/mgmt/pool.pb.go +++ 
b/src/control/common/proto/mgmt/pool.pb.go @@ -1,6 +1,6 @@ // // (C) Copyright 2019-2024 Intel Corporation. -// (C) Copyright 2025 Hewlett Packard Enterprise Development LP +// (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -131,9 +131,13 @@ func (PoolServiceState) EnumDescriptor() ([]byte, []int) { type PoolRebuildStatus_State int32 const ( - PoolRebuildStatus_BUSY PoolRebuildStatus_State = 0 // DRS_IN_PROGRESS - PoolRebuildStatus_IDLE PoolRebuildStatus_State = 1 // DRS_NOT_STARTED - PoolRebuildStatus_DONE PoolRebuildStatus_State = 2 // DRS_COMPLETED + PoolRebuildStatus_BUSY PoolRebuildStatus_State = 0 + PoolRebuildStatus_IDLE PoolRebuildStatus_State = 1 + PoolRebuildStatus_DONE PoolRebuildStatus_State = 2 + PoolRebuildStatus_STOPPING PoolRebuildStatus_State = 3 + PoolRebuildStatus_STOPPED PoolRebuildStatus_State = 4 + PoolRebuildStatus_FAILING PoolRebuildStatus_State = 5 + PoolRebuildStatus_FAILED PoolRebuildStatus_State = 6 ) // Enum value maps for PoolRebuildStatus_State. 
@@ -142,11 +146,19 @@ var ( 0: "BUSY", 1: "IDLE", 2: "DONE", + 3: "STOPPING", + 4: "STOPPED", + 5: "FAILING", + 6: "FAILED", } PoolRebuildStatus_State_value = map[string]int32{ - "BUSY": 0, - "IDLE": 1, - "DONE": 2, + "BUSY": 0, + "IDLE": 1, + "DONE": 2, + "STOPPING": 3, + "STOPPED": 4, + "FAILING": 5, + "FAILED": 6, } ) @@ -1730,10 +1742,11 @@ type PoolRebuildStatus struct { sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - Status int32 `protobuf:"varint,1,opt,name=status,proto3" json:"status,omitempty"` // DAOS error code - State PoolRebuildStatus_State `protobuf:"varint,2,opt,name=state,proto3,enum=mgmt.PoolRebuildStatus_State" json:"state,omitempty"` - Objects uint64 `protobuf:"varint,3,opt,name=objects,proto3" json:"objects,omitempty"` - Records uint64 `protobuf:"varint,4,opt,name=records,proto3" json:"records,omitempty"` + Status int32 `protobuf:"varint,1,opt,name=status,proto3" json:"status,omitempty"` // DAOS error code + State PoolRebuildStatus_State `protobuf:"varint,2,opt,name=state,proto3,enum=mgmt.PoolRebuildStatus_State" json:"state,omitempty"` + Objects uint64 `protobuf:"varint,3,opt,name=objects,proto3" json:"objects,omitempty"` + Records uint64 `protobuf:"varint,4,opt,name=records,proto3" json:"records,omitempty"` + DerivedState PoolRebuildStatus_State `protobuf:"varint,5,opt,name=derived_state,json=derivedState,proto3,enum=mgmt.PoolRebuildStatus_State" json:"derived_state,omitempty"` } func (x *PoolRebuildStatus) Reset() { @@ -1796,6 +1809,13 @@ func (x *PoolRebuildStatus) GetRecords() uint64 { return 0 } +func (x *PoolRebuildStatus) GetDerivedState() PoolRebuildStatus_State { + if x != nil { + return x.DerivedState + } + return PoolRebuildStatus_BUSY +} + // PoolQueryResp represents a pool query response. 
type PoolQueryResp struct { state protoimpl.MessageState @@ -3217,7 +3237,7 @@ var file_mgmt_pool_proto_rawDesc = []byte{ 0x35, 0x0a, 0x0a, 0x6d, 0x65, 0x64, 0x69, 0x61, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x16, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x4d, 0x65, 0x64, 0x69, 0x61, 0x54, 0x79, 0x70, 0x65, 0x52, 0x09, 0x6d, 0x65, 0x64, - 0x69, 0x61, 0x54, 0x79, 0x70, 0x65, 0x22, 0xbb, 0x01, 0x0a, 0x11, 0x50, 0x6f, 0x6f, 0x6c, 0x52, + 0x69, 0x61, 0x54, 0x79, 0x70, 0x65, 0x22, 0xb3, 0x02, 0x0a, 0x11, 0x50, 0x6f, 0x6f, 0x6c, 0x52, 0x65, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x33, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x02, 0x20, @@ -3226,177 +3246,184 @@ var file_mgmt_pool_proto_rawDesc = []byte{ 0x74, 0x65, 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x6f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x04, 0x52, 0x07, 0x6f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x72, 0x65, 0x63, 0x6f, 0x72, 0x64, 0x73, 0x18, 0x04, - 0x20, 0x01, 0x28, 0x04, 0x52, 0x07, 0x72, 0x65, 0x63, 0x6f, 0x72, 0x64, 0x73, 0x22, 0x25, 0x0a, - 0x05, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x08, 0x0a, 0x04, 0x42, 0x55, 0x53, 0x59, 0x10, 0x00, - 0x12, 0x08, 0x0a, 0x04, 0x49, 0x44, 0x4c, 0x45, 0x10, 0x01, 0x12, 0x08, 0x0a, 0x04, 0x44, 0x4f, - 0x4e, 0x45, 0x10, 0x02, 0x22, 0x89, 0x07, 0x0a, 0x0d, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, - 0x72, 0x79, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, - 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x12, - 0x0a, 0x04, 0x75, 0x75, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x75, 0x75, - 0x69, 0x64, 0x12, 0x14, 0x0a, 0x05, 0x6c, 0x61, 0x62, 0x65, 
0x6c, 0x18, 0x03, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x05, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x12, 0x23, 0x0a, 0x0d, 0x74, 0x6f, 0x74, 0x61, - 0x6c, 0x5f, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0d, 0x52, - 0x0c, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 0x12, 0x25, 0x0a, - 0x0e, 0x61, 0x63, 0x74, 0x69, 0x76, 0x65, 0x5f, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 0x18, - 0x05, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0d, 0x61, 0x63, 0x74, 0x69, 0x76, 0x65, 0x54, 0x61, 0x72, - 0x67, 0x65, 0x74, 0x73, 0x12, 0x29, 0x0a, 0x10, 0x64, 0x69, 0x73, 0x61, 0x62, 0x6c, 0x65, 0x64, - 0x5f, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0f, - 0x64, 0x69, 0x73, 0x61, 0x62, 0x6c, 0x65, 0x64, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 0x12, - 0x31, 0x0a, 0x07, 0x72, 0x65, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x18, 0x07, 0x20, 0x01, 0x28, 0x0b, - 0x32, 0x17, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x52, 0x65, 0x62, 0x75, - 0x69, 0x6c, 0x64, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x07, 0x72, 0x65, 0x62, 0x75, 0x69, - 0x6c, 0x64, 0x12, 0x36, 0x0a, 0x0a, 0x74, 0x69, 0x65, 0x72, 0x5f, 0x73, 0x74, 0x61, 0x74, 0x73, - 0x18, 0x08, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x17, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x74, - 0x6f, 0x72, 0x61, 0x67, 0x65, 0x55, 0x73, 0x61, 0x67, 0x65, 0x53, 0x74, 0x61, 0x74, 0x73, 0x52, - 0x09, 0x74, 0x69, 0x65, 0x72, 0x53, 0x74, 0x61, 0x74, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x76, 0x65, - 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x0a, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x07, 0x76, 0x65, 0x72, - 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x16, 0x0a, 0x06, 0x6c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x18, 0x0b, - 0x20, 0x01, 0x28, 0x0d, 0x52, 0x06, 0x6c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x12, 0x23, 0x0a, 0x0d, - 0x65, 0x6e, 0x61, 0x62, 0x6c, 0x65, 0x64, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x0c, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x0c, 0x65, 0x6e, 0x61, 0x62, 0x6c, 0x65, 0x64, 0x52, 0x61, 0x6e, 0x6b, - 
0x73, 0x12, 0x25, 0x0a, 0x0e, 0x64, 0x69, 0x73, 0x61, 0x62, 0x6c, 0x65, 0x64, 0x5f, 0x72, 0x61, - 0x6e, 0x6b, 0x73, 0x18, 0x0d, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x64, 0x69, 0x73, 0x61, 0x62, - 0x6c, 0x65, 0x64, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x23, 0x0a, 0x0d, 0x74, 0x6f, 0x74, 0x61, - 0x6c, 0x5f, 0x65, 0x6e, 0x67, 0x69, 0x6e, 0x65, 0x73, 0x18, 0x0e, 0x20, 0x01, 0x28, 0x0d, 0x52, - 0x0c, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x45, 0x6e, 0x67, 0x69, 0x6e, 0x65, 0x73, 0x12, 0x26, 0x0a, - 0x0f, 0x70, 0x6f, 0x6f, 0x6c, 0x5f, 0x6c, 0x61, 0x79, 0x6f, 0x75, 0x74, 0x5f, 0x76, 0x65, 0x72, - 0x18, 0x0f, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0d, 0x70, 0x6f, 0x6f, 0x6c, 0x4c, 0x61, 0x79, 0x6f, - 0x75, 0x74, 0x56, 0x65, 0x72, 0x12, 0x2c, 0x0a, 0x12, 0x75, 0x70, 0x67, 0x72, 0x61, 0x64, 0x65, - 0x5f, 0x6c, 0x61, 0x79, 0x6f, 0x75, 0x74, 0x5f, 0x76, 0x65, 0x72, 0x18, 0x10, 0x20, 0x01, 0x28, - 0x0d, 0x52, 0x10, 0x75, 0x70, 0x67, 0x72, 0x61, 0x64, 0x65, 0x4c, 0x61, 0x79, 0x6f, 0x75, 0x74, - 0x56, 0x65, 0x72, 0x12, 0x2c, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x11, 0x20, 0x01, - 0x28, 0x0e, 0x32, 0x16, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x53, 0x65, - 0x72, 0x76, 0x69, 0x63, 0x65, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, - 0x65, 0x12, 0x17, 0x0a, 0x07, 0x73, 0x76, 0x63, 0x5f, 0x6c, 0x64, 0x72, 0x18, 0x12, 0x20, 0x01, - 0x28, 0x0d, 0x52, 0x06, 0x73, 0x76, 0x63, 0x4c, 0x64, 0x72, 0x12, 0x19, 0x0a, 0x08, 0x73, 0x76, - 0x63, 0x5f, 0x72, 0x65, 0x70, 0x73, 0x18, 0x13, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x07, 0x73, 0x76, - 0x63, 0x52, 0x65, 0x70, 0x73, 0x12, 0x1d, 0x0a, 0x0a, 0x71, 0x75, 0x65, 0x72, 0x79, 0x5f, 0x6d, - 0x61, 0x73, 0x6b, 0x18, 0x14, 0x20, 0x01, 0x28, 0x04, 0x52, 0x09, 0x71, 0x75, 0x65, 0x72, 0x79, - 0x4d, 0x61, 0x73, 0x6b, 0x12, 0x24, 0x0a, 0x0e, 0x6d, 0x65, 0x6d, 0x5f, 0x66, 0x69, 0x6c, 0x65, - 0x5f, 0x62, 0x79, 0x74, 0x65, 0x73, 0x18, 0x15, 0x20, 0x01, 0x28, 0x04, 0x52, 0x0c, 0x6d, 0x65, - 0x6d, 0x46, 0x69, 0x6c, 0x65, 0x42, 
0x79, 0x74, 0x65, 0x73, 0x12, 0x1d, 0x0a, 0x0a, 0x64, 0x65, - 0x61, 0x64, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x16, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, - 0x64, 0x65, 0x61, 0x64, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x27, 0x0a, 0x10, 0x6d, 0x64, 0x5f, - 0x6f, 0x6e, 0x5f, 0x73, 0x73, 0x64, 0x5f, 0x61, 0x63, 0x74, 0x69, 0x76, 0x65, 0x18, 0x17, 0x20, - 0x01, 0x28, 0x08, 0x52, 0x0d, 0x6d, 0x64, 0x4f, 0x6e, 0x53, 0x73, 0x64, 0x41, 0x63, 0x74, 0x69, - 0x76, 0x65, 0x12, 0x28, 0x0a, 0x10, 0x73, 0x65, 0x6c, 0x66, 0x5f, 0x68, 0x65, 0x61, 0x6c, 0x5f, - 0x70, 0x6f, 0x6c, 0x69, 0x63, 0x79, 0x18, 0x18, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x73, 0x65, - 0x6c, 0x66, 0x48, 0x65, 0x61, 0x6c, 0x50, 0x6f, 0x6c, 0x69, 0x63, 0x79, 0x12, 0x2f, 0x0a, 0x14, - 0x73, 0x79, 0x73, 0x5f, 0x73, 0x65, 0x6c, 0x66, 0x5f, 0x68, 0x65, 0x61, 0x6c, 0x5f, 0x70, 0x6f, - 0x6c, 0x69, 0x63, 0x79, 0x18, 0x19, 0x20, 0x01, 0x28, 0x09, 0x52, 0x11, 0x73, 0x79, 0x73, 0x53, - 0x65, 0x6c, 0x66, 0x48, 0x65, 0x61, 0x6c, 0x50, 0x6f, 0x6c, 0x69, 0x63, 0x79, 0x4a, 0x04, 0x08, - 0x09, 0x10, 0x0a, 0x52, 0x0b, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x6e, 0x6f, 0x64, 0x65, 0x73, - 0x22, 0x63, 0x0a, 0x0c, 0x50, 0x6f, 0x6f, 0x6c, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x79, - 0x12, 0x16, 0x0a, 0x06, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, - 0x52, 0x06, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x12, 0x18, 0x0a, 0x06, 0x73, 0x74, 0x72, 0x76, - 0x61, 0x6c, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x48, 0x00, 0x52, 0x06, 0x73, 0x74, 0x72, 0x76, - 0x61, 0x6c, 0x12, 0x18, 0x0a, 0x06, 0x6e, 0x75, 0x6d, 0x76, 0x61, 0x6c, 0x18, 0x03, 0x20, 0x01, - 0x28, 0x04, 0x48, 0x00, 0x52, 0x06, 0x6e, 0x75, 0x6d, 0x76, 0x61, 0x6c, 0x42, 0x07, 0x0a, 0x05, - 0x76, 0x61, 0x6c, 0x75, 0x65, 0x22, 0x83, 0x01, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x53, 0x65, - 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, - 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 
0x0a, 0x02, 0x69, 0x64, - 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x32, 0x0a, 0x0a, 0x70, 0x72, - 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, - 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, - 0x74, 0x79, 0x52, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x12, 0x1b, - 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, - 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x29, 0x0a, 0x0f, 0x50, - 0x6f, 0x6f, 0x6c, 0x53, 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, + 0x20, 0x01, 0x28, 0x04, 0x52, 0x07, 0x72, 0x65, 0x63, 0x6f, 0x72, 0x64, 0x73, 0x12, 0x42, 0x0a, + 0x0d, 0x64, 0x65, 0x72, 0x69, 0x76, 0x65, 0x64, 0x5f, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x05, + 0x20, 0x01, 0x28, 0x0e, 0x32, 0x1d, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, + 0x52, 0x65, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x2e, 0x53, 0x74, + 0x61, 0x74, 0x65, 0x52, 0x0c, 0x64, 0x65, 0x72, 0x69, 0x76, 0x65, 0x64, 0x53, 0x74, 0x61, 0x74, + 0x65, 0x22, 0x59, 0x0a, 0x05, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x08, 0x0a, 0x04, 0x42, 0x55, + 0x53, 0x59, 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, 0x49, 0x44, 0x4c, 0x45, 0x10, 0x01, 0x12, 0x08, + 0x0a, 0x04, 0x44, 0x4f, 0x4e, 0x45, 0x10, 0x02, 0x12, 0x0c, 0x0a, 0x08, 0x53, 0x54, 0x4f, 0x50, + 0x50, 0x49, 0x4e, 0x47, 0x10, 0x03, 0x12, 0x0b, 0x0a, 0x07, 0x53, 0x54, 0x4f, 0x50, 0x50, 0x45, + 0x44, 0x10, 0x04, 0x12, 0x0b, 0x0a, 0x07, 0x46, 0x41, 0x49, 0x4c, 0x49, 0x4e, 0x47, 0x10, 0x05, + 0x12, 0x0a, 0x0a, 0x06, 0x46, 0x41, 0x49, 0x4c, 0x45, 0x44, 0x10, 0x06, 0x22, 0x89, 0x07, 0x0a, + 0x0d, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, - 0x73, 0x74, 
0x61, 0x74, 0x75, 0x73, 0x22, 0x83, 0x01, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x47, - 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, - 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, - 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x32, 0x0a, 0x0a, 0x70, - 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, - 0x12, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x50, 0x72, 0x6f, 0x70, 0x65, - 0x72, 0x74, 0x79, 0x52, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x12, - 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x04, 0x20, 0x03, - 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x5d, 0x0a, 0x0f, - 0x50, 0x6f, 0x6f, 0x6c, 0x47, 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, 0x73, 0x70, 0x12, - 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, - 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x32, 0x0a, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, - 0x72, 0x74, 0x69, 0x65, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x6d, 0x67, - 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x79, 0x52, - 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x22, 0x4f, 0x0a, 0x0e, 0x50, - 0x6f, 0x6f, 0x6c, 0x55, 0x70, 0x67, 0x72, 0x61, 0x64, 0x65, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, - 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, - 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, - 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x03, 0x20, 0x03, - 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x81, 0x01, 0x0a, - 0x12, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 
0x72, 0x79, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, - 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, - 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x03, 0x20, - 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x12, 0x18, 0x0a, 0x07, 0x74, 0x61, 0x72, - 0x67, 0x65, 0x74, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x07, 0x74, 0x61, 0x72, 0x67, - 0x65, 0x74, 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, - 0x18, 0x05, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, - 0x22, 0x75, 0x0a, 0x12, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x54, 0x61, 0x72, 0x67, 0x65, - 0x74, 0x55, 0x73, 0x61, 0x67, 0x65, 0x12, 0x14, 0x0a, 0x05, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x18, - 0x01, 0x20, 0x01, 0x28, 0x04, 0x52, 0x05, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x12, 0x12, 0x0a, 0x04, - 0x66, 0x72, 0x65, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x04, 0x52, 0x04, 0x66, 0x72, 0x65, 0x65, - 0x12, 0x35, 0x0a, 0x0a, 0x6d, 0x65, 0x64, 0x69, 0x61, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x03, - 0x20, 0x01, 0x28, 0x0e, 0x32, 0x16, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x74, 0x6f, 0x72, - 0x61, 0x67, 0x65, 0x4d, 0x65, 0x64, 0x69, 0x61, 0x54, 0x79, 0x70, 0x65, 0x52, 0x09, 0x6d, 0x65, - 0x64, 0x69, 0x61, 0x54, 0x79, 0x70, 0x65, 0x22, 0xb8, 0x02, 0x0a, 0x13, 0x50, 0x6f, 0x6f, 0x6c, - 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x49, 0x6e, 0x66, 0x6f, 0x12, - 0x3b, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x25, - 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, - 0x61, 0x72, 0x67, 0x65, 0x74, 0x49, 0x6e, 0x66, 0x6f, 0x2e, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, - 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x12, 0x2e, 
0x0a, 0x05, - 0x73, 0x70, 0x61, 0x63, 0x65, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x18, 0x2e, 0x6d, 0x67, - 0x6d, 0x74, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, - 0x55, 0x73, 0x61, 0x67, 0x65, 0x52, 0x05, 0x73, 0x70, 0x61, 0x63, 0x65, 0x12, 0x24, 0x0a, 0x0e, - 0x6d, 0x65, 0x6d, 0x5f, 0x66, 0x69, 0x6c, 0x65, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x73, 0x18, 0x04, + 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x75, 0x75, 0x69, 0x64, 0x18, 0x02, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x75, 0x75, 0x69, 0x64, 0x12, 0x14, 0x0a, 0x05, 0x6c, 0x61, + 0x62, 0x65, 0x6c, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6c, 0x61, 0x62, 0x65, 0x6c, + 0x12, 0x23, 0x0a, 0x0d, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, + 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0c, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x54, 0x61, + 0x72, 0x67, 0x65, 0x74, 0x73, 0x12, 0x25, 0x0a, 0x0e, 0x61, 0x63, 0x74, 0x69, 0x76, 0x65, 0x5f, + 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0d, 0x61, + 0x63, 0x74, 0x69, 0x76, 0x65, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 0x12, 0x29, 0x0a, 0x10, + 0x64, 0x69, 0x73, 0x61, 0x62, 0x6c, 0x65, 0x64, 0x5f, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, + 0x18, 0x06, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0f, 0x64, 0x69, 0x73, 0x61, 0x62, 0x6c, 0x65, 0x64, + 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 0x12, 0x31, 0x0a, 0x07, 0x72, 0x65, 0x62, 0x75, 0x69, + 0x6c, 0x64, 0x18, 0x07, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x17, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, + 0x50, 0x6f, 0x6f, 0x6c, 0x52, 0x65, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x53, 0x74, 0x61, 0x74, 0x75, + 0x73, 0x52, 0x07, 0x72, 0x65, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x12, 0x36, 0x0a, 0x0a, 0x74, 0x69, + 0x65, 0x72, 0x5f, 0x73, 0x74, 0x61, 0x74, 0x73, 0x18, 0x08, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x17, + 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x55, 0x73, 0x61, + 0x67, 0x65, 0x53, 0x74, 
0x61, 0x74, 0x73, 0x52, 0x09, 0x74, 0x69, 0x65, 0x72, 0x53, 0x74, 0x61, + 0x74, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x0a, 0x20, + 0x01, 0x28, 0x0d, 0x52, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x16, 0x0a, 0x06, + 0x6c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x06, 0x6c, 0x65, + 0x61, 0x64, 0x65, 0x72, 0x12, 0x23, 0x0a, 0x0d, 0x65, 0x6e, 0x61, 0x62, 0x6c, 0x65, 0x64, 0x5f, + 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x0c, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0c, 0x65, 0x6e, 0x61, + 0x62, 0x6c, 0x65, 0x64, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x25, 0x0a, 0x0e, 0x64, 0x69, 0x73, + 0x61, 0x62, 0x6c, 0x65, 0x64, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x0d, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x0d, 0x64, 0x69, 0x73, 0x61, 0x62, 0x6c, 0x65, 0x64, 0x52, 0x61, 0x6e, 0x6b, 0x73, + 0x12, 0x23, 0x0a, 0x0d, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x65, 0x6e, 0x67, 0x69, 0x6e, 0x65, + 0x73, 0x18, 0x0e, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0c, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x45, 0x6e, + 0x67, 0x69, 0x6e, 0x65, 0x73, 0x12, 0x26, 0x0a, 0x0f, 0x70, 0x6f, 0x6f, 0x6c, 0x5f, 0x6c, 0x61, + 0x79, 0x6f, 0x75, 0x74, 0x5f, 0x76, 0x65, 0x72, 0x18, 0x0f, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0d, + 0x70, 0x6f, 0x6f, 0x6c, 0x4c, 0x61, 0x79, 0x6f, 0x75, 0x74, 0x56, 0x65, 0x72, 0x12, 0x2c, 0x0a, + 0x12, 0x75, 0x70, 0x67, 0x72, 0x61, 0x64, 0x65, 0x5f, 0x6c, 0x61, 0x79, 0x6f, 0x75, 0x74, 0x5f, + 0x76, 0x65, 0x72, 0x18, 0x10, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x10, 0x75, 0x70, 0x67, 0x72, 0x61, + 0x64, 0x65, 0x4c, 0x61, 0x79, 0x6f, 0x75, 0x74, 0x56, 0x65, 0x72, 0x12, 0x2c, 0x0a, 0x05, 0x73, + 0x74, 0x61, 0x74, 0x65, 0x18, 0x11, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x16, 0x2e, 0x6d, 0x67, 0x6d, + 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x53, 0x74, 0x61, + 0x74, 0x65, 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x12, 0x17, 0x0a, 0x07, 0x73, 0x76, 0x63, + 0x5f, 0x6c, 0x64, 0x72, 0x18, 0x12, 0x20, 0x01, 0x28, 0x0d, 
0x52, 0x06, 0x73, 0x76, 0x63, 0x4c, + 0x64, 0x72, 0x12, 0x19, 0x0a, 0x08, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x65, 0x70, 0x73, 0x18, 0x13, + 0x20, 0x03, 0x28, 0x0d, 0x52, 0x07, 0x73, 0x76, 0x63, 0x52, 0x65, 0x70, 0x73, 0x12, 0x1d, 0x0a, + 0x0a, 0x71, 0x75, 0x65, 0x72, 0x79, 0x5f, 0x6d, 0x61, 0x73, 0x6b, 0x18, 0x14, 0x20, 0x01, 0x28, + 0x04, 0x52, 0x09, 0x71, 0x75, 0x65, 0x72, 0x79, 0x4d, 0x61, 0x73, 0x6b, 0x12, 0x24, 0x0a, 0x0e, + 0x6d, 0x65, 0x6d, 0x5f, 0x66, 0x69, 0x6c, 0x65, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x73, 0x18, 0x15, 0x20, 0x01, 0x28, 0x04, 0x52, 0x0c, 0x6d, 0x65, 0x6d, 0x46, 0x69, 0x6c, 0x65, 0x42, 0x79, 0x74, - 0x65, 0x73, 0x12, 0x27, 0x0a, 0x10, 0x6d, 0x64, 0x5f, 0x6f, 0x6e, 0x5f, 0x73, 0x73, 0x64, 0x5f, - 0x61, 0x63, 0x74, 0x69, 0x76, 0x65, 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0d, 0x6d, 0x64, - 0x4f, 0x6e, 0x53, 0x73, 0x64, 0x41, 0x63, 0x74, 0x69, 0x76, 0x65, 0x22, 0x5f, 0x0a, 0x0b, 0x54, - 0x61, 0x72, 0x67, 0x65, 0x74, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x11, 0x0a, 0x0d, 0x53, 0x54, - 0x41, 0x54, 0x45, 0x5f, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, 0x00, 0x12, 0x0c, 0x0a, - 0x08, 0x44, 0x4f, 0x57, 0x4e, 0x5f, 0x4f, 0x55, 0x54, 0x10, 0x01, 0x12, 0x08, 0x0a, 0x04, 0x44, - 0x4f, 0x57, 0x4e, 0x10, 0x02, 0x12, 0x06, 0x0a, 0x02, 0x55, 0x50, 0x10, 0x03, 0x12, 0x09, 0x0a, - 0x05, 0x55, 0x50, 0x5f, 0x49, 0x4e, 0x10, 0x04, 0x12, 0x07, 0x0a, 0x03, 0x4e, 0x45, 0x57, 0x10, - 0x05, 0x12, 0x09, 0x0a, 0x05, 0x44, 0x52, 0x41, 0x49, 0x4e, 0x10, 0x06, 0x4a, 0x04, 0x08, 0x01, - 0x10, 0x02, 0x22, 0x5e, 0x0a, 0x13, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, - 0x61, 0x72, 0x67, 0x65, 0x74, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, - 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, - 0x73, 0x12, 0x2f, 0x0a, 0x05, 0x69, 0x6e, 0x66, 0x6f, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, - 0x32, 0x19, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, - 
0x79, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x05, 0x69, 0x6e, 0x66, - 0x6f, 0x73, 0x22, 0x54, 0x0a, 0x13, 0x50, 0x6f, 0x6f, 0x6c, 0x52, 0x65, 0x62, 0x75, 0x69, 0x6c, - 0x64, 0x53, 0x74, 0x61, 0x72, 0x74, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, - 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, - 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x1b, 0x0a, 0x09, 0x73, - 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, - 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x69, 0x0a, 0x12, 0x50, 0x6f, 0x6f, 0x6c, - 0x52, 0x65, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x53, 0x74, 0x6f, 0x70, 0x52, 0x65, 0x71, 0x12, 0x10, - 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, - 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, - 0x12, 0x14, 0x0a, 0x05, 0x66, 0x6f, 0x72, 0x63, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x08, 0x52, - 0x05, 0x66, 0x6f, 0x72, 0x63, 0x65, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, + 0x65, 0x73, 0x12, 0x1d, 0x0a, 0x0a, 0x64, 0x65, 0x61, 0x64, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, + 0x18, 0x16, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x64, 0x65, 0x61, 0x64, 0x52, 0x61, 0x6e, 0x6b, + 0x73, 0x12, 0x27, 0x0a, 0x10, 0x6d, 0x64, 0x5f, 0x6f, 0x6e, 0x5f, 0x73, 0x73, 0x64, 0x5f, 0x61, + 0x63, 0x74, 0x69, 0x76, 0x65, 0x18, 0x17, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0d, 0x6d, 0x64, 0x4f, + 0x6e, 0x53, 0x73, 0x64, 0x41, 0x63, 0x74, 0x69, 0x76, 0x65, 0x12, 0x28, 0x0a, 0x10, 0x73, 0x65, + 0x6c, 0x66, 0x5f, 0x68, 0x65, 0x61, 0x6c, 0x5f, 0x70, 0x6f, 0x6c, 0x69, 0x63, 0x79, 0x18, 0x18, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x73, 0x65, 0x6c, 0x66, 0x48, 0x65, 0x61, 0x6c, 0x50, 0x6f, + 0x6c, 0x69, 0x63, 0x79, 0x12, 0x2f, 0x0a, 0x14, 0x73, 0x79, 0x73, 0x5f, 0x73, 0x65, 0x6c, 0x66, + 0x5f, 0x68, 0x65, 0x61, 0x6c, 0x5f, 
0x70, 0x6f, 0x6c, 0x69, 0x63, 0x79, 0x18, 0x19, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x11, 0x73, 0x79, 0x73, 0x53, 0x65, 0x6c, 0x66, 0x48, 0x65, 0x61, 0x6c, 0x50, + 0x6f, 0x6c, 0x69, 0x63, 0x79, 0x4a, 0x04, 0x08, 0x09, 0x10, 0x0a, 0x52, 0x0b, 0x74, 0x6f, 0x74, + 0x61, 0x6c, 0x5f, 0x6e, 0x6f, 0x64, 0x65, 0x73, 0x22, 0x63, 0x0a, 0x0c, 0x50, 0x6f, 0x6f, 0x6c, + 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x79, 0x12, 0x16, 0x0a, 0x06, 0x6e, 0x75, 0x6d, 0x62, + 0x65, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x06, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, + 0x12, 0x18, 0x0a, 0x06, 0x73, 0x74, 0x72, 0x76, 0x61, 0x6c, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, + 0x48, 0x00, 0x52, 0x06, 0x73, 0x74, 0x72, 0x76, 0x61, 0x6c, 0x12, 0x18, 0x0a, 0x06, 0x6e, 0x75, + 0x6d, 0x76, 0x61, 0x6c, 0x18, 0x03, 0x20, 0x01, 0x28, 0x04, 0x48, 0x00, 0x52, 0x06, 0x6e, 0x75, + 0x6d, 0x76, 0x61, 0x6c, 0x42, 0x07, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x22, 0x83, 0x01, + 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x53, 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, 0x71, + 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, + 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, + 0x69, 0x64, 0x12, 0x32, 0x0a, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, + 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, + 0x6f, 0x6c, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x79, 0x52, 0x0a, 0x70, 0x72, 0x6f, 0x70, + 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, - 0x6e, 0x6b, 0x73, 0x22, 0x76, 0x0a, 0x13, 0x50, 0x6f, 0x6f, 0x6c, 0x53, 0x65, 0x6c, 0x66, 0x48, - 0x65, 0x61, 0x6c, 0x45, 0x76, 0x61, 0x6c, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, - 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 
0x0e, 0x0a, 0x02, - 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x20, 0x0a, 0x0c, - 0x73, 0x79, 0x73, 0x5f, 0x70, 0x72, 0x6f, 0x70, 0x5f, 0x76, 0x61, 0x6c, 0x18, 0x04, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x0a, 0x73, 0x79, 0x73, 0x50, 0x72, 0x6f, 0x70, 0x56, 0x61, 0x6c, 0x12, 0x1b, - 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, - 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x2a, 0x25, 0x0a, 0x10, 0x53, - 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x4d, 0x65, 0x64, 0x69, 0x61, 0x54, 0x79, 0x70, 0x65, 0x12, - 0x07, 0x0a, 0x03, 0x53, 0x43, 0x4d, 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, 0x4e, 0x56, 0x4d, 0x45, - 0x10, 0x01, 0x2a, 0x5d, 0x0a, 0x10, 0x50, 0x6f, 0x6f, 0x6c, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, - 0x65, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x0c, 0x0a, 0x08, 0x43, 0x72, 0x65, 0x61, 0x74, 0x69, - 0x6e, 0x67, 0x10, 0x00, 0x12, 0x09, 0x0a, 0x05, 0x52, 0x65, 0x61, 0x64, 0x79, 0x10, 0x01, 0x12, - 0x0e, 0x0a, 0x0a, 0x44, 0x65, 0x73, 0x74, 0x72, 0x6f, 0x79, 0x69, 0x6e, 0x67, 0x10, 0x02, 0x12, - 0x13, 0x0a, 0x0f, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 0x45, 0x78, 0x63, 0x6c, 0x75, 0x64, - 0x65, 0x64, 0x10, 0x03, 0x12, 0x0b, 0x0a, 0x07, 0x55, 0x6e, 0x6b, 0x6e, 0x6f, 0x77, 0x6e, 0x10, - 0x04, 0x42, 0x3a, 0x5a, 0x38, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, - 0x64, 0x61, 0x6f, 0x73, 0x2d, 0x73, 0x74, 0x61, 0x63, 0x6b, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2f, - 0x73, 0x72, 0x63, 0x2f, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x2f, 0x63, 0x6f, 0x6d, 0x6d, - 0x6f, 0x6e, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x6d, 0x67, 0x6d, 0x74, 0x62, 0x06, 0x70, - 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x6e, 0x6b, 0x73, 0x22, 0x29, 0x0a, 0x0f, 0x50, 0x6f, 0x6f, 0x6c, 0x53, 0x65, 0x74, 0x50, 0x72, + 0x6f, 0x70, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, + 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 
0x22, 0x83, + 0x01, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x47, 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, + 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, + 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x02, 0x69, 0x64, 0x12, 0x32, 0x0a, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, + 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, + 0x6f, 0x6f, 0x6c, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x79, 0x52, 0x0a, 0x70, 0x72, 0x6f, + 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, + 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, + 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x5d, 0x0a, 0x0f, 0x50, 0x6f, 0x6f, 0x6c, 0x47, 0x65, 0x74, 0x50, + 0x72, 0x6f, 0x70, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, + 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, + 0x32, 0x0a, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x18, 0x02, 0x20, + 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x50, + 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x79, 0x52, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, + 0x69, 0x65, 0x73, 0x22, 0x4f, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x55, 0x70, 0x67, 0x72, 0x61, + 0x64, 0x65, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, + 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, + 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x81, 0x01, 0x0a, 0x12, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, + 0x72, 0x79, 0x54, 0x61, 
0x72, 0x67, 0x65, 0x74, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, + 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, + 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x12, 0x0a, + 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, + 0x6b, 0x12, 0x18, 0x0a, 0x07, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 0x18, 0x04, 0x20, 0x03, + 0x28, 0x0d, 0x52, 0x07, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x73, + 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, + 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x75, 0x0a, 0x12, 0x53, 0x74, 0x6f, 0x72, + 0x61, 0x67, 0x65, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x55, 0x73, 0x61, 0x67, 0x65, 0x12, 0x14, + 0x0a, 0x05, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28, 0x04, 0x52, 0x05, 0x74, + 0x6f, 0x74, 0x61, 0x6c, 0x12, 0x12, 0x0a, 0x04, 0x66, 0x72, 0x65, 0x65, 0x18, 0x02, 0x20, 0x01, + 0x28, 0x04, 0x52, 0x04, 0x66, 0x72, 0x65, 0x65, 0x12, 0x35, 0x0a, 0x0a, 0x6d, 0x65, 0x64, 0x69, + 0x61, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x16, 0x2e, 0x6d, + 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x4d, 0x65, 0x64, 0x69, 0x61, + 0x54, 0x79, 0x70, 0x65, 0x52, 0x09, 0x6d, 0x65, 0x64, 0x69, 0x61, 0x54, 0x79, 0x70, 0x65, 0x22, + 0xb8, 0x02, 0x0a, 0x13, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, 0x72, + 0x67, 0x65, 0x74, 0x49, 0x6e, 0x66, 0x6f, 0x12, 0x3b, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, + 0x18, 0x02, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x25, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, + 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x49, 0x6e, 0x66, + 0x6f, 0x2e, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x05, 0x73, + 0x74, 0x61, 0x74, 0x65, 0x12, 0x2e, 0x0a, 0x05, 0x73, 0x70, 
0x61, 0x63, 0x65, 0x18, 0x03, 0x20, + 0x03, 0x28, 0x0b, 0x32, 0x18, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x61, + 0x67, 0x65, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x55, 0x73, 0x61, 0x67, 0x65, 0x52, 0x05, 0x73, + 0x70, 0x61, 0x63, 0x65, 0x12, 0x24, 0x0a, 0x0e, 0x6d, 0x65, 0x6d, 0x5f, 0x66, 0x69, 0x6c, 0x65, + 0x5f, 0x62, 0x79, 0x74, 0x65, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x04, 0x52, 0x0c, 0x6d, 0x65, + 0x6d, 0x46, 0x69, 0x6c, 0x65, 0x42, 0x79, 0x74, 0x65, 0x73, 0x12, 0x27, 0x0a, 0x10, 0x6d, 0x64, + 0x5f, 0x6f, 0x6e, 0x5f, 0x73, 0x73, 0x64, 0x5f, 0x61, 0x63, 0x74, 0x69, 0x76, 0x65, 0x18, 0x05, + 0x20, 0x01, 0x28, 0x08, 0x52, 0x0d, 0x6d, 0x64, 0x4f, 0x6e, 0x53, 0x73, 0x64, 0x41, 0x63, 0x74, + 0x69, 0x76, 0x65, 0x22, 0x5f, 0x0a, 0x0b, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x53, 0x74, 0x61, + 0x74, 0x65, 0x12, 0x11, 0x0a, 0x0d, 0x53, 0x54, 0x41, 0x54, 0x45, 0x5f, 0x55, 0x4e, 0x4b, 0x4e, + 0x4f, 0x57, 0x4e, 0x10, 0x00, 0x12, 0x0c, 0x0a, 0x08, 0x44, 0x4f, 0x57, 0x4e, 0x5f, 0x4f, 0x55, + 0x54, 0x10, 0x01, 0x12, 0x08, 0x0a, 0x04, 0x44, 0x4f, 0x57, 0x4e, 0x10, 0x02, 0x12, 0x06, 0x0a, + 0x02, 0x55, 0x50, 0x10, 0x03, 0x12, 0x09, 0x0a, 0x05, 0x55, 0x50, 0x5f, 0x49, 0x4e, 0x10, 0x04, + 0x12, 0x07, 0x0a, 0x03, 0x4e, 0x45, 0x57, 0x10, 0x05, 0x12, 0x09, 0x0a, 0x05, 0x44, 0x52, 0x41, + 0x49, 0x4e, 0x10, 0x06, 0x4a, 0x04, 0x08, 0x01, 0x10, 0x02, 0x22, 0x5e, 0x0a, 0x13, 0x50, 0x6f, + 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x52, 0x65, 0x73, + 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x2f, 0x0a, 0x05, 0x69, 0x6e, 0x66, + 0x6f, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x19, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, + 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x49, + 0x6e, 0x66, 0x6f, 0x52, 0x05, 0x69, 0x6e, 0x66, 0x6f, 0x73, 0x22, 0x54, 0x0a, 0x13, 0x50, 0x6f, + 
0x6f, 0x6c, 0x52, 0x65, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x53, 0x74, 0x61, 0x72, 0x74, 0x52, 0x65, + 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, + 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x02, 0x69, 0x64, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, + 0x18, 0x03, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, + 0x22, 0x69, 0x0a, 0x12, 0x50, 0x6f, 0x6f, 0x6c, 0x52, 0x65, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x53, + 0x74, 0x6f, 0x70, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x14, 0x0a, 0x05, 0x66, 0x6f, 0x72, 0x63, + 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x08, 0x52, 0x05, 0x66, 0x6f, 0x72, 0x63, 0x65, 0x12, 0x1b, + 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, + 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x76, 0x0a, 0x13, 0x50, + 0x6f, 0x6f, 0x6c, 0x53, 0x65, 0x6c, 0x66, 0x48, 0x65, 0x61, 0x6c, 0x45, 0x76, 0x61, 0x6c, 0x52, + 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x02, 0x69, 0x64, 0x12, 0x20, 0x0a, 0x0c, 0x73, 0x79, 0x73, 0x5f, 0x70, 0x72, 0x6f, 0x70, + 0x5f, 0x76, 0x61, 0x6c, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x73, 0x79, 0x73, 0x50, + 0x72, 0x6f, 0x70, 0x56, 0x61, 0x6c, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, + 0x6e, 0x6b, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, + 0x6e, 0x6b, 0x73, 0x2a, 0x25, 0x0a, 0x10, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x4d, 0x65, + 0x64, 0x69, 0x61, 0x54, 0x79, 0x70, 
0x65, 0x12, 0x07, 0x0a, 0x03, 0x53, 0x43, 0x4d, 0x10, 0x00, + 0x12, 0x08, 0x0a, 0x04, 0x4e, 0x56, 0x4d, 0x45, 0x10, 0x01, 0x2a, 0x5d, 0x0a, 0x10, 0x50, 0x6f, + 0x6f, 0x6c, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x0c, + 0x0a, 0x08, 0x43, 0x72, 0x65, 0x61, 0x74, 0x69, 0x6e, 0x67, 0x10, 0x00, 0x12, 0x09, 0x0a, 0x05, + 0x52, 0x65, 0x61, 0x64, 0x79, 0x10, 0x01, 0x12, 0x0e, 0x0a, 0x0a, 0x44, 0x65, 0x73, 0x74, 0x72, + 0x6f, 0x79, 0x69, 0x6e, 0x67, 0x10, 0x02, 0x12, 0x13, 0x0a, 0x0f, 0x54, 0x61, 0x72, 0x67, 0x65, + 0x74, 0x73, 0x45, 0x78, 0x63, 0x6c, 0x75, 0x64, 0x65, 0x64, 0x10, 0x03, 0x12, 0x0b, 0x0a, 0x07, + 0x55, 0x6e, 0x6b, 0x6e, 0x6f, 0x77, 0x6e, 0x10, 0x04, 0x42, 0x3a, 0x5a, 0x38, 0x67, 0x69, 0x74, + 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2d, 0x73, 0x74, 0x61, + 0x63, 0x6b, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2f, 0x73, 0x72, 0x63, 0x2f, 0x63, 0x6f, 0x6e, 0x74, + 0x72, 0x6f, 0x6c, 0x2f, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, + 0x2f, 0x6d, 0x67, 0x6d, 0x74, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( @@ -3462,21 +3489,22 @@ var file_mgmt_pool_proto_depIdxs = []int32{ 40, // 2: mgmt.ListContResp.containers:type_name -> mgmt.ListContResp.Cont 0, // 3: mgmt.StorageUsageStats.media_type:type_name -> mgmt.StorageMediaType 2, // 4: mgmt.PoolRebuildStatus.state:type_name -> mgmt.PoolRebuildStatus.State - 24, // 5: mgmt.PoolQueryResp.rebuild:type_name -> mgmt.PoolRebuildStatus - 23, // 6: mgmt.PoolQueryResp.tier_stats:type_name -> mgmt.StorageUsageStats - 1, // 7: mgmt.PoolQueryResp.state:type_name -> mgmt.PoolServiceState - 26, // 8: mgmt.PoolSetPropReq.properties:type_name -> mgmt.PoolProperty - 26, // 9: mgmt.PoolGetPropReq.properties:type_name -> mgmt.PoolProperty - 26, // 10: mgmt.PoolGetPropResp.properties:type_name -> mgmt.PoolProperty - 0, // 11: mgmt.StorageTargetUsage.media_type:type_name -> mgmt.StorageMediaType - 3, // 12: 
mgmt.PoolQueryTargetInfo.state:type_name -> mgmt.PoolQueryTargetInfo.TargetState - 33, // 13: mgmt.PoolQueryTargetInfo.space:type_name -> mgmt.StorageTargetUsage - 34, // 14: mgmt.PoolQueryTargetResp.infos:type_name -> mgmt.PoolQueryTargetInfo - 15, // [15:15] is the sub-list for method output_type - 15, // [15:15] is the sub-list for method input_type - 15, // [15:15] is the sub-list for extension type_name - 15, // [15:15] is the sub-list for extension extendee - 0, // [0:15] is the sub-list for field type_name + 2, // 5: mgmt.PoolRebuildStatus.derived_state:type_name -> mgmt.PoolRebuildStatus.State + 24, // 6: mgmt.PoolQueryResp.rebuild:type_name -> mgmt.PoolRebuildStatus + 23, // 7: mgmt.PoolQueryResp.tier_stats:type_name -> mgmt.StorageUsageStats + 1, // 8: mgmt.PoolQueryResp.state:type_name -> mgmt.PoolServiceState + 26, // 9: mgmt.PoolSetPropReq.properties:type_name -> mgmt.PoolProperty + 26, // 10: mgmt.PoolGetPropReq.properties:type_name -> mgmt.PoolProperty + 26, // 11: mgmt.PoolGetPropResp.properties:type_name -> mgmt.PoolProperty + 0, // 12: mgmt.StorageTargetUsage.media_type:type_name -> mgmt.StorageMediaType + 3, // 13: mgmt.PoolQueryTargetInfo.state:type_name -> mgmt.PoolQueryTargetInfo.TargetState + 33, // 14: mgmt.PoolQueryTargetInfo.space:type_name -> mgmt.StorageTargetUsage + 34, // 15: mgmt.PoolQueryTargetResp.infos:type_name -> mgmt.PoolQueryTargetInfo + 16, // [16:16] is the sub-list for method output_type + 16, // [16:16] is the sub-list for method input_type + 16, // [16:16] is the sub-list for extension type_name + 16, // [16:16] is the sub-list for extension extendee + 0, // [0:16] is the sub-list for field type_name } func init() { file_mgmt_pool_proto_init() } diff --git a/src/control/lib/control/pool.go b/src/control/lib/control/pool.go index bd9a3504a2e..afc55ead998 100644 --- a/src/control/lib/control/pool.go +++ b/src/control/lib/control/pool.go @@ -582,6 +582,10 @@ func poolQueryInt(ctx context.Context, rpcClient UnaryInvoker, req 
*PoolQueryReq return nil, err } + if err := resp.UpdateRebuildStatus(); err != nil { + return nil, err + } + if req.QueryMask.HasOption(daos.PoolQueryOptionSelfHealPolicy) { if err := resp.UpdateSelfHealPolicy(ctx, rpcClient); err != nil { return nil, errors.Wrap(err, "pool get-prop self_heal failed") diff --git a/src/control/lib/control/pool_test.go b/src/control/lib/control/pool_test.go index d93e0c5c8ce..eed5044a5f9 100644 --- a/src/control/lib/control/pool_test.go +++ b/src/control/lib/control/pool_test.go @@ -1,6 +1,6 @@ // // (C) Copyright 2020-2024 Intel Corporation. -// (C) Copyright 2025 Hewlett Packard Enterprise Development LP +// (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -1882,9 +1882,10 @@ func TestControl_PoolQuery(t *testing.T) { ActiveTargets: 42, State: daos.PoolServiceStateReady, Rebuild: &daos.PoolRebuildStatus{ - State: daos.PoolRebuildStateIdle, - Objects: 1, - Records: 2, + State: daos.PoolRebuildStateIdle, + DerivedState: daos.PoolRebuildStateIdle, + Objects: 1, + Records: 2, }, TierStats: []*daos.StorageUsageStats{ { @@ -1942,9 +1943,10 @@ func TestControl_PoolQuery(t *testing.T) { ActiveTargets: 42, State: daos.PoolServiceStateReady, Rebuild: &daos.PoolRebuildStatus{ - State: daos.PoolRebuildStateIdle, - Objects: 1, - Records: 2, + State: daos.PoolRebuildStateIdle, + DerivedState: daos.PoolRebuildStateIdle, + Objects: 1, + Records: 2, }, TierStats: []*daos.StorageUsageStats{ { @@ -2008,6 +2010,192 @@ func TestControl_PoolQuery(t *testing.T) { }, expErr: errors.New("> 1 occurrences of prop 4 in resp"), }, + "query with rebuild state busy with DER_OP_CANCELED (stopping)": { + mic: &MockInvokerConfig{ + UnaryResponse: MockMSResponse("host1", nil, + &mgmtpb.PoolQueryResp{ + Uuid: poolUUID.String(), + TotalTargets: 42, + ActiveTargets: 42, + State: mgmtpb.PoolServiceState_Ready, + Rebuild: &mgmtpb.PoolRebuildStatus{ + Status: int32(daos.OpCanceled), + State: 
mgmtpb.PoolRebuildStatus_BUSY, + Objects: 100, + Records: 500, + }, + }, + ), + }, + expResp: &PoolQueryResp{ + PoolInfo: daos.PoolInfo{ + UUID: poolUUID, + TotalTargets: 42, + ActiveTargets: 42, + State: daos.PoolServiceStateReady, + Rebuild: &daos.PoolRebuildStatus{ + Status: int32(daos.OpCanceled), + State: daos.PoolRebuildStateBusy, + DerivedState: daos.PoolRebuildStateStopping, + Objects: 100, + Records: 500, + }, + }, + }, + }, + "query with rebuild state idle with DER_OP_CANCELED (stopped)": { + mic: &MockInvokerConfig{ + UnaryResponse: MockMSResponse("host1", nil, + &mgmtpb.PoolQueryResp{ + Uuid: poolUUID.String(), + TotalTargets: 42, + ActiveTargets: 42, + State: mgmtpb.PoolServiceState_Ready, + Rebuild: &mgmtpb.PoolRebuildStatus{ + Status: int32(daos.OpCanceled), + State: mgmtpb.PoolRebuildStatus_IDLE, + Objects: 0, + Records: 0, + }, + }, + ), + }, + expResp: &PoolQueryResp{ + PoolInfo: daos.PoolInfo{ + UUID: poolUUID, + TotalTargets: 42, + ActiveTargets: 42, + State: daos.PoolServiceStateReady, + Rebuild: &daos.PoolRebuildStatus{ + Status: int32(daos.OpCanceled), + State: daos.PoolRebuildStateIdle, + DerivedState: daos.PoolRebuildStateStopped, + Objects: 0, + Records: 0, + }, + }, + }, + }, + "query with rebuild state busy with error (failing)": { + mic: &MockInvokerConfig{ + UnaryResponse: MockMSResponse("host1", nil, + &mgmtpb.PoolQueryResp{ + Uuid: poolUUID.String(), + TotalTargets: 42, + ActiveTargets: 42, + State: mgmtpb.PoolServiceState_Ready, + Rebuild: &mgmtpb.PoolRebuildStatus{ + State: mgmtpb.PoolRebuildStatus_BUSY, + Status: -1, + Objects: 75, + Records: 300, + }, + }, + ), + }, + expResp: &PoolQueryResp{ + PoolInfo: daos.PoolInfo{ + UUID: poolUUID, + TotalTargets: 42, + ActiveTargets: 42, + State: daos.PoolServiceStateReady, + Rebuild: &daos.PoolRebuildStatus{ + State: daos.PoolRebuildStateBusy, + DerivedState: daos.PoolRebuildStateFailing, + Status: -1, + Objects: 75, + Records: 300, + }, + }, + }, + }, + "query with rebuild state idle with 
error (failed)": { + mic: &MockInvokerConfig{ + UnaryResponse: MockMSResponse("host1", nil, + &mgmtpb.PoolQueryResp{ + Uuid: poolUUID.String(), + TotalTargets: 42, + ActiveTargets: 42, + State: mgmtpb.PoolServiceState_Ready, + Rebuild: &mgmtpb.PoolRebuildStatus{ + State: mgmtpb.PoolRebuildStatus_IDLE, + Status: -5, + }, + }, + ), + }, + expResp: &PoolQueryResp{ + PoolInfo: daos.PoolInfo{ + UUID: poolUUID, + TotalTargets: 42, + ActiveTargets: 42, + State: daos.PoolServiceStateReady, + Rebuild: &daos.PoolRebuildStatus{ + State: daos.PoolRebuildStateIdle, + DerivedState: daos.PoolRebuildStateFailed, + Status: -5, + }, + }, + }, + }, + "query with rebuild state done": { + mic: &MockInvokerConfig{ + UnaryResponse: MockMSResponse("host1", nil, + &mgmtpb.PoolQueryResp{ + Uuid: poolUUID.String(), + TotalTargets: 42, + ActiveTargets: 42, + State: mgmtpb.PoolServiceState_Ready, + Rebuild: &mgmtpb.PoolRebuildStatus{ + State: mgmtpb.PoolRebuildStatus_DONE, + Objects: 200, + Records: 1000, + }, + }, + ), + }, + expResp: &PoolQueryResp{ + PoolInfo: daos.PoolInfo{ + UUID: poolUUID, + TotalTargets: 42, + ActiveTargets: 42, + State: daos.PoolServiceStateReady, + Rebuild: &daos.PoolRebuildStatus{ + State: daos.PoolRebuildStateDone, + DerivedState: daos.PoolRebuildStateDone, + Objects: 200, + Records: 1000, + }, + }, + }, + }, + "query with rebuild state idle": { + mic: &MockInvokerConfig{ + UnaryResponse: MockMSResponse("host1", nil, + &mgmtpb.PoolQueryResp{ + Uuid: poolUUID.String(), + TotalTargets: 42, + ActiveTargets: 42, + State: mgmtpb.PoolServiceState_Ready, + Rebuild: &mgmtpb.PoolRebuildStatus{ + State: mgmtpb.PoolRebuildStatus_IDLE, + }, + }, + ), + }, + expResp: &PoolQueryResp{ + PoolInfo: daos.PoolInfo{ + UUID: poolUUID, + TotalTargets: 42, + ActiveTargets: 42, + State: daos.PoolServiceStateReady, + Rebuild: &daos.PoolRebuildStatus{ + State: daos.PoolRebuildStateIdle, + DerivedState: daos.PoolRebuildStateIdle, + }, + }, + }, + }, } { t.Run(name, func(t *testing.T) { log, 
buf := logging.NewTestLogger(t.Name()) @@ -2572,9 +2760,10 @@ func TestControl_ListPools(t *testing.T) { rebuildState = daos.PoolRebuildStateBusy } return &daos.PoolRebuildStatus{ - State: rebuildState, - Objects: 1, - Records: 2, + State: rebuildState, + DerivedState: rebuildState, + Objects: 1, + Records: 2, } } expTierStats := []*daos.StorageUsageStats{ diff --git a/src/control/lib/daos/api/pool.go b/src/control/lib/daos/api/pool.go index 57e5f03a096..9eac8929555 100644 --- a/src/control/lib/daos/api/pool.go +++ b/src/control/lib/daos/api/pool.go @@ -407,6 +407,10 @@ func PoolQuery(ctx context.Context, sysName, poolID string, queryMask daos.PoolQ } } + if err := poolInfo.UpdateRebuildStatus(); err != nil { + return nil, err + } + return poolInfo, nil } diff --git a/src/control/lib/daos/pool.go b/src/control/lib/daos/pool.go index d51b6312cf6..88dc167058d 100644 --- a/src/control/lib/daos/pool.go +++ b/src/control/lib/daos/pool.go @@ -1,6 +1,6 @@ // // (C) Copyright 2020-2024 Intel Corporation. -// (C) Copyright 2025 Hewlett Packard Enterprise Development LP +// (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP // (C) Copyright 2025 Google LLC // // SPDX-License-Identifier: BSD-2-Clause-Patent @@ -57,6 +57,7 @@ type ( PoolRebuildStatus struct { Status int32 `json:"status"` State PoolRebuildState `json:"state"` + DerivedState PoolRebuildState `json:"derived_state"` Objects uint64 `json:"objects"` Records uint64 `json:"records"` TotalObjects uint64 `json:"total_objects"` @@ -313,6 +314,40 @@ func (pi *PoolInfo) RebuildState() string { return pi.Rebuild.State.String() } +// UpdateRebuildStatus evaluates a derived state to indicate transient rebuild conditions. 
+func (pi *PoolInfo) UpdateRebuildStatus() error { + if pi.Rebuild == nil { + return nil + } + if pi.Rebuild.State > PoolRebuildStateDone { + return errors.New("illegal rebuild state value") + } + ds := pi.Rebuild.State + + switch pi.Rebuild.State { + case PoolRebuildStateIdle: + if pi.Rebuild.Status == int32(OpCanceled) { + ds = PoolRebuildStateStopped + } else if pi.Rebuild.Status != 0 { + ds = PoolRebuildStateFailed + } + case PoolRebuildStateDone: + if pi.Rebuild.Status != 0 { + ds = PoolRebuildStateFailed + } + case PoolRebuildStateBusy: + if pi.Rebuild.Status == int32(OpCanceled) { + ds = PoolRebuildStateStopping + } else if pi.Rebuild.Status != 0 { + ds = PoolRebuildStateFailing + } + } + + pi.Rebuild.DerivedState = ds + + return nil +} + // Name retrieves effective name for pool from either label or UUID. func (pi *PoolInfo) Name() string { name := pi.Label @@ -428,6 +463,14 @@ const ( PoolRebuildStateDone = PoolRebuildState(mgmtpb.PoolRebuildStatus_DONE) // PoolRebuildStateBusy indicates that the rebuild process is in progress. PoolRebuildStateBusy = PoolRebuildState(mgmtpb.PoolRebuildStatus_BUSY) + // PoolRebuildStateStopping indicates that the rebuild process is stopping (transient). + PoolRebuildStateStopping = PoolRebuildState(mgmtpb.PoolRebuildStatus_STOPPING) + // PoolRebuildStateStopped indicates that the rebuild process has stopped. + PoolRebuildStateStopped = PoolRebuildState(mgmtpb.PoolRebuildStatus_STOPPED) + // PoolRebuildStateFailing indicates that the rebuild process is failing (transient). + PoolRebuildStateFailing = PoolRebuildState(mgmtpb.PoolRebuildStatus_FAILING) + // PoolRebuildStateFailed indicates that the rebuild process has failed. 
+ PoolRebuildStateFailed = PoolRebuildState(mgmtpb.PoolRebuildStatus_FAILED) ) func (prs PoolRebuildState) String() string { diff --git a/src/control/lib/daos/pool_test.go b/src/control/lib/daos/pool_test.go index 39ed135858c..5c90e03784d 100644 --- a/src/control/lib/daos/pool_test.go +++ b/src/control/lib/daos/pool_test.go @@ -1,6 +1,6 @@ // // (C) Copyright 2020-2024 Intel Corporation. -// (C) Copyright 2025 Hewlett Packard Enterprise Development LP +// (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -338,3 +338,226 @@ func TestDaos_PoolQueryMaskUnmarshalJSON(t *testing.T) { }) } } + +func TestDaos_PoolRebuildState_String(t *testing.T) { + for name, tc := range map[string]struct { + state PoolRebuildState + expString string + }{ + "idle": {PoolRebuildStateIdle, "idle"}, + "busy": {PoolRebuildStateBusy, "busy"}, + "done": {PoolRebuildStateDone, "done"}, + "stopping": {PoolRebuildStateStopping, "stopping"}, + "stopped": {PoolRebuildStateStopped, "stopped"}, + "failing": {PoolRebuildStateFailing, "failing"}, + "failed": {PoolRebuildStateFailed, "failed"}, + "unknown": {PoolRebuildState(999), "unknown"}, + } { + t.Run(name, func(t *testing.T) { + gotString := tc.state.String() + + test.AssertEqual(t, tc.expString, gotString, "unexpected string value") + }) + } +} + +func TestDaos_PoolRebuildState_MarshalJSON(t *testing.T) { + for name, tc := range map[string]struct { + state PoolRebuildState + expJSON string + expErr error + }{ + "idle": {PoolRebuildStateIdle, `"idle"`, nil}, + "busy": {PoolRebuildStateBusy, `"busy"`, nil}, + "done": {PoolRebuildStateDone, `"done"`, nil}, + "stopping": {PoolRebuildStateStopping, `"stopping"`, nil}, + "stopped": {PoolRebuildStateStopped, `"stopped"`, nil}, + "failing": {PoolRebuildStateFailing, `"failing"`, nil}, + "failed": {PoolRebuildStateFailed, `"failed"`, nil}, + "unknown": {PoolRebuildState(999), `"unknown"`, nil}, + } { + t.Run(name, func(t *testing.T) 
{ + gotJSON, gotErr := tc.state.MarshalJSON() + + test.CmpErr(t, tc.expErr, gotErr) + if tc.expErr != nil { + return + } + + test.AssertEqual(t, tc.expJSON, string(gotJSON), "unexpected JSON") + }) + } +} + +func TestDaos_PoolRebuildState_UnmarshalJSON(t *testing.T) { + for name, tc := range map[string]struct { + json string + expState PoolRebuildState + expErr error + }{ + "idle": {`"idle"`, PoolRebuildStateIdle, nil}, + "busy": {`"busy"`, PoolRebuildStateBusy, nil}, + "done": {`"done"`, PoolRebuildStateDone, nil}, + "stopping": {`"stopping"`, PoolRebuildStateStopping, nil}, + "stopped": {`"stopped"`, PoolRebuildStateStopped, nil}, + "failing": {`"failing"`, PoolRebuildStateFailing, nil}, + "failed": {`"failed"`, PoolRebuildStateFailed, nil}, + "uppercase idle": {`"IDLE"`, PoolRebuildStateIdle, nil}, + "uppercase busy": {`"BUSY"`, PoolRebuildStateBusy, nil}, + "uppercase done": {`"DONE"`, PoolRebuildStateDone, nil}, + "uppercase stopping": {`"STOPPING"`, PoolRebuildStateStopping, nil}, + "uppercase stopped": {`"STOPPED"`, PoolRebuildStateStopped, nil}, + "uppercase failing": {`"FAILING"`, PoolRebuildStateFailing, nil}, + "uppercase failed": {`"FAILED"`, PoolRebuildStateFailed, nil}, + "mixed case stopped": {`"StOpPeD"`, PoolRebuildStateStopped, nil}, + "invalid": {`"invalid"`, PoolRebuildState(0), errors.New("failed to unmarshal")}, + "empty": {`""`, PoolRebuildState(0), errors.New("failed to unmarshal")}, + } { + t.Run(name, func(t *testing.T) { + var gotState PoolRebuildState + gotErr := gotState.UnmarshalJSON([]byte(tc.json)) + + test.CmpErr(t, tc.expErr, gotErr) + if tc.expErr != nil { + return + } + + test.AssertEqual(t, tc.expState, gotState, "unexpected state") + }) + } +} + +func TestDaos_PoolInfo_UpdateRebuildStatus(t *testing.T) { + for name, tc := range map[string]struct { + poolInfo *PoolInfo + expDerivedState PoolRebuildState + expErr error + }{ + "nil rebuild status": { + poolInfo: &PoolInfo{}, + }, + "idle state with status 0": { + poolInfo: 
&PoolInfo{ + Rebuild: &PoolRebuildStatus{ + State: PoolRebuildStateIdle, + }, + }, + expDerivedState: PoolRebuildStateIdle, + }, + "idle state with canceled status": { + poolInfo: &PoolInfo{ + Rebuild: &PoolRebuildStatus{ + State: PoolRebuildStateIdle, + Status: int32(OpCanceled), + }, + }, + expDerivedState: PoolRebuildStateStopped, + }, + "idle state with non-zero non-canceled status": { + poolInfo: &PoolInfo{ + Rebuild: &PoolRebuildStatus{ + State: PoolRebuildStateIdle, + Status: -1008, + }, + }, + expDerivedState: PoolRebuildStateFailed, + }, + "done state with status 0": { + poolInfo: &PoolInfo{ + Rebuild: &PoolRebuildStatus{ + State: PoolRebuildStateDone, + }, + }, + expDerivedState: PoolRebuildStateDone, + }, + "done state with non-zero status": { + poolInfo: &PoolInfo{ + Rebuild: &PoolRebuildStatus{ + State: PoolRebuildStateDone, + Status: -1009, + }, + }, + expDerivedState: PoolRebuildStateFailed, + }, + "busy state with status 0": { + poolInfo: &PoolInfo{ + Rebuild: &PoolRebuildStatus{ + State: PoolRebuildStateBusy, + }, + }, + expDerivedState: PoolRebuildStateBusy, + }, + "busy state with canceled status": { + poolInfo: &PoolInfo{ + Rebuild: &PoolRebuildStatus{ + State: PoolRebuildStateBusy, + Status: int32(OpCanceled), + }, + }, + expDerivedState: PoolRebuildStateStopping, + }, + "busy state with non-zero non-canceled status": { + poolInfo: &PoolInfo{ + Rebuild: &PoolRebuildStatus{ + State: PoolRebuildStateBusy, + Status: -1010, + }, + }, + expDerivedState: PoolRebuildStateFailing, + }, + "illegal stopped state": { + poolInfo: &PoolInfo{ + Rebuild: &PoolRebuildStatus{ + State: PoolRebuildStateStopped, + }, + }, + expErr: errors.New("illegal rebuild state"), + }, + "illegal stopping state": { + poolInfo: &PoolInfo{ + Rebuild: &PoolRebuildStatus{ + State: PoolRebuildStateStopping, + }, + }, + expErr: errors.New("illegal rebuild state"), + }, + "illegal failed state": { + poolInfo: &PoolInfo{ + Rebuild: &PoolRebuildStatus{ + State: PoolRebuildStateFailed, 
+ }, + }, + expErr: errors.New("illegal rebuild state"), + }, + "illegal failing state": { + poolInfo: &PoolInfo{ + Rebuild: &PoolRebuildStatus{ + State: PoolRebuildStateFailing, + }, + }, + expErr: errors.New("illegal rebuild state"), + }, + "illegal rebuild state value": { + poolInfo: &PoolInfo{ + Rebuild: &PoolRebuildStatus{ + State: PoolRebuildState(999), + }, + }, + expErr: errors.New("illegal rebuild state value"), + }, + } { + t.Run(name, func(t *testing.T) { + gotErr := tc.poolInfo.UpdateRebuildStatus() + + test.CmpErr(t, tc.expErr, gotErr) + if tc.expErr != nil { + return + } + + if tc.poolInfo.Rebuild != nil { + test.AssertEqual(t, tc.expDerivedState, tc.poolInfo.Rebuild.DerivedState, + "unexpected derived state") + } + }) + } +} diff --git a/src/control/lib/daos/status.go b/src/control/lib/daos/status.go index 54099f31a2f..3de2435be64 100644 --- a/src/control/lib/daos/status.go +++ b/src/control/lib/daos/status.go @@ -1,5 +1,6 @@ // // (C) Copyright 2019-2024 Intel Corporation. +// (C) Copyright 2026 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -120,6 +121,8 @@ const ( MercuryFatalError Status = -C.DER_HG_FATAL // NoService indicates the pool service is not up and didn't process the pool request NoService Status = -C.DER_NO_SERVICE + // OpCanceled indicates that an operation was cancelled (non-crt). 
+ OpCanceled = -C.DER_OP_CANCELED ) const ( diff --git a/src/mgmt/pool.pb-c.c b/src/mgmt/pool.pb-c.c index 3c59fbd0523..82c188b5609 100644 --- a/src/mgmt/pool.pb-c.c +++ b/src/mgmt/pool.pb-c.c @@ -3327,20 +3327,28 @@ const ProtobufCMessageDescriptor mgmt__storage_usage_stats__descriptor = (ProtobufCMessageInit) mgmt__storage_usage_stats__init, NULL,NULL,NULL /* reserved[123] */ }; -static const ProtobufCEnumValue mgmt__pool_rebuild_status__state__enum_values_by_number[3] = +static const ProtobufCEnumValue mgmt__pool_rebuild_status__state__enum_values_by_number[7] = { { "BUSY", "MGMT__POOL_REBUILD_STATUS__STATE__BUSY", 0 }, { "IDLE", "MGMT__POOL_REBUILD_STATUS__STATE__IDLE", 1 }, { "DONE", "MGMT__POOL_REBUILD_STATUS__STATE__DONE", 2 }, + { "STOPPING", "MGMT__POOL_REBUILD_STATUS__STATE__STOPPING", 3 }, + { "STOPPED", "MGMT__POOL_REBUILD_STATUS__STATE__STOPPED", 4 }, + { "FAILING", "MGMT__POOL_REBUILD_STATUS__STATE__FAILING", 5 }, + { "FAILED", "MGMT__POOL_REBUILD_STATUS__STATE__FAILED", 6 }, }; static const ProtobufCIntRange mgmt__pool_rebuild_status__state__value_ranges[] = { -{0, 0},{0, 3} +{0, 0},{0, 7} }; -static const ProtobufCEnumValueIndex mgmt__pool_rebuild_status__state__enum_values_by_name[3] = +static const ProtobufCEnumValueIndex mgmt__pool_rebuild_status__state__enum_values_by_name[7] = { { "BUSY", 0 }, { "DONE", 2 }, + { "FAILED", 6 }, + { "FAILING", 5 }, { "IDLE", 1 }, + { "STOPPED", 4 }, + { "STOPPING", 3 }, }; const ProtobufCEnumDescriptor mgmt__pool_rebuild_status__state__descriptor = { @@ -3349,15 +3357,15 @@ const ProtobufCEnumDescriptor mgmt__pool_rebuild_status__state__descriptor = "State", "Mgmt__PoolRebuildStatus__State", "mgmt", - 3, + 7, mgmt__pool_rebuild_status__state__enum_values_by_number, - 3, + 7, mgmt__pool_rebuild_status__state__enum_values_by_name, 1, mgmt__pool_rebuild_status__state__value_ranges, NULL,NULL,NULL,NULL /* reserved[1234] */ }; -static const ProtobufCFieldDescriptor mgmt__pool_rebuild_status__field_descriptors[4] = 
+static const ProtobufCFieldDescriptor mgmt__pool_rebuild_status__field_descriptors[5] = { { "status", @@ -3407,8 +3415,21 @@ static const ProtobufCFieldDescriptor mgmt__pool_rebuild_status__field_descripto 0, /* flags */ 0,NULL,NULL /* reserved1,reserved2, etc */ }, + { + "derived_state", + 5, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_ENUM, + 0, /* quantifier_offset */ + offsetof(Mgmt__PoolRebuildStatus, derived_state), + &mgmt__pool_rebuild_status__state__descriptor, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, }; static const unsigned mgmt__pool_rebuild_status__field_indices_by_name[] = { + 4, /* field[4] = derived_state */ 2, /* field[2] = objects */ 3, /* field[3] = records */ 1, /* field[1] = state */ @@ -3417,7 +3438,7 @@ static const unsigned mgmt__pool_rebuild_status__field_indices_by_name[] = { static const ProtobufCIntRange mgmt__pool_rebuild_status__number_ranges[1 + 1] = { { 1, 0 }, - { 0, 4 } + { 0, 5 } }; const ProtobufCMessageDescriptor mgmt__pool_rebuild_status__descriptor = { @@ -3427,7 +3448,7 @@ const ProtobufCMessageDescriptor mgmt__pool_rebuild_status__descriptor = "Mgmt__PoolRebuildStatus", "mgmt", sizeof(Mgmt__PoolRebuildStatus), - 4, + 5, mgmt__pool_rebuild_status__field_descriptors, mgmt__pool_rebuild_status__field_indices_by_name, 1, mgmt__pool_rebuild_status__number_ranges, diff --git a/src/mgmt/pool.pb-c.h b/src/mgmt/pool.pb-c.h index d357bc1f33b..a41ebb658ce 100644 --- a/src/mgmt/pool.pb-c.h +++ b/src/mgmt/pool.pb-c.h @@ -57,18 +57,13 @@ typedef struct _Mgmt__PoolSelfHealEvalReq Mgmt__PoolSelfHealEvalReq; /* --- enums --- */ typedef enum _Mgmt__PoolRebuildStatus__State { - /* - * DRS_IN_PROGRESS - */ MGMT__POOL_REBUILD_STATUS__STATE__BUSY = 0, - /* - * DRS_NOT_STARTED - */ MGMT__POOL_REBUILD_STATUS__STATE__IDLE = 1, - /* - * DRS_COMPLETED - */ - MGMT__POOL_REBUILD_STATUS__STATE__DONE = 2 + MGMT__POOL_REBUILD_STATUS__STATE__DONE = 2, + MGMT__POOL_REBUILD_STATUS__STATE__STOPPING = 3, + 
MGMT__POOL_REBUILD_STATUS__STATE__STOPPED = 4, + MGMT__POOL_REBUILD_STATUS__STATE__FAILING = 5, + MGMT__POOL_REBUILD_STATUS__STATE__FAILED = 6 PROTOBUF_C__FORCE_ENUM_TO_BE_INT_SIZE(MGMT__POOL_REBUILD_STATUS__STATE) } Mgmt__PoolRebuildStatus__State; typedef enum _Mgmt__PoolQueryTargetInfo__TargetState { @@ -779,10 +774,11 @@ struct _Mgmt__PoolRebuildStatus Mgmt__PoolRebuildStatus__State state; uint64_t objects; uint64_t records; + Mgmt__PoolRebuildStatus__State derived_state; }; #define MGMT__POOL_REBUILD_STATUS__INIT \ { PROTOBUF_C_MESSAGE_INIT (&mgmt__pool_rebuild_status__descriptor) \ - , 0, MGMT__POOL_REBUILD_STATUS__STATE__BUSY, 0, 0 } + , 0, MGMT__POOL_REBUILD_STATUS__STATE__BUSY, 0, 0, MGMT__POOL_REBUILD_STATUS__STATE__BUSY } /* diff --git a/src/mgmt/tests/srv_drpc_tests.c b/src/mgmt/tests/srv_drpc_tests.c index 5c79cab0734..aac8c4351ed 100644 --- a/src/mgmt/tests/srv_drpc_tests.c +++ b/src/mgmt/tests/srv_drpc_tests.c @@ -1,6 +1,6 @@ /* * (C) Copyright 2019-2024 Intel Corporation. 
- * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -1406,7 +1406,7 @@ expect_query_resp_with_info(daos_pool_info_t *exp_info, } static void -test_drpc_pool_query_success(void **state) +test_drpc_pool_query_rebuild_idle_success(void **state) { Drpc__Call call = DRPC__CALL__INIT; Drpc__Response resp = DRPC__RESPONSE__INIT; @@ -1443,7 +1443,7 @@ test_drpc_pool_query_success(void **state) } static void -test_drpc_pool_query_success_rebuild_busy(void **state) +test_drpc_pool_query_rebuild_done_success(void **state) { Drpc__Call call = DRPC__CALL__INIT; Drpc__Response resp = DRPC__RESPONSE__INIT; @@ -1452,6 +1452,7 @@ test_drpc_pool_query_success_rebuild_busy(void **state) init_test_pool_info(&exp_info); init_test_rebuild_status(&exp_info.pi_rebuild_st); exp_info.pi_rebuild_st.rs_version = 1; + exp_info.pi_rebuild_st.rs_state = DRS_COMPLETED; ds_mgmt_pool_query_info_out = exp_info; ds_mgmt_pool_query_mem_bytes = 11; @@ -1459,16 +1460,14 @@ test_drpc_pool_query_success_rebuild_busy(void **state) ds_mgmt_drpc_pool_query(&call, &resp); - expect_query_resp_with_info(&exp_info, - MGMT__POOL_REBUILD_STATUS__STATE__BUSY, - &resp); + expect_query_resp_with_info(&exp_info, MGMT__POOL_REBUILD_STATUS__STATE__DONE, &resp); D_FREE(call.body.data); D_FREE(resp.body.data); } static void -test_drpc_pool_query_success_rebuild_done(void **state) +test_drpc_pool_query_rebuild_busy_success(void **state) { Drpc__Call call = DRPC__CALL__INIT; Drpc__Response resp = DRPC__RESPONSE__INIT; @@ -1477,7 +1476,6 @@ test_drpc_pool_query_success_rebuild_done(void **state) init_test_pool_info(&exp_info); init_test_rebuild_status(&exp_info.pi_rebuild_st); exp_info.pi_rebuild_st.rs_version = 1; - exp_info.pi_rebuild_st.rs_state = DRS_COMPLETED; ds_mgmt_pool_query_info_out = exp_info; ds_mgmt_pool_query_mem_bytes = 11; @@ -1485,16 +1483,76 @@ 
test_drpc_pool_query_success_rebuild_done(void **state) ds_mgmt_drpc_pool_query(&call, &resp); - expect_query_resp_with_info(&exp_info, - MGMT__POOL_REBUILD_STATUS__STATE__DONE, - &resp); + expect_query_resp_with_info(&exp_info, MGMT__POOL_REBUILD_STATUS__STATE__BUSY, &resp); + + D_FREE(call.body.data); + D_FREE(resp.body.data); +} + +static void +test_drpc_pool_query_rebuild_idle_err(void **state) +{ + Drpc__Call call = DRPC__CALL__INIT; + Drpc__Response resp = DRPC__RESPONSE__INIT; + daos_pool_info_t exp_info = {0}; + + init_test_pool_info(&exp_info); + exp_info.pi_rebuild_st.rs_version = 1; + exp_info.pi_rebuild_st.rs_errno = -DER_MISC; + exp_info.pi_rebuild_st.rs_state = DRS_NOT_STARTED; + + ds_mgmt_pool_query_info_out = exp_info; + ds_mgmt_pool_query_mem_bytes = 11; + /* + * rebuild results returned to us shouldn't include the number of + * objects/records if there's an error. + */ + ds_mgmt_pool_query_info_out.pi_rebuild_st.rs_obj_nr = 42; + ds_mgmt_pool_query_info_out.pi_rebuild_st.rs_rec_nr = 999; + + setup_pool_query_drpc_call(&call, TEST_UUID, 0); + + ds_mgmt_drpc_pool_query(&call, &resp); + + expect_query_resp_with_info(&exp_info, MGMT__POOL_REBUILD_STATUS__STATE__IDLE, &resp); + + D_FREE(call.body.data); + D_FREE(resp.body.data); +} + +static void +test_drpc_pool_query_rebuild_done_err(void **state) +{ + Drpc__Call call = DRPC__CALL__INIT; + Drpc__Response resp = DRPC__RESPONSE__INIT; + daos_pool_info_t exp_info = {0}; + + init_test_pool_info(&exp_info); + exp_info.pi_rebuild_st.rs_version = 1; + exp_info.pi_rebuild_st.rs_errno = -DER_MISC; + exp_info.pi_rebuild_st.rs_state = DRS_COMPLETED; + + ds_mgmt_pool_query_info_out = exp_info; + ds_mgmt_pool_query_mem_bytes = 11; + /* + * rebuild results returned to us shouldn't include the number of + * objects/records if there's an error. 
+ */ + ds_mgmt_pool_query_info_out.pi_rebuild_st.rs_obj_nr = 42; + ds_mgmt_pool_query_info_out.pi_rebuild_st.rs_rec_nr = 999; + + setup_pool_query_drpc_call(&call, TEST_UUID, 0); + + ds_mgmt_drpc_pool_query(&call, &resp); + + expect_query_resp_with_info(&exp_info, MGMT__POOL_REBUILD_STATUS__STATE__DONE, &resp); D_FREE(call.body.data); D_FREE(resp.body.data); } static void -test_drpc_pool_query_success_rebuild_err(void **state) +test_drpc_pool_query_rebuild_busy_err(void **state) { Drpc__Call call = DRPC__CALL__INIT; Drpc__Response resp = DRPC__RESPONSE__INIT; @@ -3409,10 +3467,12 @@ main(void) REINT_TEST(test_drpc_reint_bad_uuid), QUERY_TEST(test_drpc_pool_query_bad_uuid), QUERY_TEST(test_drpc_pool_query_mgmt_svc_fails), - QUERY_TEST(test_drpc_pool_query_success), - QUERY_TEST(test_drpc_pool_query_success_rebuild_busy), - QUERY_TEST(test_drpc_pool_query_success_rebuild_done), - QUERY_TEST(test_drpc_pool_query_success_rebuild_err), + QUERY_TEST(test_drpc_pool_query_rebuild_idle_success), + QUERY_TEST(test_drpc_pool_query_rebuild_done_success), + QUERY_TEST(test_drpc_pool_query_rebuild_busy_success), + QUERY_TEST(test_drpc_pool_query_rebuild_idle_err), + QUERY_TEST(test_drpc_pool_query_rebuild_done_err), + QUERY_TEST(test_drpc_pool_query_rebuild_busy_err), QUERY_TARGETS_TEST(test_drpc_pool_query_targets_bad_uuid), QUERY_TARGETS_TEST(test_drpc_pool_query_targets_mgmt_svc_fails), QUERY_TARGETS_TEST(test_drpc_pool_query_targets_with_targets), diff --git a/src/proto/mgmt/pool.proto b/src/proto/mgmt/pool.proto index faaaf3b1f85..728fc112cbe 100644 --- a/src/proto/mgmt/pool.proto +++ b/src/proto/mgmt/pool.proto @@ -1,6 +1,6 @@ // // (C) Copyright 2019-2024 Intel Corporation. 
-// (C) Copyright 2025 Hewlett Packard Enterprise Development LP +// (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -204,13 +204,18 @@ message StorageUsageStats { message PoolRebuildStatus { int32 status = 1; // DAOS error code enum State { - BUSY = 0; // DRS_IN_PROGRESS - IDLE = 1; // DRS_NOT_STARTED - DONE = 2; // DRS_COMPLETED + BUSY = 0; + IDLE = 1; + DONE = 2; + STOPPING = 3; + STOPPED = 4; + FAILING = 5; + FAILED = 6; } State state = 2; uint64 objects = 3; uint64 records = 4; + State derived_state = 5; } enum PoolServiceState { diff --git a/src/tests/ftest/control/dmg_pool_query_test.py b/src/tests/ftest/control/dmg_pool_query_test.py index f2280833bdf..bbb4ba30d9d 100644 --- a/src/tests/ftest/control/dmg_pool_query_test.py +++ b/src/tests/ftest/control/dmg_pool_query_test.py @@ -1,6 +1,6 @@ """ (C) Copyright 2020-2024 Intel Corporation. - (C) Copyright 2025 Hewlett Packard Enterprise Development LP + (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -77,6 +77,7 @@ def test_pool_query_basic(self): "rebuild": { "status": self.params.get("rebuild_status", path="/run/exp_vals/rebuild/*"), "state": self.params.get("state", path="/run/exp_vals/rebuild/*"), + "derived_state": self.params.get("state", path="/run/exp_vals/rebuild/*"), "objects": self.params.get("objects", path="/run/exp_vals/rebuild/*"), "records": self.params.get("records", path="/run/exp_vals/rebuild/*"), "total_objects": self.params.get("total_objects", path="/run/exp_vals/rebuild/*") From 135aac342ae3661f98255fbd324d6e721d90b7a6 Mon Sep 17 00:00:00 2001 From: Liu Xuezhao Date: Thu, 22 Jan 2026 10:29:51 +0800 Subject: [PATCH 138/253] DAOS-18473 object: fix a bug of layout create (#17405) Signed-off-by: Xuezhao Liu --- src/object/cli_obj.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/object/cli_obj.c b/src/object/cli_obj.c 
index d2c309a990d..d4b474b5edf 100644 --- a/src/object/cli_obj.c +++ b/src/object/cli_obj.c @@ -1,8 +1,7 @@ /** * (C) Copyright 2016-2024 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP * (C) Copyright 2025 Google LLC - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -387,7 +386,7 @@ obj_layout_refresh(struct dc_object *obj) D_RWLOCK_WRLOCK(&obj->cob_lock); obj_layout_free(obj); - rc = obj_layout_create(obj, 0, true); + rc = obj_layout_create(obj, obj->cob_mode, true); D_RWLOCK_UNLOCK(&obj->cob_lock); return rc; From e39d8d81ac73df6d8db3de9f9cdd8c379df8aa56 Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Wed, 21 Jan 2026 20:56:30 -0600 Subject: [PATCH 139/253] DAOS-18161 object: load container write stamp from VOS (#17387) After restart, load the latest epoch of writes that require aggregation from VOS instead of setting it to zero, otherwise EC aggregation has to scan the container again even it's not changed after previous aggregation. Signed-off-by: Liang Zhen Co-authored-by: Liang Zhen --- src/include/daos_srv/vos_types.h | 4 +++- src/object/srv_ec_aggregate.c | 29 +++++++++++++++++------------ src/vos/vos_container.c | 15 +++++++++++---- 3 files changed, 31 insertions(+), 17 deletions(-) diff --git a/src/include/daos_srv/vos_types.h b/src/include/daos_srv/vos_types.h index ca50a41658f..ee64bb92e1b 100644 --- a/src/include/daos_srv/vos_types.h +++ b/src/include/daos_srv/vos_types.h @@ -1,6 +1,6 @@ /** * (C) Copyright 2015-2025 Intel Corporation. 
- * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -184,6 +184,8 @@ typedef struct { daos_size_t ci_used; /** Highest (Last) aggregated epoch */ daos_epoch_t ci_hae; + /** latest epoch for writes that require aggregation */ + daos_epoch_t ci_agg_write; /** TODO */ } vos_cont_info_t; diff --git a/src/object/srv_ec_aggregate.c b/src/object/srv_ec_aggregate.c index 708f77540a8..67d3c4aaee5 100644 --- a/src/object/srv_ec_aggregate.c +++ b/src/object/srv_ec_aggregate.c @@ -2756,19 +2756,27 @@ cont_ec_aggregate_cb(struct ds_cont_child *cont, daos_epoch_range_t *epr, return rc; } - if (likely(cont->sc_ec_agg_eph_valid)) { - if (cont->sc_ec_agg_eph == 0) { - D_INFO(DF_CONT ": update cont->sc_ec_agg_eph to " DF_X64, - DP_CONT(cont->sc_pool->spc_uuid, cont->sc_uuid), - cont->sc_ec_agg_eph_boundary); - cont->sc_ec_agg_eph = cont->sc_ec_agg_eph_boundary; - } - } else { + if (!cont->sc_ec_agg_eph_valid) { D_DEBUG(DB_EPC, DF_CONT ": pause EC aggregation for sc_ec_agg_eph_boundary.\n", DP_CONT(cont->sc_pool->spc_uuid, cont->sc_uuid)); return 0; } + if (cont->sc_ec_agg_eph == 0) { + D_INFO(DF_CONT ": update cont->sc_ec_agg_eph to " DF_X64, + DP_CONT(cont->sc_pool->spc_uuid, cont->sc_uuid), + cont->sc_ec_agg_eph_boundary); + cont->sc_ec_agg_eph = cont->sc_ec_agg_eph_boundary; + } + + if (cont->sc_ec_update_timestamp == 0) { + vos_cont_info_t info; + + /* load the timestamp of the last write that can be aggregated from VOS */ + vos_cont_query(ec_agg_param->ap_cont_handle, &info); + cont->sc_ec_update_timestamp = info.ci_agg_write; + } + ec_agg_eph = cont->sc_ec_agg_eph; ec_agg_param->ap_min_unagg_eph = DAOS_EPOCH_MAX; if (flags & VOS_AGG_FL_FORCE_SCAN) { @@ -2780,10 +2788,7 @@ cont_ec_aggregate_cb(struct ds_cont_child *cont, daos_epoch_range_t *epr, ec_agg_param->ap_filter_eph = MAX(epr->epr_lo, cont->sc_ec_agg_eph); } - /* Currently 
cont->sc_ec_update_timestamp is in memory so this optimization won't be helpful - * when there is no container update since restart. - */ - if (ec_agg_param->ap_filter_eph != 0 && cont->sc_ec_update_timestamp != 0 && + if (ec_agg_param->ap_filter_eph != 0 && ec_agg_param->ap_filter_eph >= cont->sc_ec_update_timestamp) { D_DEBUG(DB_EPC, DF_CONT " skip EC agg " DF_U64 ">= " DF_U64 "\n", DP_CONT(cont->sc_pool_uuid, cont->sc_uuid), ec_agg_param->ap_filter_eph, diff --git a/src/vos/vos_container.c b/src/vos/vos_container.c index ef359462b68..9cb992b28be 100644 --- a/src/vos/vos_container.c +++ b/src/vos/vos_container.c @@ -1,6 +1,6 @@ /** * (C) Copyright 2016-2024 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -545,6 +545,8 @@ int vos_cont_query(daos_handle_t coh, vos_cont_info_t *cont_info) { struct vos_container *cont; + struct vos_cont_df *cont_df; + uint64_t feats; cont = vos_hdl2cont(coh); if (cont == NULL) { @@ -552,9 +554,14 @@ vos_cont_query(daos_handle_t coh, vos_cont_info_t *cont_info) return -DER_INVAL; } - cont_info->ci_nobjs = cont->vc_cont_df->cd_nobjs; - cont_info->ci_used = cont->vc_cont_df->cd_used; - cont_info->ci_hae = cont->vc_cont_df->cd_hae; + cont_df = cont->vc_cont_df; + memset(cont_info, 0, sizeof(*cont_info)); + cont_info->ci_nobjs = cont_df->cd_nobjs; + cont_info->ci_used = cont_df->cd_used; + cont_info->ci_hae = cont_df->cd_hae; + + feats = dbtree_feats_get(&cont_df->cd_obj_root); + vos_feats_agg_time_get(feats, &cont_info->ci_agg_write); return 0; } From 5604065b43ed59295ffd6ee5a01bcd1157d31d9e Mon Sep 17 00:00:00 2001 From: Ravindran Padmanabhan Date: Thu, 22 Jan 2026 13:34:20 -0500 Subject: [PATCH 140/253] DAOS-17917 test: Make aggregation/punching test work on MDonSSD environment. 
(#17414) Enhance aggregation/punching.py test to verify container space reclamation with container deletion and data removal on a MD on SSD phase 2 pool. Signed-off-by: rpadma2 --- src/tests/ftest/aggregation/punching.yaml | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/tests/ftest/aggregation/punching.yaml b/src/tests/ftest/aggregation/punching.yaml index b62953919d4..a0e5e11bef6 100644 --- a/src/tests/ftest/aggregation/punching.yaml +++ b/src/tests/ftest/aggregation/punching.yaml @@ -1,3 +1,6 @@ +launch: + !filter-only : /run/pool/default # yamllint disable-line rule:colons + hosts: test_servers: 2 test_clients: 2 @@ -9,10 +12,12 @@ server_config: 0: log_mask: INFO storage: auto -pool: - scm_size: 8000000000 - nvme_size: 80000000000 - svcn: 1 +pool: !mux + default: + size: 100% + md_on_ssd_p2: + size: 100% + mem_ratio: 25 container: type: POSIX control_method: daos From efdfead43451881843858407d0728f4e7d4106a1 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 23 Jan 2026 08:48:31 -0800 Subject: [PATCH 141/253] DAOS-18504 cq: update yamllint to 1.38.0 (#17398) Updates `yamllint` from 1.37.1 to 1.38.0 Signed-off-by: dependabot[bot] --- utils/cq/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/cq/requirements.txt b/utils/cq/requirements.txt index f98e5c16946..079145ab4f3 100644 --- a/utils/cq/requirements.txt +++ b/utils/cq/requirements.txt @@ -5,7 +5,7 @@ pyenchant flake8==7.3.0 isort==7.0.0 pylint==4.0.4 -yamllint==1.37.1 +yamllint==1.38.0 codespell==2.4.1 # Used by ci/jira_query.py which pip installs it standalone. 
jira From a077e0b9a0fab2ae5fc0d7cc4b5a5d02a3aeb91c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 23 Jan 2026 08:49:47 -0800 Subject: [PATCH 142/253] DAOS-18504 cq: bump codeql-action from 4.31.9 to 4.31.10 (#17399) Updates `github/codeql-action` from 4.31.9 to 4.31.10 Signed-off-by: dependabot[bot] --- .github/workflows/ossf-scorecard.yml | 2 +- .github/workflows/trivy.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ossf-scorecard.yml b/.github/workflows/ossf-scorecard.yml index 78aad4c6682..2151192fcd1 100644 --- a/.github/workflows/ossf-scorecard.yml +++ b/.github/workflows/ossf-scorecard.yml @@ -71,6 +71,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard (optional). # Commenting out will disable upload of results to your repo's Code Scanning dashboard - name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@5d4e8d1aca955e8d8589aabd499c5cae939e33c7 # v4.31.9 + uses: github/codeql-action/upload-sarif@cdefb33c0f6224e58673d9004f47f7cb3e328b89 # v4.31.10 with: sarif_file: results.sarif diff --git a/.github/workflows/trivy.yml b/.github/workflows/trivy.yml index a3b1528788f..0e5937aac62 100644 --- a/.github/workflows/trivy.yml +++ b/.github/workflows/trivy.yml @@ -68,7 +68,7 @@ jobs: trivy-config: 'utils/trivy/trivy.yaml' - name: Upload Trivy scan results to GitHub Security tab - uses: github/codeql-action/upload-sarif@5d4e8d1aca955e8d8589aabd499c5cae939e33c7 # v4.31.9 + uses: github/codeql-action/upload-sarif@cdefb33c0f6224e58673d9004f47f7cb3e328b89 # v4.31.10 with: sarif_file: 'trivy-results.sarif' From 2faecd2bd819e6f5c07ef3fe78f0871c4f3df58a Mon Sep 17 00:00:00 2001 From: Li Wei Date: Sat, 24 Jan 2026 01:53:51 +0900 Subject: [PATCH 143/253] DAOS-18478 tests: Fix daos_test POOL13 (#17401) daos_test POOL13 should restore the original value of pool property self_heal, or later tests may be affected. 
Signed-off-by: Li Wei --- src/tests/suite/daos_pool.c | 8 +++++++- src/tests/suite/daos_test.h | 4 +++- src/tests/suite/daos_test_common.c | 8 +++++++- 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/src/tests/suite/daos_pool.c b/src/tests/suite/daos_pool.c index a15b40bdda8..0286a467241 100644 --- a/src/tests/suite/daos_pool.c +++ b/src/tests/suite/daos_pool.c @@ -1,6 +1,6 @@ /** * (C) Copyright 2016-2023 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -734,11 +734,17 @@ pool_op_retry(void **state) print_message("success\n"); /* pool set prop success committed, "lost" reply - duplicate RPC retry */ + char *orig_self_heal = NULL; + rc = daos_pool_get_prop(arg->pool.pool_uuid, "self_heal", &orig_self_heal); + assert_rc_equal(rc, 0); test_set_engine_fail_loc(arg, leader_rank, DAOS_MD_OP_PASS_NOREPLY | DAOS_FAIL_ONCE); print_message("set pool prop (retry / dup rpc detection)... "); rc = daos_pool_set_prop(arg->pool.pool_uuid, "self_heal", "none"); assert_rc_equal(rc, 0); print_message("success\n"); + rc = daos_pool_set_prop(arg->pool.pool_uuid, "self_heal", orig_self_heal); + assert_rc_equal(rc, 0); + free(orig_self_heal); /* pool evict success committed, "lost" reply - duplicate RPC retry */ test_set_engine_fail_loc(arg, leader_rank, DAOS_MD_OP_PASS_NOREPLY | DAOS_FAIL_ONCE); diff --git a/src/tests/suite/daos_test.h b/src/tests/suite/daos_test.h index dd76c844992..002c54e9473 100644 --- a/src/tests/suite/daos_test.h +++ b/src/tests/suite/daos_test.h @@ -1,6 +1,6 @@ /** * (C) Copyright 2016-2023 Intel Corporation. 
- * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -428,6 +428,8 @@ void test_rebuild_wait_to_error(test_arg_t **args, int args_cnt); int daos_pool_set_prop(const uuid_t pool_uuid, const char *name, const char *value); +int + daos_pool_get_prop(const uuid_t pool_uuid, const char *name, char **value_out); int daos_pool_upgrade(const uuid_t pool_uuid); int ec_data_nr_get(daos_obj_id_t oid); diff --git a/src/tests/suite/daos_test_common.c b/src/tests/suite/daos_test_common.c index aa3fd327c08..7f823126552 100644 --- a/src/tests/suite/daos_test_common.c +++ b/src/tests/suite/daos_test_common.c @@ -1,6 +1,6 @@ /** * (C) Copyright 2018-2023 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -1121,6 +1121,12 @@ daos_pool_set_prop(const uuid_t pool_uuid, const char *name, return dmg_pool_set_prop(dmg_config_file, name, value, pool_uuid); } +int +daos_pool_get_prop(const uuid_t pool_uuid, const char *name, char **value_out) +{ + return dmg_pool_get_prop(dmg_config_file, NULL, pool_uuid, name, value_out); +} + void daos_start_server(test_arg_t *arg, const uuid_t pool_uuid, const char *grp, d_rank_list_t *svc, d_rank_t rank) From 006a80961902a8405cf085ca1f9d095f59e807a4 Mon Sep 17 00:00:00 2001 From: Jan Michalski Date: Fri, 23 Jan 2026 16:57:19 +0000 Subject: [PATCH 144/253] DAOS-18324 common: create files as 0660 instead of 0600 (#17330) Allow users who are part of the daos_server group to access created files. It would allow run dlck/ddb as a normal user instead of as root which is a bad a practice and sometimes is impossible. + drop the mode argument when `open()` is called without `O_CREAT`. In this case mode is ignored and providing a value is nothing but confusing. 
Signed-off-by: Jan Michalski --- src/bio/bio_xstream.c | 2 +- src/common/ad_mem.c | 3 ++- src/include/daos/mem.h | 2 ++ src/mgmt/mgmt_common.c | 4 ++-- src/vos/vos_common.c | 2 +- src/vos/vos_pool.c | 5 +++-- 6 files changed, 11 insertions(+), 7 deletions(-) diff --git a/src/bio/bio_xstream.c b/src/bio/bio_xstream.c index e72223be2fd..f3200031423 100644 --- a/src/bio/bio_xstream.c +++ b/src/bio/bio_xstream.c @@ -301,7 +301,7 @@ bio_nvme_init_ext(const char *nvme_conf, int numa_node, unsigned int mem_size, } if (nvme_conf && strlen(nvme_conf) > 0) { - fd = open(nvme_conf, O_RDONLY, 0600); + fd = open(nvme_conf, O_RDONLY); if (fd < 0) D_WARN("Open %s failed, skip DAOS NVMe setup "DF_RC"\n", nvme_conf, DP_RC(daos_errno2der(errno))); diff --git a/src/common/ad_mem.c b/src/common/ad_mem.c index 03b3120344a..fd069273fd0 100644 --- a/src/common/ad_mem.c +++ b/src/common/ad_mem.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2022-2023 Intel Corporation. + * (C) Copyright 2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -608,7 +609,7 @@ blob_file_open(struct ad_blob *blob, const char *path, size_t *size, bool create int flags = O_RDWR; while (1) { - fd = open(path, flags, 0600); + fd = open(path, flags, UMEM_FILE_MODE_DEFAULT); if (fd >= 0) break; diff --git a/src/include/daos/mem.h b/src/include/daos/mem.h index 1b08f4ac3d0..d451f26704d 100644 --- a/src/include/daos/mem.h +++ b/src/include/daos/mem.h @@ -38,6 +38,8 @@ umempobj_pgsz(int backend); /* umem persistent object property flags */ #define UMEMPOBJ_ENABLE_STATS 0x1 +#define UMEM_FILE_MODE_DEFAULT 0660 + #ifdef DAOS_PMEM_BUILD /* The backend type is stored in meta blob header, don't change the value */ diff --git a/src/mgmt/mgmt_common.c b/src/mgmt/mgmt_common.c index a58cb4ecec7..70ffcc46776 100644 --- a/src/mgmt/mgmt_common.c +++ b/src/mgmt/mgmt_common.c @@ -149,7 +149,7 @@ ds_mgmt_tgt_recreate(uuid_t pool_uuid, daos_size_t scm_size, int tgt_nr, int *tg rc = -DER_NONEXIST; 
goto out; } - fd = open(rdb_path, O_RDWR | O_CREAT, 0600); + fd = open(rdb_path, O_RDWR | O_CREAT, UMEM_FILE_MODE_DEFAULT); if (fd < 0) { rc = daos_errno2der(errno); D_ERROR("failed to create/open the vos file %s:" DF_RC "\n", rdb_path, @@ -200,7 +200,7 @@ ds_mgmt_tgt_preallocate(uuid_t uuid, daos_size_t scm_size, int tgt_id, const cha D_DEBUG(DB_MGMT, DF_UUID ": creating vos file %s (%ld bytes)\n", DP_UUID(uuid), path, scm_size); - fd = open(path, O_CREAT | O_RDWR, 0600); + fd = open(path, O_CREAT | O_RDWR, UMEM_FILE_MODE_DEFAULT); if (fd < 0) { rc = daos_errno2der(errno); D_ERROR(DF_UUID ": failed to create vos file %s: " DF_RC "\n", DP_UUID(uuid), path, diff --git a/src/vos/vos_common.c b/src/vos/vos_common.c index e5fe50dac97..642621ebd08 100644 --- a/src/vos/vos_common.c +++ b/src/vos/vos_common.c @@ -991,7 +991,7 @@ vos_self_nvme_init(const char *vos_path, bool init_spdk) goto out; /* Only use hugepages if NVME SSD configuration existed. */ - fd = open(nvme_conf, O_RDONLY, 0600); + fd = open(nvme_conf, O_RDONLY); if (fd < 0) { rc = bio_nvme_init_ext(NULL, VOS_NVME_NUMA_NODE, 0, 0, VOS_NVME_NR_TARGET, true, init_spdk); diff --git a/src/vos/vos_pool.c b/src/vos/vos_pool.c index 96d516b32dd..7d3f95142da 100644 --- a/src/vos/vos_pool.c +++ b/src/vos/vos_pool.c @@ -1,6 +1,6 @@ /** * (C) Copyright 2016-2025 Intel Corporation. 
- * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -996,7 +996,8 @@ vos_pmemobj_create(const char *path, uuid_t pool_id, const char *layout, umem_create: D_DEBUG(DB_MGMT, "umempobj_create sz: " DF_U64 " store_sz: " DF_U64, scm_sz, store.stor_size); - pop = umempobj_create(path, layout, UMEMPOBJ_ENABLE_STATS, scm_sz, 0600, &store); + pop = umempobj_create(path, layout, UMEMPOBJ_ENABLE_STATS, scm_sz, UMEM_FILE_MODE_DEFAULT, + &store); if (pop != NULL) { *ph = pop; return 0; From a78d3638d2af22dde7d95867a2db14e6fb1d5965 Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Sat, 24 Jan 2026 13:28:11 -0600 Subject: [PATCH 145/253] DAOS-18502 test: update NLT to work on EL9.7 (#17438) NLT uses commands such cp, cat, etc to trigger function for ioil to intercept. In later distribution though, the implementation for such commands changes to use other lower level linux calls. Update the NLT test to use dd which should reliable use read/write calls that IOIL intercepts. __fxstat sounds not used anymore, so we cannot rely on intercepting that. disable that for now in test checking to have tests pass. Signed-off-by: Mohamad Chaarawi --- utils/node_local_test.py | 45 +++++++++++++++++++++++++++++++++++----- 1 file changed, 40 insertions(+), 5 deletions(-) diff --git a/utils/node_local_test.py b/utils/node_local_test.py index 8b6bffa0056..9896196427e 100755 --- a/utils/node_local_test.py +++ b/utils/node_local_test.py @@ -2,7 +2,7 @@ """Node local test (NLT). (C) Copyright 2020-2024 Intel Corporation. -(C) Copyright 2025 Hewlett Packard Enterprise Development LP +(C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP (C) Copyright 2025 Google LLC (C) Copyright 2025 Enakta Labs Ltd @@ -2847,14 +2847,40 @@ def test_il(self): with open(file, 'w') as fd: fd.write('Hello') # Copy it across containers. 
- self.dfuse.il_cmd(['cp', file, sub_cont_dir]) + dst = join(sub_cont_dir, 'file') + self.dfuse.il_cmd([ + 'dd', + f'if={file}', + f'of={dst}', + 'bs=4096', + 'iflag=fullblock', + 'status=none' + ], check_fstat=False) # Copy it within the container. child_dir = join(self.dfuse.dir, 'new_dir') os.mkdir(child_dir) - self.dfuse.il_cmd(['cp', file, child_dir]) + dst = join(child_dir, 'file') + + self.dfuse.il_cmd([ + 'dd', + f'if={file}', + f'of={dst}', + 'bs=128K', + 'status=none' + ], check_fstat=False) + # Copy something into a container - self.dfuse.il_cmd(['cp', '/bin/bash', sub_cont_dir], check_read=False) + dst = join(sub_cont_dir, 'bash') + + self.dfuse.il_cmd([ + 'dd', + 'if=/bin/bash', + f'of={dst}', + 'bs=128K', + 'status=none' + ], check_read=False, check_fstat=False) + # Read it from within a container self.dfuse.il_cmd(['md5sum', join(sub_cont_dir, 'bash')], check_read=False, check_write=False, check_fstat=False) @@ -5018,7 +5044,16 @@ def create_and_read_via_il(dfuse, path): ofd.flush() assert_file_size(ofd, 12) print(os.fstat(ofd.fileno())) - dfuse.il_cmd(['cat', fname], check_write=False) + + # Replace Python snippet with dd to guarantee read() + dfuse.il_cmd([ + 'dd', + f'if={fname}', + 'of=/tmp/dd_sink', + 'bs=4096', + 'iflag=fullblock', + 'status=none' + ], check_write=False, check_fstat=False) def run_container_query(conf, path): From d5ef32664ac681006e97ee182df49704d115553f Mon Sep 17 00:00:00 2001 From: Makito Kano Date: Mon, 26 Jan 2026 14:44:08 +0900 Subject: [PATCH 146/253] =?UTF-8?q?DAOS-18505=20test:=20pool=5Fmembership.?= =?UTF-8?q?py=20test=5Fdangling=5Frank=5Fentry=20-=20Skip=20f=E2=80=A6=20(?= =?UTF-8?q?#17443)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * DAOS-18505 test: pool_membership.py test_dangling_rank_entry - Skip for MD-on-SSD Currently, the test doesn't support MD-on-SSD, so skip for now. 
Signed-off-by: Makito Kano --- src/tests/ftest/recovery/pool_membership.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/tests/ftest/recovery/pool_membership.py b/src/tests/ftest/recovery/pool_membership.py index 67b5dff96bc..22385866e10 100644 --- a/src/tests/ftest/recovery/pool_membership.py +++ b/src/tests/ftest/recovery/pool_membership.py @@ -1,6 +1,6 @@ """ (C) Copyright 2024 Intel Corporation. - (C) Copyright 2025 Hewlett Packard Enterprise Development LP + (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -315,6 +315,11 @@ def test_dangling_rank_entry(self): :avocado: tags=recovery,cat_recov,pool_membership :avocado: tags=PoolMembershipTest,test_dangling_rank_entry """ + if self.server_managers[0].manager.job.using_control_metadata: + self.log.info("MD-on-SSD cluster. Will be supported later.") + # return results in PASS. + return + targets = self.params.get("targets", "/run/server_config/engines/0/*") exp_msg = "dangling rank entry" From 4f07df48e5cf1c1e01ca9b4decac6bfcddc572ac Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Mon, 26 Jan 2026 13:18:03 -0600 Subject: [PATCH 147/253] DAOS-18502 test: fix another NLT test for IL (#17451) Signed-off-by: Mohamad Chaarawi --- utils/node_local_test.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/utils/node_local_test.py b/utils/node_local_test.py index 9896196427e..894bbdc3761 100755 --- a/utils/node_local_test.py +++ b/utils/node_local_test.py @@ -2829,7 +2829,14 @@ def test_il_cat(self): with open(fname, 'w'): pass - self.dfuse.il_cmd(['cat', fname], check_write=False) + self.dfuse.il_cmd([ + 'dd', + f'if={fname}', + 'of=/dev/null', + 'bs=4096', + 'iflag=fullblock', + 'status=none' + ], check_write=False, check_fstat=False) @needs_dfuse_with_opt(caching_variants=[False]) def test_il(self): @@ -5049,7 +5056,7 @@ def create_and_read_via_il(dfuse, path): dfuse.il_cmd([ 'dd', 
f'if={fname}', - 'of=/tmp/dd_sink', + 'of=/dev/null', 'bs=4096', 'iflag=fullblock', 'status=none' From 5a7d21cf2babae79ad69255d40eca1f6518dcfb2 Mon Sep 17 00:00:00 2001 From: Kris Jacque Date: Mon, 26 Jan 2026 17:19:24 -0700 Subject: [PATCH 148/253] DAOS-18382 control: Make ddb vos_path optional (#17359) For ddb, vos_path was previously a required positional parameter even for commands that didn't use it. With this change, --vos_path is a named parameter to be used only with commands that require it. Signed-off-by: Kris Jacque --- src/control/cmd/ddb/main.go | 38 +++++++++++++++---------------- src/tests/ftest/util/ddb_utils.py | 4 ++-- 2 files changed, 20 insertions(+), 22 deletions(-) diff --git a/src/control/cmd/ddb/main.go b/src/control/cmd/ddb/main.go index e608cf6d20c..b328821fda4 100644 --- a/src/control/cmd/ddb/main.go +++ b/src/control/cmd/ddb/main.go @@ -43,15 +43,15 @@ func exitWithError(log logging.Logger, err error) { } type cliOptions struct { - Debug bool `long:"debug" description:"enable debug output"` - WriteMode bool `long:"write_mode" short:"w" description:"Open the vos file in write mode."` - CmdFile string `long:"cmd_file" short:"f" description:"Path to a file containing a sequence of ddb commands to execute."` - SysdbPath string `long:"db_path" short:"p" description:"Path to the sys db."` - Version bool `short:"v" long:"version" description:"Show version"` + Debug bool `long:"debug" description:"enable debug output"` + WriteMode bool `long:"write_mode" short:"w" description:"Open the vos file in write mode."` + CmdFile string `long:"cmd_file" short:"f" description:"Path to a file containing a sequence of ddb commands to execute."` + SysdbPath string `long:"db_path" short:"p" description:"Path to the sys db."` + VosPath vosPathStr `long:"vos_path" short:"s" description:"Path to the VOS file to open."` + Version bool `short:"v" long:"version" description:"Show version"` Args struct { - VosPath vosPathStr `positional-arg-name:"vos_file_path"` - 
RunCmd ddbCmdStr `positional-arg-name:"ddb_command"` - RunCmdArgs []string `positional-arg-name:"ddb_command_args"` + RunCmd ddbCmdStr `positional-arg-name:"ddb_command"` + RunCmdArgs []string `positional-arg-name:"ddb_command_args"` } `positional-args:"yes"` } @@ -229,18 +229,16 @@ func parseOpts(args []string, opts *cliOptions, log *logging.LeveledLogger) erro p.Name = "ddb" p.Usage = "[OPTIONS]" p.ShortDescription = "daos debug tool" - p.LongDescription = `The DAOS Debug Tool (ddb) allows a user to navigate through and modify + p.LongDescription = ` +The DAOS Debug Tool (ddb) allows a user to navigate through and modify a file in the VOS format. It offers both a command line and interactive shell mode. If neither a single command or '-f' option is provided, then the tool will run in interactive mode. In order to modify the VOS file, the '-w' option must be included. -If the command requires it, the VOS file provided as the first positional -parameter will be opened before any commands are executed. See the -command‑specific help for details. When the VOS file is not required, it is -ignored; however, it must still be supplied, and it may be empty (""), e.g. - -ddb "" ls --help +If the command requires it, the VOS file must be provided with the parameter +--vos-path. The VOS file will be opened before any commands are executed. See +the command‑specific help for details. 
` // Set the traceback level such that a crash results in @@ -281,8 +279,8 @@ ddb "" ls --help defer C.free(unsafe.Pointer(ctx.ctx.dc_db_path)) } - if opts.Args.VosPath != "" { - ctx.ctx.dc_pool_path = C.CString(string(opts.Args.VosPath)) + if opts.VosPath != "" { + ctx.ctx.dc_pool_path = C.CString(string(opts.VosPath)) defer C.free(unsafe.Pointer(ctx.ctx.dc_pool_path)) if !strings.HasPrefix(string(opts.Args.RunCmd), "feature") && @@ -293,9 +291,9 @@ ddb "" ls --help !strings.HasPrefix(string(opts.Args.RunCmd), "rm_pool") && !strings.HasPrefix(string(opts.Args.RunCmd), "dev_list") && !strings.HasPrefix(string(opts.Args.RunCmd), "dev_replace") { - log.Debugf("Connect to path: %s\n", opts.Args.VosPath) - if err := ddbOpen(ctx, string(opts.Args.VosPath), bool(opts.WriteMode)); err != nil { - return errors.Wrapf(err, "Error opening path: %s", opts.Args.VosPath) + log.Debugf("Connect to path: %s\n", opts.VosPath) + if err := ddbOpen(ctx, string(opts.VosPath), bool(opts.WriteMode)); err != nil { + return errors.Wrapf(err, "Error opening path: %s", opts.VosPath) } } } diff --git a/src/tests/ftest/util/ddb_utils.py b/src/tests/ftest/util/ddb_utils.py index 53f9601653e..da6e122592e 100644 --- a/src/tests/ftest/util/ddb_utils.py +++ b/src/tests/ftest/util/ddb_utils.py @@ -1,6 +1,6 @@ """ (C) Copyright 2022 Intel Corporation. - (C) Copyright 2025 Hewlett Packard Enterprise Development LP + (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -38,7 +38,7 @@ def __init__(self, server_host, path, verbose=True, timeout=None, sudo=True): self.single_command = BasicParameter(None, position=2) # VOS file path. - self.vos_path = BasicParameter(None, position=1) + self.vos_path = FormattedParameter("--vos_path {}", position=1) # Members needed for run(). 
self.verbose = verbose From 6d39324270d87633dca2c756a329a9c047b7c52f Mon Sep 17 00:00:00 2001 From: Phil Henderson Date: Tue, 27 Jan 2026 09:10:15 -0500 Subject: [PATCH 149/253] DAOS-17916 test: Verify page eviction on MD on SSD (#17172) Adding a test to verify page eviction on a MD on SSD phase 2 pool. Signed-off-by: Phil Henderson --- src/tests/ftest/pool/eviction_metrics.py | 124 +++++++++++ src/tests/ftest/pool/eviction_metrics.yaml | 42 ++++ src/tests/ftest/util/mdtest_utils.py | 241 ++++++++++++++++++++- src/tests/ftest/util/telemetry_utils.py | 7 + src/vos/vos_internal.h | 3 +- 5 files changed, 413 insertions(+), 4 deletions(-) create mode 100644 src/tests/ftest/pool/eviction_metrics.py create mode 100644 src/tests/ftest/pool/eviction_metrics.yaml diff --git a/src/tests/ftest/pool/eviction_metrics.py b/src/tests/ftest/pool/eviction_metrics.py new file mode 100644 index 00000000000..8318f290d35 --- /dev/null +++ b/src/tests/ftest/pool/eviction_metrics.py @@ -0,0 +1,124 @@ +""" + (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP + + SPDX-License-Identifier: BSD-2-Clause-Patent +""" +import json +import math + +from job_manager_utils import get_job_manager +from mdtest_utils import MDTEST_NAMESPACE, run_mdtest +from telemetry_test_base import TestWithTelemetry + + +class EvictionMetrics(TestWithTelemetry): + """ + Tests DAOS client eviction from a pool that the client is using. + + :avocado: recursive + """ + + def test_eviction_metrics(self): + """Verify page eviction on the pool + + 1. Create a pool with a mem ratio of 100% (for pmem or phase 1) or 25% (for phase 2) + 2. Collect a baseline for the pool eviction metrics + 3. Run mdtest -a DFS to generate many small files larger than mem size + 4. Collect new page eviction metrics + 5. 
Verify page eviction + + :avocado: tags=all,daily_regression + :avocado: tags=hw,medium + :avocado: tags=pool + :avocado: tags=EvictionMetrics,test_eviction_metrics + """ + write_bytes = self.params.get('write_bytes', MDTEST_NAMESPACE, None) + processes = self.params.get('processes', MDTEST_NAMESPACE, None) + ppn = self.params.get('ppn', MDTEST_NAMESPACE, None) + + evict_metrics = list(self.telemetry.ENGINE_POOL_VOS_CACHE_METRICS) + + self.log_step('Creating a pool (dmg pool create)') + pool = self.get_pool(connect=False) + try: + _result = json.loads(pool.dmg.result.stdout) + tier_bytes_scm = int(_result["response"]["tier_bytes"][0]) + mem_file_bytes = int(_result["response"]["mem_file_bytes"]) + except Exception as error: # pylint: disable=broad-except + self.fail(f"Error extracting data for dmg pool create output: {error}") + + # Calculate the mdtest files_per_process based upon the scm size and other mdtest params + _write_processes = processes + if ppn is not None: + _write_processes = ppn * len(self.host_info.clients.hosts) + files_per_process = math.floor(mem_file_bytes / (write_bytes * _write_processes)) + if tier_bytes_scm > mem_file_bytes: + # Write more (110%) files to exceed mem_file_bytes and cause eviction + mdtest_params = {"num_of_files_dirs": math.ceil(files_per_process * 1.10)} + else: + # Write less (30%) files to avoid out of space errors + mdtest_params = {"num_of_files_dirs": math.floor(files_per_process * 0.30)} + + self.log.debug("-" * 60) + self.log.debug("Pool %s create data:", pool) + self.log.debug(" tier_bytes_scm: %s", tier_bytes_scm) + self.log.debug(" mem_file_bytes: %s", mem_file_bytes) + self.log.debug(" mem_ratio.value: %s", pool.mem_ratio.value) + self.log.debug("Mdtest write parameters:") + self.log.debug(" write_bytes: %s", write_bytes) + if ppn is not None: + self.log.debug(" ppn / nodes: %s / %s", ppn, len(self.host_info.clients.hosts)) + else: + self.log.debug(" processes: %s", processes) + self.log.debug(" 
files_per_process: %s", files_per_process) + self.log.debug(" num_of_files_dirs: %s", mdtest_params["num_of_files_dirs"]) + self.log.debug(" expected to write: %s", + _write_processes * write_bytes * mdtest_params["num_of_files_dirs"]) + self.log.debug("-" * 60) + + self.log_step('Creating a container (dmg container create)') + container = self.get_container(pool) + + self.log_step( + 'Collect pool eviction metrics after creating a pool (dmg telemetry metrics query)') + expected_ranges = self.telemetry.collect_data(evict_metrics) + for metric in expected_ranges: + for label in expected_ranges[metric]: + if pool.mem_ratio.value is not None and metric.endswith('_hit'): + expected_ranges[metric][label] = [0, 100] # 0-100 (phase 2) + elif pool.mem_ratio.value is not None and metric.endswith('_miss'): + expected_ranges[metric][label] = [0, 5] # 0-5 (phase 2) + elif pool.mem_ratio.value is not None and metric.endswith('_ne'): + expected_ranges[metric][label] = [0, 5] # 0-5 (phase 2) + else: + expected_ranges[metric][label] = [0, 0] # 0 only + self.log.debug("%s expected_ranges: %s", pool, expected_ranges) + + self.log_step('Verify pool eviction metrics after pool creation') + if not self.telemetry.verify_data(expected_ranges): + self.fail('Pool eviction metrics verification failed after pool creation') + + self.log_step('Writing data to the pool (mdtest -a DFS)') + manager = get_job_manager(self, subprocess=False, timeout=None) + run_mdtest( + self, self.hostlist_clients, self.workdir, None, container, processes, ppn, manager, + mdtest_params=mdtest_params) + + self.log_step( + 'Collect pool eviction metrics after writing data (dmg telemetry metrics query)') + expected_ranges = self.telemetry.collect_data(evict_metrics) + for metric in expected_ranges: + for label in expected_ranges[metric]: + if pool.mem_ratio.value is None: + expected_ranges[metric][label] = [0, 0] # 0 only (phase 1) + elif metric.endswith('_page_flush'): + expected_ranges[metric][label] = [0] # 0 or 
greater (phase 2) + else: + expected_ranges[metric][label] = [1] # 1 or greater (phase 2) + self.log.debug("%s expected_ranges: %s", pool, expected_ranges) + + self.log_step('Verify pool eviction metrics after writing data') + if not self.telemetry.verify_data(expected_ranges): + self.fail('Pool eviction metrics verification failed after writing data') + + self.log_step('Test passed') diff --git a/src/tests/ftest/pool/eviction_metrics.yaml b/src/tests/ftest/pool/eviction_metrics.yaml new file mode 100644 index 00000000000..f9026a02b9c --- /dev/null +++ b/src/tests/ftest/pool/eviction_metrics.yaml @@ -0,0 +1,42 @@ +launch: + !filter-only : /run/pool/default # yamllint disable-line rule:colons + +hosts: + test_servers: 1 + test_clients: 3 + +timeout: 120 + +server_config: + name: daos_server + engines_per_host: 1 + engines: + 0: + targets: 4 + nr_xs_helpers: 0 + storage: auto + +pool: !mux + default: + size: 10G + md_on_ssd_p2: + size: 10G + mem_ratio: 25 + +container: + type: POSIX + oclass: S1 + dir_oclass: SX + +mdtest: + dfs_oclass: S1 + dfs_dir_oclass: SX + dfs_destroy: False + manager: "MPICH" + ppn: 32 + test_dir: "/" + api: DFS + flags: "-C -F -G 27 -N 1 -Y -u -L" + branching_factor: 1 + write_bytes: 3072 + read_bytes: 3072 diff --git a/src/tests/ftest/util/mdtest_utils.py b/src/tests/ftest/util/mdtest_utils.py index 97e5d75d088..0b5654ece43 100644 --- a/src/tests/ftest/util/mdtest_utils.py +++ b/src/tests/ftest/util/mdtest_utils.py @@ -1,5 +1,6 @@ """ (C) Copyright 2019-2024 Intel Corporation. 
+ (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -8,20 +9,115 @@ import re from command_utils import ExecutableCommand -from command_utils_base import FormattedParameter, LogParameter +from command_utils_base import BasicParameter, FormattedParameter, LogParameter +from exception_utils import CommandFailure from general_utils import get_log_file +from job_manager_utils import get_job_manager + +MDTEST_NAMESPACE = "/run/mdtest/*" + + +def get_mdtest(test, hosts, manager=None, path=None, slots=None, namespace=MDTEST_NAMESPACE, + mdtest_params=None): + """Get a Mdtest object. + + Args: + test (Test): avocado Test object + hosts (NodeSet): hosts on which to run the mdtest command + manager (JobManager, optional): command to manage the multi-host execution of mdtest. + Defaults to None, which will get a default job manager. + path (str, optional): hostfile path. Defaults to None. + slots (int, optional): hostfile number of slots per host. Defaults to None. + namespace (str, optional): path to yaml parameters. Defaults to MDTEST_NAMESPACE. + mdtest_params (dict, optional): parameters to update the mdtest command. Defaults to None. + + Returns: + Mdtest: the Mdtest object requested + """ + mdtest = Mdtest(test, hosts, manager, path, slots, namespace) + if mdtest_params: + for name, value in mdtest_params.items(): + mdtest.update(name, value) + return mdtest + + +def run_mdtest(test, hosts, path, slots, container, processes, ppn=None, manager=None, + log_file=None, intercept=None, display_space=True, namespace=MDTEST_NAMESPACE, + mdtest_params=None): + # pylint: disable=too-many-arguments + """Run Mdtest on multiple hosts. + + Args: + test (Test): avocado Test object + hosts (NodeSet): hosts on which to run the mdtest command + path (str): hostfile path. + slots (int): hostfile number of slots per host. + container (TestContainer): DAOS test container object. 
+ processes (int): number of processes to run + ppn (int, optional): number of processes per node to run. If specified it will override + the processes input. Defaults to None. + manager (JobManager, optional): command to manage the multi-host execution of mdtest. + Defaults to None, which will get a default job manager. + log_file (str, optional): log file name. Defaults to None, which will result in a log file + name containing the test, pool, and container IDs. + intercept (str, optional): path to interception library. Defaults to None. + display_space (bool, optional): Whether to display the pool space. Defaults to True. + namespace (str, optional): path to yaml parameters. Defaults to MDTEST_NAMESPACE. + mdtest_params (dict, optional): dictionary of MdtestCommand attributes to override from + get_params(). Defaults to None. + + Raises: + CommandFailure: if there is an error running the mdtest command + + Returns: + CmdResult: result of the ior command + + """ + mdtest = get_mdtest(test, hosts, manager, path, slots, namespace, mdtest_params) + if log_file is None: + log_file = mdtest.get_unique_log(container) + mdtest.update_log_file(log_file) + return mdtest.run(container, processes, ppn, intercept, display_space) + + +def write_mdtest_data(test, container, namespace=MDTEST_NAMESPACE, **mdtest_run_params): + """Write data to the container using mdtest. + + Simple method for test classes to use to write data with mdtest. While not required, this is + setup by default to pull in mdtest parameters from the test yaml. + + Args: + test (Test): avocado Test object + container (TestContainer): the container to populate + namespace (str, optional): path to mdtest yaml parameters. Defaults to MDTEST_NAMESPACE. + mdtest_run_params (dict): optional params for the Mdtest.run() command. 
+ + Returns: + Mdtest: the Mdtest object used to populate the container + """ + mdtest = get_mdtest(test, test.hostlist_clients, None, test.workdir, None, namespace) + mdtest.update_log_file(mdtest.get_unique_log(container)) + + if 'processes' not in mdtest_run_params: + mdtest_run_params['processes'] = test.params.get('processes', namespace, None) + elif 'ppn' not in mdtest_run_params: + mdtest_run_params['ppn'] = test.params.get('ppn', namespace, None) + + mdtest.run(container, **mdtest_run_params) + return mdtest class MdtestCommand(ExecutableCommand): """Defines a object representing a mdtest command.""" - def __init__(self, log_dir): + def __init__(self, log_dir, namespace="/run/mdtest/*"): """Create an MdtestCommand object. Args: log_dir (str): directory in which to put log files + namespace (str, optional): path to yaml parameters. Defaults to "/run/mdtest/*". """ - super().__init__("/run/mdtest/*", "mdtest") + super().__init__(namespace, "mdtest") self._log_dir = log_dir @@ -137,6 +233,145 @@ def get_default_env(self, manager_cmd, log_file=None): return env +class Mdtest: + """Defines a class that runs the mdtest command through a job manager, e.g. mpirun.""" + + def __init__(self, test, hosts, manager=None, path=None, slots=None, + namespace=MDTEST_NAMESPACE): + """Initialize an Mdtest object. + + Args: + test (Test): avocado Test object + hosts (NodeSet): hosts on which to run the mdtest command + manager (JobManager, optional): command to manage the multi-host execution of mdtest. + Defaults to None, which will get a default job manager. + path (str, optional): hostfile path. Defaults to None. + slots (int, optional): hostfile number of slots per host. Defaults to None. + namespace (str, optional): path to yaml parameters. Defaults to MDTEST_NAMESPACE. 
+ """ + if manager is None: + manager = get_job_manager(test, subprocess=False, timeout=60) + self.manager = manager + self.manager.assign_hosts(hosts, path, slots) + self.manager.job = MdtestCommand(test.test_env.log_dir, namespace) + self.manager.job.get_params(test) + self.manager.output_check = "both" + self.timeout = test.params.get("timeout", namespace, None) + self.label_generator = test.label_generator + self.test_id = test.test_id + self.env = self.command.get_default_env(str(self.manager)) + + @property + def command(self): + """Get the MdtestCommand object. + + Returns: + MdtestCommand: the MdtestCommand object managed by the JobManager + + """ + return self.manager.job + + def update(self, name, value): + """Update a MdtestCommand BasicParameter with a new value. + + Args: + name (str): name of the MdtestCommand BasicParameter to update + value (str): value to assign to the MdtestCommand BasicParameter + """ + param = getattr(self.command, name, None) + if param: + if isinstance(param, BasicParameter): + param.update(value, ".".join([self.command.command, name])) + + def update_log_file(self, log_file): + """Update the log file for the mdtest command. + + Args: + log_file (str): new mdtest log file + """ + self.command.env["D_LOG_FILE"] = get_log_file( + log_file or f"{self.command.command}_daos.log") + + def get_unique_log(self, container): + """Get a unique mdtest log file name. + + Args: + container (TestContainer): container involved with the command + + Returns: + str: a log file name + """ + label = self.label_generator.get_label("mdtest") + parts = [self.test_id, container.pool.identifier, container.identifier, label] + return '.'.join(['_'.join(parts), 'log']) + + def update_daos_params(self, pool, container): + """Set the mdtest parameters for the pool and container. + + Optionally also set the DAOS pool and container environment variables for mdtest. 
+ + Args: + pool (TestPool): the pool to use with the mdtest command + container (TestContainer): the container to use with the mdtest command + """ + self.command.update_params(dfs_pool=pool.identifier, dfs_cont=container.identifier) + + if "mpirun" in str(self.manager) or "srun" in str(self.manager): + self.env["DAOS_POOL"] = self.command.dfs_pool.value + self.env["DAOS_CONT"] = self.command.dfs_cont.value + self.env["IOR_HINT__MPI__romio_daos_obj_class"] = self.command.dfs_oclass.value + + def run(self, container, processes, ppn=None, intercept=None, display_space=True): + """Run mdtest. + + Args: + container (TestContainer): DAOS test container object. + processes (int): number of processes to run + ppn (int, optional): number of processes per node to run. If specified it will override + the processes input. Defaults to None. + intercept (str, optional): path to interception library. Defaults to None. + display_space (bool, optional): Whether to display the pool space. Defaults to True. 
+ + Raises: + CommandFailure: if there is an error running the mdtest command + + Returns: + CmdResult: result of the mdtest command + """ + result = None + error_message = None + + self.update_daos_params(container.pool, container) + + if intercept: + self.env["LD_PRELOAD"] = intercept + + # Pass only processes or ppn to be compatible with previous behavior + if ppn is not None: + self.manager.assign_processes(ppn=ppn) + else: + self.manager.assign_processes(processes=processes) + + self.manager.assign_environment(self.env) + + try: + if display_space: + container.pool.display_space() + result = self.manager.run() + + except CommandFailure as error: + error_message = "Mdtest Failed:\n {}".format("\n ".join(str(error).split("\n"))) + + finally: + if not self.manager.run_as_subprocess and display_space: + container.pool.display_space() + + if error_message: + raise CommandFailure(error_message) + + return result + + class MdtestMetrics(): # pylint: disable=too-few-public-methods """Represents metrics from mdtest output. 
diff --git a/src/tests/ftest/util/telemetry_utils.py b/src/tests/ftest/util/telemetry_utils.py index 8937db87788..5230fba5a46 100644 --- a/src/tests/ftest/util/telemetry_utils.py +++ b/src/tests/ftest/util/telemetry_utils.py @@ -159,6 +159,12 @@ class TelemetryUtils(): "engine_pool_vos_wal_replay_size", "engine_pool_vos_wal_replay_time", "engine_pool_vos_wal_replay_transactions"] + ENGINE_POOL_VOS_CACHE_METRICS = [ + "engine_pool_vos_cache_page_evict", + "engine_pool_vos_cache_page_flush", + "engine_pool_vos_cache_page_hit", + "engine_pool_vos_cache_page_miss", + "engine_pool_vos_cache_page_ne"] ENGINE_POOL_SVC_METRICS = [ "engine_pool_svc_degraded_ranks", "engine_pool_svc_disabled_targets", @@ -179,6 +185,7 @@ class TelemetryUtils(): ENGINE_POOL_VOS_SPACE_METRICS + \ ENGINE_POOL_VOS_WAL_METRICS + \ ENGINE_POOL_VOS_WAL_REPLAY_METRICS +\ + ENGINE_POOL_VOS_CACHE_METRICS +\ ENGINE_POOL_SVC_METRICS ENGINE_EVENT_METRICS = [ "engine_events_dead_ranks", diff --git a/src/vos/vos_internal.h b/src/vos/vos_internal.h index 28d7ae765f7..40927b22d28 100644 --- a/src/vos/vos_internal.h +++ b/src/vos/vos_internal.h @@ -263,7 +263,8 @@ struct vos_cache_metrics { struct d_tm_node_t *vcm_obj_hit; }; -void vos_cache_metrics_init(struct vos_cache_metrics *vc_metrcis, const char *path, int tgt_id); +void +vos_cache_metrics_init(struct vos_cache_metrics *vc_metrics, const char *path, int tgt_id); struct vos_pool_metrics { void *vp_vea_metrics; From bbde24bc48bb85cd5c7d25187a423f02f5c947e5 Mon Sep 17 00:00:00 2001 From: Jan Michalski Date: Tue, 27 Jan 2026 15:47:04 +0000 Subject: [PATCH 150/253] DAOS-18503 test: suppress GLIBC leaks on EL9.7 (#17424) Note: getpwnam_r() suppression was found previously but: - the call stack on EL9.7 is slightly different so I made it a little bit more permissive. - NLT tests which re-discovered it on EL9.7 use a different suppression file so for now I just made a copy of this single suppression. 
Signed-off-by: Jan Michalski --- src/cart/utils/memcheck-cart.supp | 21 +++++++++++++++++++++ utils/test_memcheck.supp | 5 ++--- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/src/cart/utils/memcheck-cart.supp b/src/cart/utils/memcheck-cart.supp index b29fbe80ace..9676c1966f2 100644 --- a/src/cart/utils/memcheck-cart.supp +++ b/src/cart/utils/memcheck-cart.supp @@ -795,3 +795,24 @@ fun:_cgo_b590e4e2531a_Cfunc_daos_init fun:runtime.asmcgocall.abi0 } +{ + getpwnam_r() leak + Memcheck:Leak + fun:*alloc + ... + fun:getpwnam_r* +} +{ + getpwuid_r() leak + Memcheck:Leak + fun:calloc + ... + fun:getpwuid_r* +} +{ + localtime() leak + Memcheck:Leak + fun:malloc + ... + fun:__tz_convert +} diff --git a/utils/test_memcheck.supp b/utils/test_memcheck.supp index 0d41b6e9a3d..4f5e2bca077 100644 --- a/utils/test_memcheck.supp +++ b/utils/test_memcheck.supp @@ -356,12 +356,11 @@ fun:start_thread } { - getpwnam_r leak + getpwnam_r() leak Memcheck:Leak + fun:*alloc ... - fun:_nss_systemd_getpwnam_r fun:getpwnam_r* - fun:daos_acl_principal_to_uid } { getgrgid_r leak From 651b4c9b3a9af9691255181ac2520c9f0244f996 Mon Sep 17 00:00:00 2001 From: Tom Nabarro Date: Tue, 27 Jan 2026 15:48:30 +0000 Subject: [PATCH 151/253] DAOS-18347 test: Fix ListVerbose by adding derived_state (#17433) Signed-off-by: Tom Nabarro --- src/tests/ftest/pool/list_verbose.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/tests/ftest/pool/list_verbose.py b/src/tests/ftest/pool/list_verbose.py index 5cd429a748b..9f784d2d197 100644 --- a/src/tests/ftest/pool/list_verbose.py +++ b/src/tests/ftest/pool/list_verbose.py @@ -1,6 +1,6 @@ """ (C) Copyright 2018-2024 Intel Corporation. 
- (C) Copyright 2025 Hewlett Packard Enterprise Development LP + (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -80,6 +80,7 @@ def create_expected(self, pool, scm_free, nvme_free, scm_imbalance, "rebuild": { "status": 0, "state": rebuild_state, + "derived_state": rebuild_state, "objects": 0, "records": 0, "total_objects": 0 From 474849d626a9c71c4bd47f4502d44b0ad612ce5b Mon Sep 17 00:00:00 2001 From: Niu Yawei Date: Tue, 27 Jan 2026 23:50:57 +0800 Subject: [PATCH 152/253] DAOS-18500 vos: ensure dtx records pinned (#17431) When vos_dtx_commit() calls dtx_commit_pin() to pin the records to be committed, the records in intermediate state of 'committing' and 'aborting' shouldn't be skipped, since such intermediate state could be cleared by other concurrent vos_dtx_commit(). Signed-off-by: Fan Yong Co-authored-by: Niu Yawei --- src/vos/vos_dtx.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/vos/vos_dtx.c b/src/vos/vos_dtx.c index e572d4be489..d50f2b87cc8 100644 --- a/src/vos/vos_dtx.c +++ b/src/vos/vos_dtx.c @@ -2560,8 +2560,7 @@ dtx_commit_pin(struct vos_container *cont, struct dtx_id dtis[], int count, int dae = riov.iov_buf; D_ASSERT(dae->dae_preparing == 0); - if (vos_dae_is_abort(dae) || dae->dae_committed || dae->dae_committing || - dae->dae_need_release == 0) + if (dae->dae_aborted || dae->dae_committed || dae->dae_need_release == 0) continue; rc = bkts_add_dae(vos_cont2pool(cont), &bkts, dae); From 5c3c473cf6512521ed1e2ba8219064849d35f59c Mon Sep 17 00:00:00 2001 From: Tom Nabarro Date: Tue, 27 Jan 2026 15:52:48 +0000 Subject: [PATCH 153/253] DAOS-17433 bio: Output LED-RAS control directives (#17276) For non-VMD configurations, SPDK has no way to directly control drive LEDs when bound to VFIO/userspace driver. To address this, generate RAS-events when LED state changes are requested from DAOS's internal logic. 
Signed-off-by: Tom Nabarro --- src/bio/bio_device.c | 176 +++++++++++++++---------- src/bio/bio_internal.h | 5 +- src/bio/bio_xstream.c | 22 +--- src/control/events/ras.go | 3 +- src/control/lib/daos/status.go | 4 +- src/control/server/ctl_smd_rpc.go | 8 +- src/control/server/ctl_smd_rpc_test.go | 27 +++- src/include/daos_srv/ras.h | 5 +- src/mgmt/srv_query.c | 35 ++--- 9 files changed, 163 insertions(+), 122 deletions(-) diff --git a/src/bio/bio_device.c b/src/bio/bio_device.c index 07deb6c7b10..ca72bd09f55 100644 --- a/src/bio/bio_device.c +++ b/src/bio/bio_device.c @@ -1,6 +1,6 @@ /** * (C) Copyright 2020-2024 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -48,13 +48,10 @@ revive_dev(struct bio_xs_context *xs_ctxt, struct bio_bdev *d_bdev) d_bdev->bb_trigger_reint = 1; spdk_thread_send_msg(owner_thread(bbs), setup_bio_bdev, d_bdev); - /** - * Reset the LED of the VMD device once revived, a DER_NOTSUPPORTED indicates that VMD-LED - * control is not enabled on device. 
- */ + /* Reset the LED of the VMD device once revived */ rc = bio_led_manage(xs_ctxt, NULL, d_bdev->bb_uuid, (unsigned int)CTL__LED_ACTION__RESET, NULL, 0); - if ((rc != 0) && (rc != -DER_NOTSUPPORTED)) + if (rc != 0) DL_ERROR(rc, "Reset LED on device:" DF_UUID " failed", DP_UUID(d_bdev->bb_uuid)); return 0; @@ -695,11 +692,12 @@ static void led_device_action(void *ctx, struct spdk_pci_device *pci_device) { struct led_opts *opts = ctx; - enum spdk_vmd_led_state cur_led_state; - Ctl__LedState d_led_state; + enum spdk_vmd_led_state cur_led_state = SPDK_VMD_LED_STATE_UNKNOWN; + Ctl__LedState d_led_state = CTL__LED_STATE__NA; const char *pci_dev_type = NULL; char addr_buf[ADDR_STR_MAX_LEN + 1]; int rc; + bool vmd_on; if (opts->status != 0) return; @@ -726,41 +724,45 @@ led_device_action(void *ctx, struct spdk_pci_device *pci_device) return; } - if (strncmp(pci_dev_type, NVME_PCI_DEV_TYPE_VMD, strlen(NVME_PCI_DEV_TYPE_VMD)) != 0) { - D_DEBUG(DB_MGMT, "Found non-VMD device type (%s:%s), can't manage LED\n", - pci_dev_type, addr_buf); - opts->status = -DER_NOTSUPPORTED; - return; - } + vmd_on = strncmp(pci_dev_type, NVME_PCI_DEV_TYPE_VMD, strlen(NVME_PCI_DEV_TYPE_VMD)) == 0; - /* First check the current state of the VMD LED */ - rc = spdk_vmd_get_led_state(pci_device, &cur_led_state); - if (spdk_unlikely(rc != 0)) { - D_ERROR("Failed to retrieve the state of the LED on %s (%s)\n", addr_buf, - spdk_strerror(-rc)); - opts->status = -DER_NOSYS; - return; - } + D_DEBUG(DB_MGMT, "led_device_action addr:%s, action:%s", addr_buf, + LED_ACTION_NAME(opts->action)); - /* Convert state to Ctl__LedState from SPDK led_state */ - d_led_state = led_state_spdk2daos(cur_led_state); + if (vmd_on) { + /* First check the current state of the VMD LED */ + rc = spdk_vmd_get_led_state(pci_device, &cur_led_state); + if (spdk_unlikely(rc != 0)) { + D_ERROR("Failed to retrieve the state of the LED on %s (%s)\n", addr_buf, + spdk_strerror(-rc)); + opts->status = -DER_NOSYS; + return; + } - 
D_DEBUG(DB_MGMT, "led on dev %s has state: %s (action: %s, new state: %s)\n", addr_buf, - LED_STATE_NAME(d_led_state), LED_ACTION_NAME(opts->action), - LED_STATE_NAME(opts->led_state)); + /* Convert state to Ctl__LedState from SPDK led_state */ + d_led_state = led_state_spdk2daos(cur_led_state); + + D_DEBUG(DB_MGMT, "vmd led on dev %s has state: %s (action: %s, new state: %s)\n", + addr_buf, LED_STATE_NAME(d_led_state), LED_ACTION_NAME(opts->action), + LED_STATE_NAME(opts->led_state)); + } switch (opts->action) { case CTL__LED_ACTION__GET: - /* Return early with current device state set */ - opts->led_state = d_led_state; + if (vmd_on) + /* Return early with current device state set */ + opts->led_state = d_led_state; + else + /* Leave state as NA */ + D_ERROR("LED state GET not supported for non-VMD device (type %s:%s)\n", + pci_dev_type, addr_buf); return; case CTL__LED_ACTION__SET: break; case CTL__LED_ACTION__RESET: /* Reset intercepted earlier in call-stack and converted to set */ - D_ERROR("Reset action is not supported\n"); - opts->status = -DER_INVAL; - return; + D_ERROR("Reset action unsupported in this code path\n"); + D_ASSERT(false); default: D_ERROR("Unrecognized LED action requested\n"); opts->status = -DER_INVAL; @@ -773,30 +775,44 @@ led_device_action(void *ctx, struct spdk_pci_device *pci_device) return; } - /* Set the LED to the new state */ - rc = spdk_vmd_set_led_state(pci_device, led_state_daos2spdk(opts->led_state)); - if (spdk_unlikely(rc != 0)) { - D_ERROR("Failed to set the VMD LED state on %s (%s)\n", addr_buf, - spdk_strerror(-rc)); - opts->status = -DER_NOSYS; - return; - } + if (vmd_on) { + /* Set the LED to the new state */ + rc = spdk_vmd_set_led_state(pci_device, led_state_daos2spdk(opts->led_state)); + if (spdk_unlikely(rc != 0)) { + D_ERROR("Failed to set the VMD LED state on %s (%s)\n", addr_buf, + spdk_strerror(-rc)); + opts->status = -DER_NOSYS; + return; + } - rc = spdk_vmd_get_led_state(pci_device, &cur_led_state); - if (rc != 
0) { - D_ERROR("Failed to get the VMD LED state on %s (%s)\n", addr_buf, - spdk_strerror(-rc)); - opts->status = -DER_NOSYS; - return; + rc = spdk_vmd_get_led_state(pci_device, &cur_led_state); + if (rc != 0) { + D_ERROR("Failed to get the VMD LED state on %s (%s)\n", addr_buf, + spdk_strerror(-rc)); + opts->status = -DER_NOSYS; + return; + } + d_led_state = led_state_spdk2daos(cur_led_state); + } else { + /* Set current state to expected if no VMD */ + d_led_state = opts->led_state; } - d_led_state = led_state_spdk2daos(cur_led_state); /* Verify the correct state is set */ if (d_led_state != opts->led_state) { D_ERROR("Unexpected LED state on %s, want %s got %s\n", addr_buf, LED_STATE_NAME(opts->led_state), LED_STATE_NAME(d_led_state)); - opts->status = -DER_INVAL; + opts->status = -DER_MISC; + return; } + + /** + * Print RAS event for LED change. If no VMD, the RAS events may be used to trigger LED + * control mechanisms outside of SPDK and/or DAOS. + */ + ras_notify_eventf(RAS_DEVICE_LED_SET, RAS_TYPE_INFO, RAS_SEV_NOTICE, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, "LED on device %s set to state %s", + addr_buf, LED_STATE_NAME(opts->led_state)); } static int @@ -893,7 +909,7 @@ led_manage(struct bio_xs_context *xs_ctxt, struct spdk_pci_addr pci_addr, Ctl__L case CTL__LED_ACTION__SET: opts.action = action; if (state == NULL) { - D_ERROR("LED state not set for SET action\n"); + D_ERROR("LED state not set, missing state field\n"); return -DER_INVAL; } opts.led_state = *state; @@ -920,15 +936,12 @@ led_manage(struct bio_xs_context *xs_ctxt, struct spdk_pci_addr pci_addr, Ctl__L spdk_pci_for_each_device(&opts, led_device_action); if (opts.status != 0) { - if (opts.status != -DER_NOTSUPPORTED) { - if (state != NULL) - D_ERROR("LED %s failed (target state: %s): %s\n", - LED_ACTION_NAME(action), LED_STATE_NAME(*state), - spdk_strerror(opts.status)); - else - D_ERROR("LED %s failed: %s\n", LED_ACTION_NAME(action), - spdk_strerror(opts.status)); - } + if 
(state != NULL) + D_ERROR("LED %s failed (target state: %s): %s\n", LED_ACTION_NAME(action), + LED_STATE_NAME(*state), spdk_strerror(opts.status)); + else + D_ERROR("LED %s failed: %s\n", LED_ACTION_NAME(action), + spdk_strerror(opts.status)); return opts.status; } @@ -999,15 +1012,20 @@ dev_uuid2pci_addr(struct spdk_pci_addr *pci_addr, uuid_t dev_uuid) } rc = fill_in_traddr(&b_info, d_bdev->bb_name); - if (rc || b_info.bdi_traddr == NULL) { - D_DEBUG(DB_MGMT, "Unable to get traddr for device %s\n", d_bdev->bb_name); + if (rc) { + D_ERROR("Unable to get traddr for device %s\n", d_bdev->bb_name); return -DER_INVAL; } + if (b_info.bdi_traddr == NULL) { + D_DEBUG(DB_MGMT, "Skipping get traddr for device %s (not NVMe?)\n", + d_bdev->bb_name); + return -DER_NOTSUPPORTED; + } rc = spdk_pci_addr_parse(pci_addr, b_info.bdi_traddr); if (rc != 0) { - D_DEBUG(DB_MGMT, "Unable to parse PCI address for device %s (%s)\n", - b_info.bdi_traddr, spdk_strerror(-rc)); + D_ERROR("Unable to parse PCI address for device %s (%s)\n", b_info.bdi_traddr, + spdk_strerror(-rc)); rc = -DER_INVAL; } @@ -1015,18 +1033,22 @@ dev_uuid2pci_addr(struct spdk_pci_addr *pci_addr, uuid_t dev_uuid) return rc; } +static bool +is_pci_addr_valid(const struct spdk_pci_addr *addr) +{ + struct spdk_pci_addr zero = {0}; + + return spdk_pci_addr_compare(addr, &zero) != 0; +} + int bio_led_manage(struct bio_xs_context *xs_ctxt, char *tr_addr, uuid_t dev_uuid, unsigned int action, unsigned int *state, uint64_t duration) { - struct spdk_pci_addr pci_addr; + struct spdk_pci_addr pci_addr = {0}; int addr_len = 0; int rc; - /* LED management on NVMe devices currently only supported when VMD is enabled. */ - if (!bio_vmd_enabled) - return -DER_NOTSUPPORTED; - /** * If tr_addr is already provided, convert to a PCI address. 
If tr_addr is NULL or empty, * derive PCI address from the provided UUID and if tr_addr is an empty string buffer then @@ -1035,14 +1057,21 @@ bio_led_manage(struct bio_xs_context *xs_ctxt, char *tr_addr, uuid_t dev_uuid, u if (tr_addr != NULL) { addr_len = strnlen(tr_addr, SPDK_NVMF_TRADDR_MAX_LEN + 1); - if (addr_len == SPDK_NVMF_TRADDR_MAX_LEN + 1) + if (addr_len == SPDK_NVMF_TRADDR_MAX_LEN + 1) { + D_ERROR("Address string too long"); return -DER_INVAL; + } } if (addr_len == 0) { rc = dev_uuid2pci_addr(&pci_addr, dev_uuid); + if (rc == -DER_NOTSUPPORTED) { + /* Skip LED action for device without valid PCI address */ + return 0; + } if (rc != 0) { - DL_ERROR(rc, "Failed to read PCI addr from dev UUID"); + DL_ERROR(rc, "Failed to read PCI addr from device " DF_UUID, + DP_UUID(dev_uuid)); return rc; } @@ -1050,7 +1079,7 @@ bio_led_manage(struct bio_xs_context *xs_ctxt, char *tr_addr, uuid_t dev_uuid, u /* Populate tr_addr buffer to return address */ rc = spdk_pci_addr_fmt(tr_addr, addr_len, &pci_addr); if (rc != 0) { - D_ERROR("Failed to write VMD's PCI address (%s)\n", + D_ERROR("Failed to write VMD's PCI address (%s)", spdk_strerror(-rc)); return -DER_INVAL; } @@ -1058,12 +1087,17 @@ bio_led_manage(struct bio_xs_context *xs_ctxt, char *tr_addr, uuid_t dev_uuid, u } else { rc = spdk_pci_addr_parse(&pci_addr, tr_addr); if (rc != 0) { - D_ERROR("Unable to parse PCI address for device %s (%s)\n", tr_addr, + D_ERROR("Unable to parse PCI address for device %s (%s)", tr_addr, spdk_strerror(-rc)); return -DER_INVAL; } } + if (!is_pci_addr_valid(&pci_addr)) { + D_ERROR("No valid PCI address found for device"); + return -DER_INVAL; + } + return led_manage(xs_ctxt, pci_addr, (Ctl__LedAction)action, (Ctl__LedState *)state, duration); } diff --git a/src/bio/bio_internal.h b/src/bio/bio_internal.h index 5b97582d18c..d8be60405d5 100644 --- a/src/bio/bio_internal.h +++ b/src/bio/bio_internal.h @@ -1,6 +1,6 @@ /** * (C) Copyright 2018-2024 Intel Corporation. 
- * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -598,8 +598,7 @@ extern struct bio_faulty_criteria glb_criteria; /* bio_xstream.c */ extern bool bio_scm_rdma; -extern bool bio_spdk_inited; -extern bool bio_vmd_enabled; +extern bool bio_spdk_inited; extern unsigned int bio_chk_sz; extern unsigned int bio_chk_cnt_max; extern unsigned int bio_numa_node; diff --git a/src/bio/bio_xstream.c b/src/bio/bio_xstream.c index f3200031423..757118d3e67 100644 --- a/src/bio/bio_xstream.c +++ b/src/bio/bio_xstream.c @@ -56,9 +56,7 @@ static unsigned int bio_chk_init_pct; /* Diret RDMA over SCM */ bool bio_scm_rdma; /* Whether SPDK inited */ -bool bio_spdk_inited; -/* Whether VMD is enabled */ -bool bio_vmd_enabled; +bool bio_spdk_inited; /* SPDK subsystem fini timeout */ unsigned int bio_spdk_subsys_timeout = 25000; /* ms */ /* How many blob unmap calls can be called in a row */ @@ -109,7 +107,6 @@ bio_spdk_conf_read(struct spdk_env_opts *opts) return rc; } nvme_glb.bd_nvme_roles = roles; - bio_vmd_enabled = vmd_enabled && (nvme_glb.bd_bdev_class == BDEV_CLASS_NVME); rc = bio_set_hotplug_filter(nvme_glb.bd_nvme_conf); if (rc != 0) { @@ -1075,10 +1072,10 @@ init_bio_bdevs(struct bio_xs_context *ctxt) return -DER_EXIST; } - /* A DER_NOTSUPPORTED RC indicates that VMD-LED control not possible */ + /* Clear any pre-existing VMD-LED state */ rc = bio_led_manage(ctxt, NULL, d_bdev->bb_uuid, (unsigned int)CTL__LED_ACTION__RESET, NULL, 0); - if ((rc != 0) && (rc != -DER_NOTSUPPORTED)) { + if (rc != 0) { DL_ERROR(rc, "Reset LED on device:" DF_UUID " failed", DP_UUID(d_bdev->bb_uuid)); return rc; @@ -2024,22 +2021,15 @@ bio_led_event_monitor(struct bio_xs_context *ctxt, uint64_t now) struct bio_bdev *d_bdev; int rc; - if (!bio_vmd_enabled) - return; - /* Scan all devices present in bio_bdev list */ d_list_for_each_entry(d_bdev, bio_bdev_list(), 
bb_link) { if ((d_bdev->bb_led_expiry_time != 0) && (d_bdev->bb_led_expiry_time < now)) { - /** - * LED will be reset to faulty or normal state based on SSDs bio_bdevs. - * A DER_NOTSUPPORTED RC indicates that VMD-LED control not possible. - */ + /* LED will be reset to faulty or normal state based on SSDs bio_bdevs. */ rc = bio_led_manage(ctxt, NULL, d_bdev->bb_uuid, (unsigned int)CTL__LED_ACTION__RESET, NULL, 0); if (rc != 0) { - if (rc != -DER_NOTSUPPORTED) - DL_ERROR(rc, "Reset LED on device:" DF_UUID " failed", - DP_UUID(d_bdev->bb_uuid)); + DL_ERROR(rc, "Reset LED on device:" DF_UUID " failed", + DP_UUID(d_bdev->bb_uuid)); continue; } diff --git a/src/control/events/ras.go b/src/control/events/ras.go index 95dbc218858..902a8559e58 100644 --- a/src/control/events/ras.go +++ b/src/control/events/ras.go @@ -1,6 +1,6 @@ // // (C) Copyright 2020-2024 Intel Corporation. -// (C) Copyright 2025 Hewlett Packard Enterprise Development LP +// (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -58,6 +58,7 @@ const ( RASSystemFabricProvChanged RASID = C.RAS_SYSTEM_FABRIC_PROV_CHANGED // info RASNVMeLinkSpeedChanged RASID = C.RAS_DEVICE_LINK_SPEED_CHANGED // warning|notice RASNVMeLinkWidthChanged RASID = C.RAS_DEVICE_LINK_WIDTH_CHANGED // warning|notice + RASDeviceLEDSet RASID = C.RAS_DEVICE_LED_SET // info ) func (id RASID) String() string { diff --git a/src/control/lib/daos/status.go b/src/control/lib/daos/status.go index 3de2435be64..6a597d461ef 100644 --- a/src/control/lib/daos/status.go +++ b/src/control/lib/daos/status.go @@ -1,6 +1,6 @@ // // (C) Copyright 2019-2024 Intel Corporation. 
-// (C) Copyright 2026 Hewlett Packard Enterprise Development LP +// (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -178,4 +178,6 @@ const ( RedundancyFactorExceeded Status = -C.DER_RF // AgentCommFailed indicates that client/agent communication failed. AgentCommFailed Status = -C.DER_AGENT_COMM + // NotSupported indicates that operation is unsupported. + NotSupported Status = -C.DER_NOTSUPPORTED ) diff --git a/src/control/server/ctl_smd_rpc.go b/src/control/server/ctl_smd_rpc.go index 16d5b6486c3..2cd75390f76 100644 --- a/src/control/server/ctl_smd_rpc.go +++ b/src/control/server/ctl_smd_rpc.go @@ -1,6 +1,6 @@ // // (C) Copyright 2020-2023 Intel Corporation. -// (C) Copyright 2025 Hewlett Packard Enterprise Development LP +// (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -174,7 +174,7 @@ func extractReqIDs(log logging.Logger, ids string, addrs idMap, uuids idMap) err tokens := strings.Split(ids, ",") for _, token := range tokens { - if addr, e := hardware.NewPCIAddress(token); e == nil && addr.IsVMDBackingAddress() { + if addr, e := hardware.NewPCIAddress(token); e == nil { addrs[addr.String()] = true continue } @@ -184,7 +184,7 @@ func extractReqIDs(log logging.Logger, ids string, addrs idMap, uuids idMap) err continue } - return errors.Errorf("req id entry %q is neither a valid vmd backing device pci "+ + return errors.Errorf("req id entry %q is neither a valid device pci "+ "address or uuid", token) } @@ -240,7 +240,7 @@ func (svc *ControlService) mapIDsToEngine(ctx context.Context, ids string, useTr matchAll := false if ids == "" { - // Selecting all is not supported unless using transport addresses. + // Selecting all not supported unless using transport addresses. 
if !useTrAddr { return nil, errors.New("empty id string") } diff --git a/src/control/server/ctl_smd_rpc_test.go b/src/control/server/ctl_smd_rpc_test.go index 06f1276fa25..bbc7de54a7f 100644 --- a/src/control/server/ctl_smd_rpc_test.go +++ b/src/control/server/ctl_smd_rpc_test.go @@ -1,5 +1,6 @@ // // (C) Copyright 2020-2024 Intel Corporation. +// (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -919,7 +920,7 @@ func TestServer_CtlSvc_SmdManage(t *testing.T) { }, expErr: errors.New("neither a valid"), }, - "led-manage; pci address not of a vmd backing device": { + "led-manage; pci address of a non-vmd device": { req: &ctlpb.SmdManageReq{ Op: &ctlpb.SmdManageReq_Led{ Led: &ctlpb.LedManageReq{ @@ -927,7 +928,29 @@ func TestServer_CtlSvc_SmdManage(t *testing.T) { }, }, }, - expErr: errors.New("neither a valid"), + drpcResps: map[int][]*mockDrpcResponse{ + 0: { + { + Message: &ctlpb.SmdDevResp{ + Devices: []*ctlpb.SmdDevice{pbNormDev(1)}, + }, + }, + { + Message: &ctlpb.DevManageResp{ + Device: pbIdentDev(1), + }, + }, + }, + }, + expResp: &ctlpb.SmdManageResp{ + Ranks: []*ctlpb.SmdManageResp_RankResp{ + { + Results: []*ctlpb.SmdManageResp_Result{ + {Device: pbIdentDev(1)}, + }, + }, + }, + }, }, "led-manage; valid pci address of vmd backing device": { req: &ctlpb.SmdManageReq{ diff --git a/src/include/daos_srv/ras.h b/src/include/daos_srv/ras.h index 5df47372eb9..8fcf4ae8013 100644 --- a/src/include/daos_srv/ras.h +++ b/src/include/daos_srv/ras.h @@ -1,6 +1,6 @@ /** * (C) Copyright 2020-2024 Intel Corporation. 
- * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -65,7 +65,8 @@ X(RAS_SYSTEM_FABRIC_PROV_CHANGED, "system_fabric_provider_changed") \ X(RAS_ENGINE_JOIN_FAILED, "engine_join_failed") \ X(RAS_DEVICE_LINK_SPEED_CHANGED, "device_link_speed_changed") \ - X(RAS_DEVICE_LINK_WIDTH_CHANGED, "device_link_width_changed") + X(RAS_DEVICE_LINK_WIDTH_CHANGED, "device_link_width_changed") \ + X(RAS_DEVICE_LED_SET, "device_led_set") /** Define RAS event enum */ typedef enum { diff --git a/src/mgmt/srv_query.c b/src/mgmt/srv_query.c index 51e457ba278..5eab9777648 100644 --- a/src/mgmt/srv_query.c +++ b/src/mgmt/srv_query.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2016-2024 Intel Corporation. + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -248,11 +249,14 @@ bio_storage_dev_manage_led(void *arg) return -DER_INVAL; } - /* Set the LED of the VMD device to a FAULT state, tr_addr and state may be updated */ + /** + * Set/Get the LED state of the VMD device, tr_addr and state led_info fields may be + * updated. 
+ */ rc = bio_led_manage(bxc, led_info->tr_addr, led_info->dev_uuid, (unsigned int)led_info->action, (unsigned int *)led_info->state, led_info->duration); - if ((rc != 0) && (rc != -DER_NOTSUPPORTED)) + if (rc != 0) DL_ERROR(rc, "bio_led_manage failed on device:" DF_UUID " (action: %s, state %s)", DP_UUID(led_info->dev_uuid), ctl__led_action__descriptor.values[led_info->action].name, @@ -486,7 +490,7 @@ ds_mgmt_smd_list_devs(Ctl__SmdDevResp *resp) break; resp->devices[i]->ctrlr_namespace_id = dev_info->bdi_ctrlr->nss->id; } else { - D_DEBUG(DB_MGMT, "ctrlr not initialized in bio_dev_info, unplugged?"); + D_DEBUG(DB_MGMT, "ctrlr not initialized in bio_dev_info, is it unplugged?"); } /* Populate NVMe device state */ @@ -513,13 +517,7 @@ ds_mgmt_smd_list_devs(Ctl__SmdDevResp *resp) init_xs_type(), 0, 0); if (rc != 0) { - if (rc == -DER_NOTSUPPORTED) { - resp->devices[i]->ctrlr->led_state = CTL__LED_STATE__NA; - /* Reset rc for non-VMD case */ - rc = 0; - } else { - break; - } + break; } resp->devices[i]->ctrlr->led_state = led_state; @@ -753,14 +751,8 @@ ds_mgmt_dev_set_faulty(uuid_t dev_uuid, Ctl__DevManageResp *resp) /* Set the VMD LED to FAULTY state on init xstream */ rc = dss_ult_execute(bio_storage_dev_manage_led, &led_info, NULL, NULL, init_xs_type(), 0, 0); - if (rc != 0) { - if (rc == -DER_NOTSUPPORTED) - /* Reset rc for non-VMD case */ - rc = 0; - else - DL_ERROR(rc, "FAULT LED state not set on device:" DF_UUID, - DP_UUID(dev_uuid)); - } + if (rc != 0) + DL_ERROR(rc, "FAULT LED state not set on device:" DF_UUID, DP_UUID(dev_uuid)); out: smd_dev_free_info(dev_info); @@ -808,14 +800,13 @@ ds_mgmt_dev_manage_led(Ctl__LedManageReq *req, Ctl__DevManageResp *resp) led_info.state = &led_state; led_info.duration = req->led_duration_mins * 60 * (NSEC_PER_SEC / NSEC_PER_USEC); - /* Manage the VMD LED state on init xstream */ + /* Manage the LED state on init xstream */ rc = dss_ult_execute(bio_storage_dev_manage_led, &led_info, NULL, NULL, init_xs_type(), 0, 0); if 
(rc != 0) { + DL_ERROR(rc, "LED manage failed on device %s (%d)", led_info.tr_addr, rc); resp->device->ctrlr->led_state = CTL__LED_STATE__NA; - if (rc == -DER_NOTSUPPORTED) - /* Reset rc for non-VMD case */ - rc = 0; + resp->status = rc; } else { resp->device->ctrlr->led_state = (Ctl__LedState)led_state; } From 6a0e20af8efbef8faa4da81c62f77112a7003282 Mon Sep 17 00:00:00 2001 From: Kris Jacque Date: Tue, 27 Jan 2026 08:54:30 -0700 Subject: [PATCH 154/253] DAOS-18453 control: Allow daos cmd to inject fault value (#17394) - Add an optional parameter to set fault injection value at the specified location. - Add a reset option to change both location and value to 0. Signed-off-by: Kris Jacque Signed-off-by: Tom Nabarro --- src/control/cmd/daos/fi.go | 80 ++++++++++++++++++++++++++----- src/control/lib/daos/api/fi.go | 44 +++++++++++++++++ src/control/server/mgmt_system.go | 4 +- 3 files changed, 114 insertions(+), 14 deletions(-) create mode 100644 src/control/lib/daos/api/fi.go diff --git a/src/control/cmd/daos/fi.go b/src/control/cmd/daos/fi.go index b47f463d0ce..adacf7575f7 100644 --- a/src/control/cmd/daos/fi.go +++ b/src/control/cmd/daos/fi.go @@ -1,5 +1,6 @@ // // (C) Copyright 2022 Intel Corporation. +// (C) Copyright 2026 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -19,6 +20,8 @@ import ( "strings" "github.com/pkg/errors" + + "github.com/daos-stack/daos/src/control/lib/daos/api" ) type faultsCmdRoot struct { @@ -58,23 +61,53 @@ func (ff faultFrequency) HasSome() (uint64, bool) { type faultLocation uint64 func (fl *faultLocation) UnmarshalFlag(fv string) error { - // Ugh. Seems like there should be a more clever way to do this... 
- switch strings.TrimSpace(fv) { - case "DAOS_CHK_CONT_ORPHAN": - *fl = faultLocation(C.DAOS_CHK_CONT_ORPHAN) - case "DAOS_CHK_CONT_BAD_LABEL": - *fl = faultLocation(C.DAOS_CHK_CONT_BAD_LABEL) - default: - return errors.Errorf("unhandled fault location %q", fv) + if fv == "none" { + *fl = 0 + return nil + } + + loc, err := api.FaultLocationFromString(fv) + if err != nil { + return err } + *fl = faultLocation(loc) return nil } +// IsSet indicates whether a fault location has been set. +func (fl faultLocation) IsSet() bool { + return fl != 0 +} + +type faultValue uint64 + +const faultValueUnset = faultValue(^uint64(0)) + +func (fv *faultValue) UnmarshalFlag(fvStr string) error { + if fvStr == "none" { + *fv = faultValueUnset + return nil + } + + // Allow hexadecimal and binary values, as well as decimal. + v, err := strconv.ParseUint(fvStr, 0, 64) + if err != nil { + return errors.Errorf("invalid fault value %q", fvStr) + } + *fv = faultValue(v) + return nil +} + +// IsSet indicates whether a fault value has been set. 
+func (fv faultValue) IsSet() bool { + return fv != faultValueUnset +} + type faultRank uint32 func (fr *faultRank) UnmarshalFlag(fv string) error { - if fv == strconv.FormatUint(uint64(C.CRT_NO_RANK), 10) || fv == "-1" { + if fv == "all" || fv == strconv.FormatUint(uint64(C.CRT_NO_RANK), 10) || fv == "-1" { *fr = faultRank(C.CRT_NO_RANK) return nil } @@ -90,9 +123,10 @@ func (fr *faultRank) UnmarshalFlag(fv string) error { type faultInjectionCmd struct { daosCmd - Rank faultRank `short:"r" long:"rank" description:"Rank to inject fault on" default:"4294967295"` + Rank faultRank `short:"r" long:"rank" description:"Rank to inject fault on" default:"all"` Frequency faultFrequency `short:"f" long:"frequency" description:"Fault injection frequency" choices:"always,once" default:"once"` - Location faultLocation `short:"l" long:"location" description:"Fault injection location" required:"1"` + Location faultLocation `short:"l" long:"location" description:"Fault injection location" default:"none"` + Value faultValue `short:"v" long:"value" description:"Fault injection value" default:"none"` } func (cmd *faultInjectionCmd) setParams() error { @@ -112,19 +146,41 @@ func (cmd *faultInjectionCmd) setParams() error { if cmd.Rank != C.CRT_NO_RANK { rankMsg = fmt.Sprintf("rank %d", cmd.Rank) } - cmd.Debugf("injecting fault %d on %s", faultMask, rankMsg) + cmd.Debugf("injecting fault location 0x%x on %s", faultMask, rankMsg) rc := C.daos_debug_set_params(nil, C.d_rank_t(cmd.Rank), C.DMG_KEY_FAIL_LOC, faultMask, 0, nil) if err := daosError(rc); err != nil { return errors.Wrap(err, "failed to set fault injection") } + + if cmd.Value.IsSet() { + cmd.Debugf("injecting fault value %d on %s", cmd.Value, rankMsg) + rc = C.daos_debug_set_params(nil, C.d_rank_t(cmd.Rank), C.DMG_KEY_FAIL_VALUE, C.uint64_t(cmd.Value), 0, nil) + if err := daosError(rc); err != nil { + return errors.Wrap(err, "failed to set fault injection value") + } + } return nil } type debugFaultCmd struct { 
faultInjectionCmd + + Reset bool `long:"reset" description:"Reset all fault injection parameters"` } func (cmd *debugFaultCmd) Execute(_ []string) error { + if cmd.Reset { + if cmd.Location.IsSet() || cmd.Value.IsSet() { + return errors.New("cannot set location or value when resetting fault injection parameters") + } + + cmd.Debugf("resetting all fault injection parameters") + cmd.Frequency = 0 + cmd.Location = 0 + cmd.Value = 0 + } else if !cmd.Location.IsSet() { + return errors.New("--location must be specified unless --reset is used") + } return cmd.setParams() } diff --git a/src/control/lib/daos/api/fi.go b/src/control/lib/daos/api/fi.go new file mode 100644 index 00000000000..40d4e2ef79a --- /dev/null +++ b/src/control/lib/daos/api/fi.go @@ -0,0 +1,44 @@ +// +// (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP +// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// + +package api + +/* +#include +#include +*/ +import "C" + +import "fmt" + +var ( + // failLocMap maps from strings to DAOS fault injection location constants. + // The definitions come from daos_common.h. + // TODO: Add the rest of existing fault locs. Maybe auto-generate this mapping? 
+ failLocMap = map[string]C.uint64_t{ + "DAOS_CHK_CONT_ORPHAN": C.DAOS_CHK_CONT_ORPHAN, + "DAOS_CHK_CONT_BAD_LABEL": C.DAOS_CHK_CONT_BAD_LABEL, + "DAOS_CHK_LEADER_BLOCK": C.DAOS_CHK_LEADER_BLOCK, + "DAOS_CHK_LEADER_FAIL_REGPOOL": C.DAOS_CHK_LEADER_FAIL_REGPOOL, + "DAOS_CHK_PS_NOTIFY_LEADER": C.DAOS_CHK_PS_NOTIFY_LEADER, + "DAOS_CHK_PS_NOTIFY_ENGINE": C.DAOS_CHK_PS_NOTIFY_ENGINE, + "DAOS_CHK_SYNC_ORPHAN_PROCESS": C.DAOS_CHK_SYNC_ORPHAN_PROCESS, + "DAOS_CHK_FAIL_REPORT_POOL1": C.DAOS_CHK_FAIL_REPORT_POOL1, + "DAOS_CHK_FAIL_REPORT_POOL2": C.DAOS_CHK_FAIL_REPORT_POOL2, + "DAOS_CHK_ENGINE_DEATH": C.DAOS_CHK_ENGINE_DEATH, + "DAOS_CHK_VERIFY_CONT_SHARDS": C.DAOS_CHK_VERIFY_CONT_SHARDS, + "DAOS_CHK_ORPHAN_POOL_SHARD": C.DAOS_CHK_ORPHAN_POOL_SHARD, + } +) + +// FaultLocationFromString converts a string to a fault injection location value. +func FaultLocationFromString(str string) (uint64, error) { + loc, found := failLocMap[str] + if !found { + return 0, fmt.Errorf("invalid fault injection location %q", str) + } + return uint64(loc), nil +} diff --git a/src/control/server/mgmt_system.go b/src/control/server/mgmt_system.go index 27110136c38..7eb43f6232e 100644 --- a/src/control/server/mgmt_system.go +++ b/src/control/server/mgmt_system.go @@ -1,6 +1,6 @@ // // (C) Copyright 2020-2024 Intel Corporation. -// (C) Copyright 2025 Hewlett Packard Enterprise Development LP +// (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -57,7 +57,7 @@ const ( // the client network autoconfiguration hints, and the set of ranks associated with MS // replicas. If req.AllRanks is true, all ranks' fabric URIs are also given the client. 
func (svc *mgmtSvc) GetAttachInfo(ctx context.Context, req *mgmtpb.GetAttachInfoReq) (*mgmtpb.GetAttachInfoResp, error) { - if err := svc.checkReplicaRequest(req); err != nil { + if err := svc.checkReplicaRequest(wrapCheckerReq(req)); err != nil { return nil, err } if len(svc.clientNetworkHint) == 0 { From a3580184e191b902b1b8ceef64861745930d5ab4 Mon Sep 17 00:00:00 2001 From: Jan Michalski Date: Tue, 27 Jan 2026 15:57:35 +0000 Subject: [PATCH 155/253] DAOS-16963 ddb: zero-length key fix (#15928) Signed-off-by: Jan Michalski --- src/utils/ddb/ddb_parse.c | 6 +++++- src/utils/ddb/tests/ddb_commands_tests.c | 12 +++++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/src/utils/ddb/ddb_parse.c b/src/utils/ddb/ddb_parse.c index 2d493a18ec6..29e53785911 100644 --- a/src/utils/ddb/ddb_parse.c +++ b/src/utils/ddb/ddb_parse.c @@ -452,8 +452,12 @@ key_parse_str(const char *input, daos_key_t *key) key_len++; } } - if (size == 0) + if (size == 0) { + if (key_len == 0) { + return -DER_INVAL; + } size = key_len; + } if (size < key_len) return -DER_INVAL; diff --git a/src/utils/ddb/tests/ddb_commands_tests.c b/src/utils/ddb/tests/ddb_commands_tests.c index acb920d9fce..35f6ff7299e 100644 --- a/src/utils/ddb/tests/ddb_commands_tests.c +++ b/src/utils/ddb/tests/ddb_commands_tests.c @@ -1,6 +1,6 @@ /** * (C) Copyright 2022-2024 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP. + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -181,12 +181,22 @@ dump_ilog_cmd_tests(void **state) assert_success(ddb_run_ilog_dump(&ctx, &opt)); assert_true(dvt_fake_print_called); + /* Dump dkey ilog - invalid */ + dvt_fake_print_called = 0; + opt.path = "[0]/[0]//"; + assert_rc_equal(ddb_run_ilog_dump(&ctx, &opt), -DER_INVAL); + assert_true(dvt_fake_print_called); + /* Dump dkey ilog */ dvt_fake_print_called = 0; opt.path = "[0]/[0]/[0]"; assert_success(ddb_run_ilog_dump(&ctx, &opt)); assert_true(dvt_fake_print_called); + /* Dump akey ilog - invalid */ + opt.path = "[0]/[0]/[0]//"; + assert_rc_equal(ddb_run_ilog_dump(&ctx, &opt), -DER_INVAL); + /* Dump akey ilog */ opt.path = "[0]/[0]/[0]/[0]"; assert_success(ddb_run_ilog_dump(&ctx, &opt)); From 4c8fb913d6811da7a71f00a784c2f11879fb3f91 Mon Sep 17 00:00:00 2001 From: Nasf-Fan Date: Wed, 28 Jan 2026 11:52:39 +0800 Subject: [PATCH 156/253] DAOS-17535 chk: misc improvements for CR logic (#17427) Include the followings: 1. When create CHK IV namespace, make the secondary group to be same as the primary group. Otherwise, CHK logic may hit DER_NONEXIST trouble when communicate via IV. 2. Integrate CHK IV namespace create and destroy API, cleanup related logic, redefine the version. 3. Get ranks list and IV namespace version from CHK leader when rejoin. Adjust CHK_REJOIN RPC for related changes. 4. Remove unsupported functionality for checking the specified 'phase'. 5. Add new test for case of lost some engine(s) before start checker. 6. Dedicated ULT to handle dead rank event, that will not be affected by checker start or stop. Then even if check scheduler exited, the subsequent check query still can work against the latest rank list. 
Signed-off-by: Fan Yong --- src/chk/chk_common.c | 22 +- src/chk/chk_engine.c | 213 +++++++-------- src/chk/chk_internal.h | 124 ++++----- src/chk/chk_iv.c | 102 ++++++- src/chk/chk_leader.c | 250 +++++++++--------- src/chk/chk_rpc.c | 42 ++- src/chk/chk_srv.c | 19 +- src/engine/server_iv.c | 4 +- src/gurt/misc.c | 23 +- src/include/daos_srv/daos_chk.h | 8 +- src/include/gurt/common.h | 12 +- src/mgmt/srv_chk.c | 6 +- src/mgmt/srv_drpc.c | 4 +- src/mgmt/srv_internal.h | 7 +- src/mgmt/tests/mocks.c | 4 +- src/object/cli_coll.c | 5 +- src/tests/ftest/recovery/cat_recov_core.yaml | 7 +- .../ftest/recovery/check_start_corner_case.py | 4 +- .../ftest/recovery/container_cleanup.yaml | 2 +- .../container_list_consolidation.yaml | 2 +- src/tests/ftest/recovery/ms_membership.yaml | 5 + src/tests/ftest/recovery/pool_cleanup.yaml | 2 +- .../recovery/pool_list_consolidation.yaml | 2 +- src/tests/suite/daos_cr.c | 61 ++++- 24 files changed, 532 insertions(+), 398 deletions(-) diff --git a/src/chk/chk_common.c b/src/chk/chk_common.c index 460c37a50ff..adf5d068523 100644 --- a/src/chk/chk_common.c +++ b/src/chk/chk_common.c @@ -1073,8 +1073,7 @@ chk_policy_refresh(uint32_t policy_nr, struct chk_policy *policies, struct chk_p } int -chk_prop_prepare(d_rank_t leader, uint32_t flags, int phase, - uint32_t policy_nr, struct chk_policy *policies, +chk_prop_prepare(d_rank_t leader, uint32_t flags, uint32_t policy_nr, struct chk_policy *policies, d_rank_list_t *ranks, struct chk_property *prop) { int rc = 0; @@ -1086,11 +1085,8 @@ chk_prop_prepare(d_rank_t leader, uint32_t flags, int phase, prop->cp_flags &= ~CHK__CHECK_FLAG__CF_FAILOUT; if (flags & CHK__CHECK_FLAG__CF_NO_AUTO) prop->cp_flags &= ~CHK__CHECK_FLAG__CF_AUTO; - prop->cp_flags |= flags & ~(CHK__CHECK_FLAG__CF_RESET | - CHK__CHECK_FLAG__CF_ORPHAN_POOL | - CHK__CHECK_FLAG__CF_NO_FAILOUT | - CHK__CHECK_FLAG__CF_NO_AUTO); - prop->cp_phase = phase; + prop->cp_flags |= flags & ~(CHK__CHECK_FLAG__CF_RESET | 
CHK__CHECK_FLAG__CF_ORPHAN_POOL | + CHK__CHECK_FLAG__CF_NO_FAILOUT | CHK__CHECK_FLAG__CF_NO_AUTO); if (ranks != NULL) prop->cp_rank_nr = ranks->rl_nr; @@ -1240,12 +1236,7 @@ chk_ins_cleanup(struct chk_instance *ins) chk_stop_sched(ins); ins->ci_inited = 0; - chk_iv_ns_cleanup(&ins->ci_iv_ns); - - if (ins->ci_iv_group != NULL) { - crt_group_secondary_destroy(ins->ci_iv_group); - ins->ci_iv_group = NULL; - } + chk_iv_ns_destroy(ins); } int @@ -1260,7 +1251,8 @@ chk_ins_init(struct chk_instance **p_ins) if (ins == NULL) D_GOTO(out_init, rc = -DER_NOMEM); - ins->ci_sched = ABT_THREAD_NULL; + ins->ci_sched = ABT_THREAD_NULL; + ins->ci_dead_rank_ult = ABT_THREAD_NULL; ins->ci_rank_hdl = DAOS_HDL_INVAL; D_INIT_LIST_HEAD(&ins->ci_rank_list); @@ -1332,6 +1324,8 @@ chk_ins_fini(struct chk_instance **p_ins) D_ASSERT(d_list_empty(&ins->ci_interaction_filter_list)); D_ASSERT(d_list_empty(&ins->ci_pool_shutdown_list)); + D_ASSERT(ins->ci_dead_rank_ult == ABT_THREAD_NULL); + if (ins->ci_sched != ABT_THREAD_NULL) ABT_thread_free(&ins->ci_sched); diff --git a/src/chk/chk_engine.c b/src/chk/chk_engine.c index c301d55a184..3dfb7b9b705 100644 --- a/src/chk/chk_engine.c +++ b/src/chk/chk_engine.c @@ -225,8 +225,7 @@ chk_engine_exit(struct chk_instance *ins, uint32_t ins_phase, uint32_t ins_statu iv.ci_to_leader = 1; /* Notify the leader that check instance exit on the engine. */ - rc = chk_iv_update(ins->ci_iv_ns, &iv, CRT_IV_SHORTCUT_TO_ROOT, - CRT_IV_SYNC_NONE, true); + rc = chk_iv_update(ins, &iv, CRT_IV_SHORTCUT_TO_ROOT, CRT_IV_SYNC_NONE); D_CDEBUG(rc != 0, DLOG_ERR, DLOG_INFO, DF_ENGINE" on rank %u notify leader for its exit, status %u: rc = %d\n", DP_ENGINE(ins), dss_self_rank(), ins_status, rc); @@ -1708,8 +1707,7 @@ chk_engine_pool_notify(struct chk_pool_rec *cpr) * to all engines. Otherwise, the engine out of the pool map cannot get * the notification. 
*/ - rc = chk_iv_update(ins->ci_iv_ns, &iv, CRT_IV_SHORTCUT_NONE, CRT_IV_SYNC_EAGER, - true); + rc = chk_iv_update(ins, &iv, CRT_IV_SHORTCUT_NONE, CRT_IV_SYNC_EAGER); D_CDEBUG(rc != 0, DLOG_ERR, DLOG_INFO, DF_ENGINE" on rank %u notify pool shards for "DF_UUIDF", phase %u, " "ins_status %u, pool_status %u: rc = %d\n", @@ -1721,8 +1719,7 @@ chk_engine_pool_notify(struct chk_pool_rec *cpr) iv.ci_from_psl = 0; iv.ci_to_leader = 1; /* Synchronously notify the check leader with the new check status/phase. */ - rc = chk_iv_update(ins->ci_iv_ns, &iv, CRT_IV_SHORTCUT_TO_ROOT, - CRT_IV_SYNC_NONE, true); + rc = chk_iv_update(ins, &iv, CRT_IV_SHORTCUT_TO_ROOT, CRT_IV_SYNC_NONE); D_CDEBUG(rc != 0, DLOG_ERR, DLOG_INFO, DF_ENGINE" on rank %u notify check leader for "DF_UUIDF", phase %u, " "ins_status %u, pool_status %u: rc = %d\n", @@ -2045,9 +2042,8 @@ chk_engine_sched(void *args) static int chk_engine_start_prep(struct chk_instance *ins, uint32_t rank_nr, d_rank_t *ranks, - uint32_t policy_nr, struct chk_policy *policies, int pool_nr, - uuid_t pools[], uint64_t gen, int phase, uint32_t api_flags, - d_rank_t leader, uint32_t flags) + uint32_t policy_nr, struct chk_policy *policies, int pool_nr, uuid_t pools[], + uint64_t gen, uint32_t api_flags, d_rank_t leader, uint32_t flags) { struct chk_traverse_pools_args ctpa = { 0 }; struct chk_bookmark *cbk = &ins->ci_bk; @@ -2134,8 +2130,7 @@ chk_engine_start_prep(struct chk_instance *ins, uint32_t rank_nr, d_rank_t *rank init: if (!chk_is_on_leader(gen, leader, true)) { - rc = chk_prop_prepare(leader, api_flags, phase, policy_nr, policies, rank_list, - prop); + rc = chk_prop_prepare(leader, api_flags, policy_nr, policies, rank_list, prop); if (rc != 0) goto out; @@ -2263,16 +2258,15 @@ chk_engine_pool_filter(uuid_t uuid, void *arg, int *phase) int chk_engine_start(uint64_t gen, uint32_t rank_nr, d_rank_t *ranks, uint32_t policy_nr, struct chk_policy *policies, int pool_nr, uuid_t pools[], uint32_t api_flags, - int phase, d_rank_t 
leader, uint32_t flags, uuid_t iv_uuid, + uint32_t ns_ver, d_rank_t leader, uint32_t flags, uuid_t iv_uuid, struct ds_pool_clues *clues) { - struct chk_instance *ins = chk_engine; - struct chk_bookmark *cbk = &ins->ci_bk; - struct umem_attr uma = { 0 }; - char uuid_str[DAOS_UUID_STR_SIZE]; - d_rank_t myrank = dss_self_rank(); - int rc; - int rc1; + struct chk_instance *ins = chk_engine; + struct chk_bookmark *cbk = &ins->ci_bk; + struct umem_attr uma = {0}; + d_rank_t myrank = dss_self_rank(); + int rc; + int rc1; rc = chk_ins_can_start(ins); if (rc != 0) @@ -2294,12 +2288,7 @@ chk_engine_start(uint64_t gen, uint32_t rank_nr, d_rank_t *ranks, uint32_t polic if (ins->ci_sched != ABT_THREAD_NULL) ABT_thread_free(&ins->ci_sched); - chk_iv_ns_cleanup(&ins->ci_iv_ns); - - if (ins->ci_iv_group != NULL) { - crt_group_secondary_destroy(ins->ci_iv_group); - ins->ci_iv_group = NULL; - } + chk_iv_ns_destroy(ins); uma.uma_id = UMEM_CLASS_VMEM; @@ -2313,27 +2302,20 @@ chk_engine_start(uint64_t gen, uint32_t rank_nr, d_rank_t *ranks, uint32_t polic if (rc != 0) goto out_tree; - rc = chk_engine_start_prep(ins, rank_nr, ranks, policy_nr, policies, - pool_nr, pools, gen, phase, api_flags, leader, flags); + rc = chk_engine_start_prep(ins, rank_nr, ranks, policy_nr, policies, pool_nr, pools, gen, + api_flags, leader, flags); if (rc != 0) goto out_tree; if (chk_is_on_leader(gen, leader, true)) { ins->ci_iv_ns = chk_leader_get_iv_ns(); - if (unlikely(ins->ci_iv_ns == NULL)) - goto out_tree; + D_ASSERT(ins->ci_iv_ns != NULL); + + ins->ci_ns_ver = ns_ver; } else { - uuid_unparse_lower(iv_uuid, uuid_str); - rc = crt_group_secondary_create(uuid_str, NULL, ins->ci_ranks, &ins->ci_iv_group); + rc = chk_iv_ns_create(ins, iv_uuid, leader, ns_ver); if (rc != 0) goto out_tree; - - rc = ds_iv_ns_create(dss_get_module_info()->dmi_ctx, iv_uuid, ins->ci_iv_group, - &ins->ci_iv_id, &ins->ci_iv_ns); - if (rc != 0) - goto out_group; - - ds_iv_ns_update(ins->ci_iv_ns, leader, 
ins->ci_iv_ns->iv_master_term + 1); } uuid_copy(cbk->cb_iv_uuid, iv_uuid); @@ -2367,12 +2349,7 @@ chk_engine_start(uint64_t gen, uint32_t rank_nr, d_rank_t *ranks, uint32_t polic D_WARN(DF_ENGINE" failed to update engine bookmark: "DF_RC"\n", DP_ENGINE(ins), DP_RC(rc1)); } - chk_iv_ns_cleanup(&ins->ci_iv_ns); -out_group: - if (ins->ci_iv_group != NULL) { - crt_group_secondary_destroy(ins->ci_iv_group); - ins->ci_iv_group = NULL; - } + chk_iv_ns_destroy(ins); out_tree: chk_destroy_pending_tree(ins); chk_destroy_pool_tree(ins); @@ -2380,17 +2357,18 @@ chk_engine_start(uint64_t gen, uint32_t rank_nr, d_rank_t *ranks, uint32_t polic ins->ci_starting = 0; out_log: if (rc >= 0) { - D_INFO(DF_ENGINE " %s on rank %u with api_flags %x, phase %d, leader %u, " - "flags %x, iv "DF_UUIDF": rc %d\n", + D_INFO(DF_ENGINE " %s on rank %u with api_flags %x, ns_ver %d, leader %u, " + "flags %x, iv " DF_UUIDF ": rc %d\n", DP_ENGINE(ins), chk_is_ins_reset(ins, api_flags) ? "start" : "resume", - myrank, api_flags, phase, leader, flags, DP_UUID(iv_uuid), rc); + myrank, api_flags, ns_ver, leader, flags, DP_UUID(iv_uuid), rc); chk_ranks_dump(ins->ci_ranks->rl_nr, ins->ci_ranks->rl_ranks); chk_pools_dump(&ins->ci_pool_list, pool_nr, pools); } else { - D_ERROR(DF_ENGINE" failed to start on rank %u with %d pools, api_flags %x, " - "phase %d, leader %u, flags %x, gen "DF_X64", iv "DF_UUIDF": "DF_RC"\n", - DP_ENGINE(ins), myrank, pool_nr, api_flags, phase, leader, flags, gen, + D_ERROR(DF_ENGINE " failed to start on rank %u with %d pools, api_flags %x, " + "ns_ver %d, leader %u, flags %x, gen " DF_X64 ", iv " DF_UUIDF + ": " DF_RC "\n", + DP_ENGINE(ins), myrank, pool_nr, api_flags, ns_ver, leader, flags, gen, DP_UUID(iv_uuid), DP_RC(rc)); } @@ -2416,7 +2394,7 @@ chk_engine_stop(uint64_t gen, int pool_nr, uuid_t pools[], uint32_t *flags) if (cbk->cb_magic != CHK_BK_MAGIC_ENGINE) D_GOTO(log, rc = -DER_NOTAPPLICABLE); - if (ins->ci_starting) + if (ins->ci_starting || ins->ci_rejoining) D_GOTO(log, 
rc = -DER_BUSY); if (ins->ci_stopping || ins->ci_sched_exiting) @@ -2647,34 +2625,46 @@ chk_engine_query(uint64_t gen, int pool_nr, uuid_t pools[], uint32_t *ins_status int chk_engine_mark_rank_dead(uint64_t gen, d_rank_t rank, uint32_t version) { - struct chk_instance *ins = chk_engine; - struct chk_property *prop = &ins->ci_prop; - struct chk_bookmark *cbk = &ins->ci_bk; - d_rank_list_t *rank_list = NULL; - int rc = 0; + struct chk_instance *ins = chk_engine; + struct chk_property *prop = &ins->ci_prop; + struct chk_bookmark *cbk = &ins->ci_bk; + int rc = 0; CHK_IS_READY(ins); if (cbk->cb_gen != gen) D_GOTO(out, rc = -DER_NOTAPPLICABLE); - rc = chk_prop_fetch(prop, &rank_list); - if (rc != 0) - goto out; + /* For check engine on the leader, reload rank list that has been refreshed by leader. */ + if (chk_is_on_leader(cbk->cb_gen, prop->cp_leader, true)) { + d_rank_list_free(ins->ci_ranks); + ins->ci_ranks = NULL; + } - D_ASSERT(rank_list != NULL); + if (ins->ci_ranks == NULL) { + rc = chk_prop_fetch(prop, &ins->ci_ranks); + if (rc != 0) + goto out; - /* For check engine on the leader, related rank has already been marked as "dead". */ - if (chk_is_on_leader(cbk->cb_gen, prop->cp_leader, true)) - goto group; + /* For check engine on the leader, it's done. 
*/ + if (chk_is_on_leader(cbk->cb_gen, prop->cp_leader, true)) { + ins->ci_ns_ver = version; + goto out; + } + } + + if (unlikely(ins->ci_ranks == NULL)) + D_GOTO(out, rc = -DER_NOTAPPLICABLE); - if (!chk_remove_rank_from_list(rank_list, rank)) + if (!chk_remove_rank_from_list(ins->ci_ranks, rank)) D_GOTO(out, rc = -DER_NOTAPPLICABLE); prop->cp_rank_nr--; - rc = chk_prop_update(prop, rank_list); + rc = chk_prop_update(prop, ins->ci_ranks); if (rc != 0) - goto out; + ins->ci_skip_oog = 1; + else + rc = chk_iv_ns_update(ins, version); /* * NOTE: If the rank dead before DAOS check start, then subsequent check start will @@ -2695,19 +2685,7 @@ chk_engine_mark_rank_dead(uint64_t gen, d_rank_t rank, uint32_t version) * sometime later as the DAOS check going. */ -group: - if (ins->ci_iv_group != NULL) - rc = crt_group_secondary_modify(ins->ci_iv_group, rank_list, rank_list, - CRT_GROUP_MOD_OP_REPLACE, version); - out: - if (rc == 0) { - d_rank_list_free(ins->ci_ranks); - ins->ci_ranks = rank_list; - rank_list = NULL; - } - - d_rank_list_free(rank_list); if (rc != -DER_NOTAPPLICABLE) D_CDEBUG(rc != 0, DLOG_ERR, DLOG_INFO, DF_ENGINE" on rank %u mark rank %u as dead with gen " @@ -3383,19 +3361,19 @@ chk_engine_notify(struct chk_iv *iv) void chk_engine_rejoin(void *args) { - struct chk_instance *ins = chk_engine; - struct chk_property *prop = &ins->ci_prop; - struct chk_bookmark *cbk = &ins->ci_bk; - uuid_t *pools = NULL; - struct chk_iv iv = { 0 }; - struct umem_attr uma = { 0 }; - char uuid_str[DAOS_UUID_STR_SIZE]; - d_rank_t myrank = dss_self_rank(); - uint32_t pool_nr = 0; - uint32_t flags = 0; - int rc = 0; - int rc1; - bool need_join = false; + struct chk_instance *ins = chk_engine; + struct chk_property *prop = &ins->ci_prop; + struct chk_bookmark *cbk = &ins->ci_bk; + d_rank_list_t *ranks = NULL; + uuid_t *pools = NULL; + struct chk_iv iv = {0}; + struct umem_attr uma = {0}; + d_rank_t myrank = dss_self_rank(); + uint32_t pool_nr = 0; + uint32_t flags = 0; + int rc = 
0; + int rc1; + bool need_join = false; if (cbk->cb_magic != CHK_BK_MAGIC_ENGINE) goto out_log; @@ -3404,7 +3382,7 @@ chk_engine_rejoin(void *args) cbk->cb_ins_status != CHK__CHECK_INST_STATUS__CIS_PAUSED) goto out_log; - /* We do NOT support leader (and its associated engine ) to rejoin former check instance. */ + /* We do NOT support leader (and its associated engine) to rejoin former check instance. */ if (chk_is_on_leader(cbk->cb_gen, prop->cp_leader, true)) goto out_log; @@ -3439,22 +3417,10 @@ chk_engine_rejoin(void *args) if (rc != 0) goto out_tree; - uuid_unparse_lower(cbk->cb_iv_uuid, uuid_str); - rc = crt_group_secondary_create(uuid_str, NULL, ins->ci_ranks, &ins->ci_iv_group); - if (rc != 0) - goto out_tree; - - rc = ds_iv_ns_create(dss_get_module_info()->dmi_ctx, cbk->cb_iv_uuid, ins->ci_iv_group, - &ins->ci_iv_id, &ins->ci_iv_ns); - if (rc != 0) - goto out_group; - - ds_iv_ns_update(ins->ci_iv_ns, prop->cp_leader, ins->ci_iv_ns->iv_master_term + 1); - again: /* Ask leader whether this engine can rejoin or not. 
*/ rc = chk_rejoin_remote(prop->cp_leader, cbk->cb_gen, myrank, cbk->cb_iv_uuid, &flags, - &pool_nr, &pools); + &ins->ci_ns_ver, &pool_nr, &pools, &ranks); if (rc != 0) { if ((rc == -DER_OOG || rc == -DER_GRPVER) && !ins->ci_pause) { D_INFO(DF_ENGINE" Someone is not ready %d, let's rejoin after 1 sec\n", @@ -3464,14 +3430,22 @@ chk_engine_rejoin(void *args) goto again; } - goto out_iv; + goto out_tree; } - if (pool_nr == 0) { + if (ranks == NULL || pool_nr == 0) { need_join = false; - D_GOTO(out_iv, rc = 1); + D_GOTO(out_tree, rc = 1); } + d_rank_list_free(ins->ci_ranks); + ins->ci_ranks = ranks; + ranks = NULL; + + rc = chk_iv_ns_create(ins, cbk->cb_iv_uuid, prop->cp_leader, ins->ci_ns_ver); + if (rc != 0) + goto out_tree; + rc = chk_pools_load_list(ins, cbk->cb_gen, 0, pool_nr, pools, NULL); if (rc != 0) goto out_notify; @@ -3511,21 +3485,17 @@ chk_engine_rejoin(void *args) iv.ci_to_leader = 1; /* Notify the leader that check instance exit on the engine. */ - rc1 = chk_iv_update(ins->ci_iv_ns, &iv, CRT_IV_SHORTCUT_TO_ROOT, CRT_IV_SYNC_NONE, true); + rc1 = chk_iv_update(ins, &iv, CRT_IV_SHORTCUT_TO_ROOT, CRT_IV_SYNC_NONE); D_CDEBUG(rc1 != 0, DLOG_ERR, DLOG_INFO, DF_ENGINE" on rank %u notify leader for its exit, status %u: rc1 = %d\n", DP_ENGINE(ins), myrank, cbk->cb_ins_status, rc1); -out_iv: - chk_iv_ns_cleanup(&ins->ci_iv_ns); -out_group: - if (ins->ci_iv_group != NULL) { - crt_group_secondary_destroy(ins->ci_iv_group); - ins->ci_iv_group = NULL; - } + chk_iv_ns_destroy(ins); out_tree: chk_destroy_pending_tree(ins); chk_destroy_pool_tree(ins); out_log: + d_rank_list_free(ranks); + D_FREE(pools); if (need_join) D_CDEBUG(rc < 0, DLOG_ERR, DLOG_INFO, DF_ENGINE" rejoin on rank %u with iv "DF_UUIDF": "DF_RC"\n", @@ -3549,6 +3519,8 @@ chk_engine_setup(void) * related local inconsistency firstly. 
*/ + chk_report_seq_init(ins); + rc = chk_bk_fetch_engine(cbk); if (rc == -DER_NONEXIST) goto prop; @@ -3593,15 +3565,14 @@ chk_engine_setup(void) rc = chk_prop_fetch(&ins->ci_prop, &ins->ci_ranks); if (rc == -DER_NONEXIST) rc = 0; -fini: - if (rc != 0) { - chk_ins_fini(&ins); - } else { - chk_report_seq_init(ins); + if (rc == 0) { ins->ci_inited = 1; ins->ci_pause = 0; } +fini: + if (rc != 0) + chk_engine_cleanup(); return rc; } diff --git a/src/chk/chk_internal.h b/src/chk/chk_internal.h index 6c1d5508260..e4d6d52f3fd 100644 --- a/src/chk/chk_internal.h +++ b/src/chk/chk_internal.h @@ -76,6 +76,7 @@ struct chk_pool_mbs { uint32_t *cpm_tgt_status; }; +/* clang-format off */ /* * CHK_START: * From check leader to check engine to start the check instance on specified pool(s) or all pools. @@ -83,7 +84,7 @@ struct chk_pool_mbs { #define DAOS_ISEQ_CHK_START \ ((uint64_t) (csi_gen) CRT_VAR) \ ((uint32_t) (csi_flags) CRT_VAR) \ - ((int32_t) (csi_phase) CRT_VAR) \ + ((int32_t) (csi_ns_ver) CRT_VAR) \ ((d_rank_t) (csi_leader_rank) CRT_VAR) \ ((uint32_t) (csi_api_flags) CRT_VAR) \ ((uuid_t) (csi_iv_uuid) CRT_VAR) \ @@ -272,11 +273,13 @@ CRT_RPC_DECLARE(chk_report, DAOS_ISEQ_CHK_REPORT, DAOS_OSEQ_CHK_REPORT); #define DAOS_OSEQ_CHK_REJOIN \ ((int32_t) (cro_status) CRT_VAR) \ ((uint32_t) (cro_flags) CRT_VAR) \ + ((uint32_t) (cro_ns_ver) CRT_VAR) \ + ((uint32_t) (cro_padding) CRT_VAR) \ + ((d_rank_t) (cro_ranks) CRT_ARRAY) \ ((uuid_t) (cro_pools) CRT_ARRAY) CRT_RPC_DECLARE(chk_rejoin, DAOS_ISEQ_CHK_REJOIN, DAOS_OSEQ_CHK_REJOIN); -/* clang-format off */ /* * CHK_SET_POLICY: * From check leader to check engine to set policy during check instance running. @@ -501,16 +504,12 @@ struct chk_bookmark { * 'reset' for all pools. */ struct chk_property { - d_rank_t cp_leader; - Chk__CheckFlag cp_flags; - Chk__CheckInconsistAction cp_policies[CHK_POLICY_MAX]; - /* - * NOTE: Preserve for supporting to continue the check until the specified phase in the - * future. 
-1 means to check all phases. - */ - int32_t cp_phase; + d_rank_t cp_leader; + Chk__CheckFlag cp_flags; + Chk__CheckInconsistAction cp_policies[CHK_POLICY_MAX]; + uint32_t cp_padding; /* How many ranks (ever or should) take part in the check instance. */ - uint32_t cp_rank_nr; + uint32_t cp_rank_nr; }; /* @@ -555,6 +554,7 @@ struct chk_instance { d_list_t ci_dead_ranks; ABT_thread ci_sched; + ABT_thread ci_dead_rank_ult; ABT_rwlock ci_abt_lock; ABT_mutex ci_abt_mutex; ABT_cond ci_abt_cond; @@ -562,20 +562,12 @@ struct chk_instance { /* Generator for report event, pending repair actions, and so on. */ uint64_t ci_seq; - uint32_t ci_is_leader:1, - ci_sched_running:1, - ci_sched_exiting:1, - ci_for_orphan:1, - ci_orphan_done:1, /* leader has processed orphan pools. */ - ci_pool_stopped:1, /* check on some pools have been stopped. */ - ci_starting:1, - ci_stopping:1, - ci_started:1, - ci_inited:1, - ci_pause:1, - ci_rejoining:1, - ci_implicated:1; - uint32_t ci_start_flags; + uint32_t ci_is_leader : 1, ci_sched_running : 1, ci_sched_exiting : 1, ci_for_orphan : 1, + ci_orphan_done : 1, ci_pool_stopped : 1, /* check on some pools have been stopped. 
*/ + ci_starting : 1, ci_stopping : 1, ci_started : 1, ci_inited : 1, ci_pause : 1, + ci_skip_oog : 1, ci_rejoining : 1, ci_implicated : 1; + uint32_t ci_start_flags; + uint32_t ci_ns_ver; }; struct chk_iv { @@ -762,9 +754,8 @@ void chk_pending_destroy(struct chk_pending_rec *cpr); int chk_policy_refresh(uint32_t policy_nr, struct chk_policy *policies, struct chk_property *prop); -int chk_prop_prepare(d_rank_t leader, uint32_t flags, int phase, - uint32_t policy_nr, struct chk_policy *policies, - d_rank_list_t *ranks, struct chk_property *prop); +int chk_prop_prepare(d_rank_t leader, uint32_t flags, uint32_t policy_nr, + struct chk_policy *policies, d_rank_list_t *ranks, struct chk_property *prop); uint32_t chk_pool_merge_status(uint32_t status_a, uint32_t status_b); @@ -781,7 +772,7 @@ void chk_ins_fini(struct chk_instance **p_ins); int chk_engine_start(uint64_t gen, uint32_t rank_nr, d_rank_t *ranks, uint32_t policy_nr, struct chk_policy *policies, int pool_nr, - uuid_t pools[], uint32_t api_flags, int phase, d_rank_t leader, + uuid_t pools[], uint32_t api_flags, uint32_t ns_ver, d_rank_t leader, uint32_t flags, uuid_t iv_uuid, struct ds_pool_clues *clues); int chk_engine_stop(uint64_t gen, int pool_nr, uuid_t pools[], uint32_t *flags); @@ -818,7 +809,14 @@ void chk_engine_fini(void); /* chk_iv.c */ -int chk_iv_update(void *ns, struct chk_iv *iv, uint32_t shortcut, uint32_t sync_mode, bool retry); +void chk_iv_ns_destroy(struct chk_instance *ins); + +int chk_iv_ns_create(struct chk_instance *ins, uuid_t uuid, d_rank_t leader, uint32_t ns_ver); + +int chk_iv_ns_update(struct chk_instance *ins, uint32_t ns_ver); + +int chk_iv_update(struct chk_instance *ins, struct chk_iv *iv, uint32_t shortcut, + uint32_t sync_mode); int chk_iv_init(void); @@ -834,8 +832,8 @@ int chk_leader_report(struct chk_report_unit *cru, uint64_t *seq, int *decision) int chk_leader_notify(struct chk_iv *iv); -int chk_leader_rejoin(uint64_t gen, d_rank_t rank, uuid_t iv_uuid, uint32_t *flags, 
int *pool_nr, - uuid_t **pools); +int chk_leader_rejoin(uint64_t gen, d_rank_t rank, uuid_t iv_uuid, uint32_t *flags, + uint32_t *ns_ver, int *pool_nr, uuid_t **pools, d_rank_list_t **ranks); int chk_leader_setup(void); @@ -849,8 +847,8 @@ void chk_leader_fini(void); int chk_start_remote(d_rank_list_t *rank_list, uint64_t gen, uint32_t rank_nr, d_rank_t *ranks, uint32_t policy_nr, struct chk_policy *policies, int pool_nr, - uuid_t pools[], uint32_t api_flags, int phase, d_rank_t leader, uint32_t flags, - uuid_t iv_uuid, chk_co_rpc_cb_t start_cb, void *args); + uuid_t pools[], uint32_t api_flags, uint32_t ns_ver, d_rank_t leader, + uint32_t flags, uuid_t iv_uuid, chk_co_rpc_cb_t start_cb, void *args); int chk_stop_remote(d_rank_list_t *rank_list, uint64_t gen, int pool_nr, uuid_t pools[], chk_co_rpc_cb_t stop_cb, void *args); @@ -879,7 +877,7 @@ int chk_report_remote(d_rank_t leader, uint64_t gen, uint32_t cla, uint32_t act, uint32_t detail_nr, d_sg_list_t *details, uint64_t seq); int chk_rejoin_remote(d_rank_t leader, uint64_t gen, d_rank_t rank, uuid_t iv_uuid, uint32_t *flags, - uint32_t *pool_nr, uuid_t **pools); + uint32_t *ns_ver, uint32_t *pool_nr, uuid_t **pools, d_rank_list_t **ranks); int chk_set_policy_remote(d_rank_list_t *rank_list, uint64_t gen, uint32_t policy_nr, struct chk_policy *policies); @@ -946,41 +944,24 @@ chk_ins_set_fail(struct chk_instance *ins, uint32_t phase) static inline bool chk_rank_in_list(d_rank_list_t *rlist, d_rank_t rank) { - int i; - bool found = false; - - /* TBD: more efficiently search for the sorted ranks list. */ - - for (i = 0; i < rlist->rl_nr; i++) { - if (rlist->rl_ranks[i] == rank) { - found = true; - break; - } - } - - return found; + return d_rank_list_bsearch(rlist, rank, NULL); } static inline bool chk_remove_rank_from_list(d_rank_list_t *rlist, d_rank_t rank) { - int i; - bool found = false; - - /* TBD: more efficiently search for the sorted ranks list. 
*/ - - for (i = 0; i < rlist->rl_nr; i++) { - if (rlist->rl_ranks[i] == rank) { - found = true; - rlist->rl_nr--; - /* The leader rank will always be in the rank list. */ - D_ASSERT(rlist->rl_nr > 0); - - if (i < rlist->rl_nr) - memmove(&rlist->rl_ranks[i], &rlist->rl_ranks[i + 1], - sizeof(rlist->rl_ranks[i]) * (rlist->rl_nr - i)); - break; - } + int idx = -1; + bool found = false; + + if (d_rank_list_bsearch(rlist, rank, &idx)) { + D_ASSERT(rlist->rl_nr > 0); + D_ASSERT(idx >= 0); + + rlist->rl_nr--; + if (idx < rlist->rl_nr) + memmove(&rlist->rl_ranks[idx], &rlist->rl_ranks[idx + 1], + sizeof(rlist->rl_ranks[idx]) * (rlist->rl_nr - idx)); + found = true; } return found; @@ -1032,17 +1013,6 @@ chk_query_free(struct chk_query_pool_shard *shards, uint32_t shard_nr) } } -static inline void -chk_iv_ns_cleanup(struct ds_iv_ns **ns) -{ - if (*ns != NULL) { - if ((*ns)->iv_refcount == 1) - ds_iv_ns_cleanup(*ns); - ds_iv_ns_put(*ns); - *ns = NULL; - } -} - static inline void chk_pool_get(struct chk_pool_rec *cpr) { @@ -1249,7 +1219,7 @@ chk_ins_can_start(struct chk_instance *ins) if (ins->ci_starting) return -DER_INPROGRESS; - if (ins->ci_stopping || ins->ci_sched_exiting) + if (ins->ci_stopping || ins->ci_sched_exiting || ins->ci_rejoining) return -DER_BUSY; if (ins->ci_sched_running) diff --git a/src/chk/chk_iv.c b/src/chk/chk_iv.c index 299c1554856..712fd474c21 100644 --- a/src/chk/chk_iv.c +++ b/src/chk/chk_iv.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2022-2024 Intel Corporation. 
+ * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -181,13 +182,76 @@ struct ds_iv_class_ops chk_iv_ops = { .ivc_value_alloc = chk_iv_value_alloc, }; +void +chk_iv_ns_destroy(struct chk_instance *ins) +{ + if (ins->ci_iv_ns != NULL) { + if (ins->ci_iv_ns->iv_refcount == 1) + ds_iv_ns_cleanup(ins->ci_iv_ns); + ds_iv_ns_put(ins->ci_iv_ns); + ins->ci_iv_ns = NULL; + } + + if (ins->ci_iv_group != NULL) { + crt_group_secondary_destroy(ins->ci_iv_group); + ins->ci_iv_group = NULL; + } +} + int -chk_iv_update(void *ns, struct chk_iv *iv, uint32_t shortcut, uint32_t sync_mode, bool retry) +chk_iv_ns_create(struct chk_instance *ins, uuid_t uuid, d_rank_t leader, uint32_t ns_ver) { - d_sg_list_t sgl; - d_iov_t iov; - struct ds_iv_key key; - int rc; + char uuid_str[DAOS_UUID_STR_SIZE]; + int rc; + + uuid_unparse_lower(uuid, uuid_str); + rc = crt_group_secondary_create(uuid_str, NULL, NULL, &ins->ci_iv_group); + if (rc != 0) + goto out; + + rc = ds_iv_ns_create(dss_get_module_info()->dmi_ctx, uuid, ins->ci_iv_group, &ins->ci_iv_id, + &ins->ci_iv_ns); + if (rc != 0) + goto out; + + rc = chk_iv_ns_update(ins, ns_ver); + if (rc == 0) { + ds_iv_ns_update(ins->ci_iv_ns, leader, ins->ci_iv_ns->iv_master_term + 1); + ins->ci_skip_oog = 0; + } + +out: + if (rc != 0) + chk_iv_ns_destroy(ins); + return rc; +} + +int +chk_iv_ns_update(struct chk_instance *ins, uint32_t ns_ver) +{ + int rc; + + /* Let secondary rank == primary rank. 
 */ + rc = crt_group_secondary_modify(ins->ci_iv_group, ins->ci_ranks, ins->ci_ranks, + CRT_GROUP_MOD_OP_REPLACE, ns_ver); + if (rc == 0) + ins->ci_ns_ver = ns_ver; + else + ins->ci_skip_oog = 1; + + return rc; +} + +int +chk_iv_update(struct chk_instance *ins, struct chk_iv *iv, uint32_t shortcut, uint32_t sync_mode) +{ + d_sg_list_t sgl; + d_iov_t iov; + struct ds_iv_key key; + uint32_t ver; + int try_cnt = 0; + int wait_cnt = 0; + int rc; iv->ci_rank = dss_self_rank(); iv->ci_seq = d_hlc_get(); @@ -208,9 +272,35 @@ chk_iv_update(void *ns, struct chk_iv *iv, uint32_t shortcut, uint32_t sync_mode memset(&key, 0, sizeof(key)); key.class_id = IV_CHK; - rc = ds_iv_update(ns, &key, &sgl, shortcut, sync_mode, 0, retry); + +again: + try_cnt++; + ver = ins->ci_ns_ver; + rc = ds_iv_update(ins->ci_iv_ns, &key, &sgl, shortcut, sync_mode, 0, true); + if (likely(rc != -DER_OOG)) + goto out; + + if (try_cnt % 10 == 0) + D_WARN("CHK iv " DF_X64 "/" DF_X64 " retry because of -DER_OOG for more " + "than %d times.\n", + iv->ci_gen, iv->ci_seq, try_cnt); + + /* Wait chk_dead_rank_ult to sync the IV namespace. 
*/ + while (ver == ins->ci_ns_ver && ins->ci_skip_oog == 0 && ins->ci_pause == 0) { + dss_sleep(500); + if (++wait_cnt % 40 == 0) + D_WARN("CHK iv " DF_X64 "/" DF_X64 " is blocked because of DER_OOG " + "for %d seconds.\n", + iv->ci_gen, iv->ci_seq, wait_cnt / 2); + } + + if (ins->ci_pause || ins->ci_skip_oog) + goto out; + + goto again; } +out: D_CDEBUG(rc != 0, DLOG_ERR, DLOG_INFO, "CHK iv "DF_X64"/"DF_X64" on rank %u, phase %u, ins_status %u, " "pool_status %u, to_leader %s, from_psl %s: rc = %d\n", diff --git a/src/chk/chk_leader.c b/src/chk/chk_leader.c index 3f9d54b0d25..3c381346663 100644 --- a/src/chk/chk_leader.c +++ b/src/chk/chk_leader.c @@ -226,10 +226,9 @@ static void chk_leader_exit(struct chk_instance *ins, uint32_t ins_phase, uint32_t ins_status, uint32_t pool_status, bool bcast) { - struct chk_dead_rank *cdr; - struct chk_bookmark *cbk = &ins->ci_bk; - struct chk_iv iv = {0}; - int rc = 0; + struct chk_bookmark *cbk = &ins->ci_bk; + struct chk_iv iv = {0}; + int rc = 0; ins->ci_sched_exiting = 1; @@ -244,8 +243,7 @@ chk_leader_exit(struct chk_instance *ins, uint32_t ins_phase, uint32_t ins_statu iv.ci_ins_status = ins_status; /* Synchronously notify the engines that the check leader exit. 
*/ - rc = chk_iv_update(ins->ci_iv_ns, &iv, CRT_IV_SHORTCUT_NONE, - CRT_IV_SYNC_EAGER, true); + rc = chk_iv_update(ins, &iv, CRT_IV_SHORTCUT_NONE, CRT_IV_SYNC_EAGER); D_CDEBUG(rc != 0, DLOG_ERR, DLOG_INFO, DF_LEADER" notify the engines its exit, status %u: rc = %d\n", DP_LEADER(ins), ins_status, rc); @@ -264,10 +262,6 @@ chk_leader_exit(struct chk_instance *ins, uint32_t ins_phase, uint32_t ins_statu DP_LEADER(ins), ins_status, DP_RC(rc)); } - while ((cdr = d_list_pop_entry(&ins->ci_dead_ranks, struct chk_dead_rank, cdr_link)) != - NULL) - D_FREE(cdr); - ins->ci_sched_exiting = 0; } @@ -316,8 +310,7 @@ chk_leader_post_repair(struct chk_instance *ins, struct chk_pool_rec *cpr, iv.ci_phase = cbk->cb_phase; iv.ci_pool_status = cbk->cb_pool_status; - rc = chk_iv_update(ins->ci_iv_ns, &iv, CRT_IV_SHORTCUT_NONE, CRT_IV_SYNC_EAGER, - true); + rc = chk_iv_update(ins, &iv, CRT_IV_SHORTCUT_NONE, CRT_IV_SYNC_EAGER); DL_CDEBUG(rc != 0, DLOG_WARN, DLOG_INFO, rc, DF_LEADER " notify engines that check pool " DF_UUIDF " done, status %u", DP_LEADER(ins), DP_UUID(cpr->cpr_uuid), iv.ci_pool_status); @@ -2102,8 +2095,7 @@ chk_leader_pool_ult(void *arg) uuid_copy(iv.ci_uuid, cpr->cpr_uuid); iv.ci_phase = cbk->cb_phase; - rc = chk_iv_update(ins->ci_iv_ns, &iv, CRT_IV_SHORTCUT_NONE, - CRT_IV_SYNC_EAGER, true); + rc = chk_iv_update(ins, &iv, CRT_IV_SHORTCUT_NONE, CRT_IV_SYNC_EAGER); D_CDEBUG(rc != 0, DLOG_ERR, DLOG_INFO, DF_LEADER" notify engines to exit check for pool "DF_UUIDF" failure: %d\n", DP_LEADER(ins), DP_UUID(cpr->cpr_uuid), rc); @@ -2128,7 +2120,7 @@ chk_leader_mark_rank_dead(struct chk_instance *ins, struct chk_dead_rank *cdr) struct chk_pool_shard *tmp; struct chk_property *prop = &ins->ci_prop; struct chk_bookmark *cbk = &ins->ci_bk; - uint32_t version = cbk->cb_gen - prop->cp_rank_nr - 1; + uint32_t version = ins->ci_ns_ver + 1; int rc = 0; if (!chk_remove_rank_from_list(ins->ci_ranks, cdr->cdr_rank)) @@ -2136,11 +2128,12 @@ chk_leader_mark_rank_dead(struct chk_instance 
*ins, struct chk_dead_rank *cdr) prop->cp_rank_nr--; rc = chk_prop_update(prop, ins->ci_ranks); - if (rc != 0) + if (rc != 0) { + ins->ci_skip_oog = 1; goto out; + } - rc = crt_group_secondary_modify(ins->ci_iv_group, ins->ci_ranks, ins->ci_ranks, - CRT_GROUP_MOD_OP_REPLACE, version); + rc = chk_iv_ns_update(ins, version); if (rc != 0) goto out; @@ -2198,7 +2191,6 @@ chk_leader_sched(void *args) { struct chk_instance *ins = args; struct chk_bookmark *cbk = &ins->ci_bk; - struct chk_dead_rank *cdr; struct chk_pending_rec *pending; struct chk_iv iv = {0}; uint32_t ins_phase; @@ -2208,7 +2200,6 @@ chk_leader_sched(void *args) int done = 0; int rc = 0; bool bcast = false; - bool more_dead; D_INFO(DF_LEADER" scheduler enter at phase %u\n", DP_LEADER(ins), cbk->cb_phase); @@ -2239,29 +2230,9 @@ chk_leader_sched(void *args) while (1) { dss_sleep(300); -check_dead: - ABT_mutex_lock(ins->ci_abt_mutex); - if (!d_list_empty(&ins->ci_dead_ranks)) { - cdr = d_list_pop_entry(&ins->ci_dead_ranks, struct chk_dead_rank, cdr_link); - if (!d_list_empty(&ins->ci_dead_ranks)) - more_dead = true; - else - more_dead = false; - } else { - cdr = NULL; - more_dead = false; - } - ABT_mutex_unlock(ins->ci_abt_mutex); - - if (cdr != NULL) - chk_leader_mark_rank_dead(ins, cdr); - if (chk_leader_need_stop(ins, &rc)) D_GOTO(out, bcast = (rc > 0 ? true : false)); - if (more_dead) - goto check_dead; - if (!d_list_empty(&ins->ci_interaction_filter_list)) { pending = d_list_pop_entry(&ins->ci_interaction_filter_list, struct chk_pending_rec, cpr_ins_link); @@ -2289,8 +2260,7 @@ chk_leader_sched(void *args) iv.ci_ins_status = CHK__CHECK_INST_STATUS__CIS_RUNNING; /* Synchronously notify engines that orphan pools have been processed. 
*/ - rc = chk_iv_update(ins->ci_iv_ns, &iv, CRT_IV_SHORTCUT_NONE, - CRT_IV_SYNC_EAGER, true); + rc = chk_iv_update(ins, &iv, CRT_IV_SHORTCUT_NONE, CRT_IV_SYNC_EAGER); D_CDEBUG(rc != 0, DLOG_ERR, DLOG_INFO, DF_LEADER" notify engines that orphan pools have been process: %d\n", DP_LEADER(ins), rc); @@ -2461,8 +2431,8 @@ chk_leader_ranks_prepare(struct chk_instance *ins, uint32_t rank_nr, d_rank_t *r static int chk_leader_start_prep(struct chk_instance *ins, uint32_t rank_nr, d_rank_t *ranks, - uint32_t policy_nr, struct chk_policy *policies, int pool_nr, - uuid_t pools[], int phase, d_rank_t leader, uint32_t flags) + uint32_t policy_nr, struct chk_policy *policies, int pool_nr, uuid_t pools[], + d_rank_t leader, uint32_t flags) { struct chk_property *prop = &ins->ci_prop; struct chk_bookmark *cbk = &ins->ci_bk; @@ -2552,7 +2522,7 @@ chk_leader_start_prep(struct chk_instance *ins, uint32_t rank_nr, d_rank_t *rank cbk->cb_version = chk_ver; init: - rc = chk_prop_prepare(leader, flags, phase, policy_nr, policies, rank_list, prop); + rc = chk_prop_prepare(leader, flags, policy_nr, policies, rank_list, prop); if (rc != 0) goto out; @@ -2698,8 +2668,7 @@ chk_leader_start_post(struct chk_instance *ins) * to notify the engine for the check done, that is not fatal. That * can be redo in next check instance. 
*/ - rc = chk_iv_update(ins->ci_iv_ns, &iv, CRT_IV_SHORTCUT_NONE, - CRT_IV_SYNC_EAGER, true); + rc = chk_iv_update(ins, &iv, CRT_IV_SHORTCUT_NONE, CRT_IV_SYNC_EAGER); D_CDEBUG(rc != 0, DLOG_ERR, DLOG_INFO, DF_LEADER" notify engines the pool "DF_UUIDF" is checked: %d\n", DP_LEADER(ins), DP_UUID(cpr->cpr_uuid), rc); @@ -2879,20 +2848,20 @@ chk_leader_start_cb(struct chk_co_rpc_cb_args *cb_args) int chk_leader_start(uint32_t rank_nr, d_rank_t *ranks, uint32_t policy_nr, struct chk_policy *policies, - int pool_nr, uuid_t pools[], uint32_t api_flags, int phase) + int pool_nr, uuid_t pools[], uint32_t api_flags) { - struct chk_instance *ins = chk_leader; - struct chk_bookmark *cbk = &ins->ci_bk; - uuid_t *c_pools = NULL; - struct umem_attr uma = { 0 }; - uuid_t dummy_pool = { 0 }; - char uuid_str[DAOS_UUID_STR_SIZE]; - uint64_t old_gen = cbk->cb_gen; - d_rank_t myrank = dss_self_rank(); - uint32_t flags = api_flags; - int c_pool_nr = 0; - int rc; - int rc1; + struct chk_instance *ins = chk_leader; + struct chk_bookmark *cbk = &ins->ci_bk; + uuid_t *c_pools = NULL; + struct umem_attr uma = {0}; + uuid_t dummy_pool = {0}; + uint64_t old_gen = cbk->cb_gen; + d_rank_t myrank = dss_self_rank(); + uint32_t flags = api_flags; + uint32_t ns_ver = (uint32_t)daos_wallclock_secs(); + int c_pool_nr = 0; + int rc; + int rc1; rc = chk_ins_can_start(ins); if (rc != 0) @@ -2917,13 +2886,7 @@ chk_leader_start(uint32_t rank_nr, d_rank_t *ranks, uint32_t policy_nr, struct c if (ins->ci_sched != ABT_THREAD_NULL) ABT_thread_free(&ins->ci_sched); - chk_iv_ns_cleanup(&ins->ci_iv_ns); - - if (ins->ci_iv_group != NULL) { - crt_group_secondary_destroy(ins->ci_iv_group); - ins->ci_iv_group = NULL; - } - + chk_iv_ns_destroy(ins); uma.uma_id = UMEM_CLASS_VMEM; rc = dbtree_create_inplace(DBTREE_CLASS_CHK_RANK, 0, CHK_BTREE_ORDER, &uma, @@ -2942,8 +2905,8 @@ chk_leader_start(uint32_t rank_nr, d_rank_t *ranks, uint32_t policy_nr, struct c goto out_tree; reset: - rc = chk_leader_start_prep(ins, rank_nr, 
ranks, policy_nr, policies, pool_nr, pools, - phase, myrank, flags); + rc = chk_leader_start_prep(ins, rank_nr, ranks, policy_nr, policies, pool_nr, pools, myrank, + flags); if (rc == 1 && !(flags & CHK__CHECK_FLAG__CF_RESET)) { /* Former check instance has done, let's re-start from the beginning. */ flags |= CHK__CHECK_FLAG__CF_RESET; @@ -2957,18 +2920,10 @@ chk_leader_start(uint32_t rank_nr, d_rank_t *ranks, uint32_t policy_nr, struct c goto remote; uuid_generate(dummy_pool); - uuid_unparse_lower(dummy_pool, uuid_str); - rc = crt_group_secondary_create(uuid_str, NULL, ins->ci_ranks, &ins->ci_iv_group); + rc = chk_iv_ns_create(ins, dummy_pool, myrank, ns_ver); if (rc != 0) goto out_tree; - rc = ds_iv_ns_create(dss_get_module_info()->dmi_ctx, dummy_pool, ins->ci_iv_group, - &ins->ci_iv_id, &ins->ci_iv_ns); - if (rc != 0) - goto out_group; - - ds_iv_ns_update(ins->ci_iv_ns, myrank, ins->ci_iv_ns->iv_master_term + 1); - if (d_list_empty(&ins->ci_pool_list)) { c_pool_nr = pool_nr; c_pools = pools; @@ -2980,7 +2935,7 @@ chk_leader_start(uint32_t rank_nr, d_rank_t *ranks, uint32_t policy_nr, struct c remote: rc = chk_start_remote(ins->ci_ranks, cbk->cb_gen, rank_nr, ranks, policy_nr, policies, - c_pool_nr, c_pools, flags, phase, myrank, ins->ci_start_flags, + c_pool_nr, c_pools, flags, ns_ver, myrank, ins->ci_start_flags, dummy_pool, chk_leader_start_cb, ins); if (rc != 0) { if (rc == -DER_OOG || rc == -DER_GRPVER || rc == -DER_AGAIN) { @@ -3023,10 +2978,9 @@ chk_leader_start(uint32_t rank_nr, d_rank_t *ranks, uint32_t policy_nr, struct c goto out_stop_pools; } - D_INFO("Leader %s check with api_flags %x, phase %d, leader %u, flags %x, gen " DF_X64 - " iv "DF_UUIDF": rc %d\n", - chk_is_ins_reset(ins, flags) ? "start" : "resume", api_flags, phase, myrank, - ins->ci_start_flags, cbk->cb_gen, DP_UUID(dummy_pool), rc); + D_INFO("Leader %s with api_flags %x, leader %u, flags %x, gen " DF_X64 " iv " DF_UUIDF "\n", + chk_is_ins_reset(ins, flags) ? 
"start" : "resume", api_flags, myrank, + ins->ci_start_flags, cbk->cb_gen, DP_UUID(dummy_pool)); chk_ranks_dump(ins->ci_ranks->rl_nr, ins->ci_ranks->rl_ranks); chk_pools_dump(&ins->ci_pool_list, c_pool_nr > 0 ? c_pool_nr : pool_nr, @@ -3049,8 +3003,6 @@ chk_leader_start(uint32_t rank_nr, d_rank_t *ranks, uint32_t policy_nr, struct c D_WARN(DF_LEADER" failed to rollback failed check start: "DF_RC"\n", DP_LEADER(ins), DP_RC(rc1)); out_iv: - chk_iv_ns_cleanup(&ins->ci_iv_ns); -out_group: if (cbk->cb_ins_status == CHK__CHECK_INST_STATUS__CIS_RUNNING || cbk->cb_gen != old_gen) { cbk->cb_gen = old_gen; if (cbk->cb_ins_status == CHK__CHECK_INST_STATUS__CIS_RUNNING) { @@ -3062,17 +3014,16 @@ chk_leader_start(uint32_t rank_nr, d_rank_t *ranks, uint32_t policy_nr, struct c D_WARN(DF_LEADER" failed to update leader bookmark: "DF_RC"\n", DP_LEADER(ins), DP_RC(rc1)); } - crt_group_secondary_destroy(ins->ci_iv_group); - ins->ci_iv_group = NULL; + chk_iv_ns_destroy(ins); out_tree: chk_leader_destroy_trees(ins); ins->ci_starting = 0; out_log: - D_CDEBUG(likely(rc < 0), DLOG_ERR, DLOG_INFO, - "Leader %s to start check on %u ranks for %d pools with " - "api_flags %x, phase %d, leader %u, gen "DF_X64": rc = %d\n", - rc < 0 ? "failed" : "try", rank_nr, pool_nr, api_flags, phase, - myrank, cbk->cb_gen, rc); + DL_CDEBUG(likely(rc < 0), DLOG_ERR, DLOG_INFO, rc, + "Leader %s to start check on %u ranks for %d pools with api_flags %x, ns_ver %d, " + "leader %u, gen " DF_X64, + rc < 0 ? 
"failed" : "try", rank_nr, pool_nr, api_flags, ns_ver, myrank, + cbk->cb_gen); if (unlikely(rc > 0)) rc = 0; @@ -3328,6 +3279,9 @@ chk_leader_query(int pool_nr, uuid_t pools[], chk_query_head_cb_t head_cb, uint32_t idx = 0; uint32_t status; uint32_t phase; + uint32_t ver; + int try_cnt = 0; + int wait_cnt = 0; int rc; int i; bool skip; @@ -3359,22 +3313,38 @@ chk_leader_query(int pool_nr, uuid_t pools[], chk_query_head_cb_t head_cb, D_GOTO(out, rc = -DER_NOMEM); again: - rc = chk_query_remote(ins->ci_ranks, gen, pool_nr, pools, chk_leader_query_cb, cqa); + try_cnt++; + ver = ins->ci_ns_ver; + rc = chk_query_remote(ins->ci_ranks, gen, pool_nr, pools, chk_leader_query_cb, cqa); if (rc != 0) { - if (rc == -DER_OOG || rc == -DER_GRPVER || rc == -DER_AGAIN) { - D_INFO(DF_LEADER" Someone is not ready %d, let's retry query after 1 sec\n", - DP_LEADER(ins), rc); - if (!d_list_empty(&cqa->cqa_list)) { - chk_cqa_free(cqa); - cqa = chk_cqa_alloc(ins); - if (cqa == NULL) - D_GOTO(out, rc = -DER_NOMEM); - } - dss_sleep(1000); - goto again; + if (rc != -DER_OOG && rc != -DER_GRPVER && rc != -DER_AGAIN) + goto out; + + if (try_cnt % 10 == 0) + D_WARN("Leader (" DF_X64 ") query retried because of %d for %d times.\n", + gen, rc, try_cnt); + + while (ver == ins->ci_ns_ver && ins->ci_skip_oog == 0 && ins->ci_pause == 0) { + dss_sleep(500); + if (++wait_cnt % 40 == 0) + D_WARN("Leader (" DF_X64 ") query is blocked because of %d for " + "about %d seconds.\n", + gen, rc, wait_cnt / 2); + if (rc != -DER_OOG) + break; } - goto out; + if (ins->ci_pause || ins->ci_skip_oog) + goto out; + + if (!d_list_empty(&cqa->cqa_list)) { + chk_cqa_free(cqa); + cqa = chk_cqa_alloc(ins); + if (cqa == NULL) + D_GOTO(out, rc = -DER_NOMEM); + } + + goto again; } d_list_for_each_entry(cpr, &ins->ci_pool_list, cpr_link) { @@ -3826,8 +3796,8 @@ chk_leader_notify(struct chk_iv *iv) } int -chk_leader_rejoin(uint64_t gen, d_rank_t rank, uuid_t iv_uuid, uint32_t *flags, int *pool_nr, - uuid_t **pools) 
+chk_leader_rejoin(uint64_t gen, d_rank_t rank, uuid_t iv_uuid, uint32_t *flags, uint32_t *ns_ver, + int *pool_nr, uuid_t **pools, d_rank_list_t **ranks) { struct chk_instance *ins = chk_leader; struct chk_bookmark *cbk = &ins->ci_bk; @@ -3854,7 +3824,9 @@ chk_leader_rejoin(uint64_t gen, d_rank_t rank, uuid_t iv_uuid, uint32_t *flags, if (ins->ci_orphan_done) *flags = CRF_ORPHAN_DONE; - rc = chk_leader_pools2list(ins, pool_nr, pools); + *ns_ver = ins->ci_ns_ver; + *ranks = ins->ci_ranks; + rc = chk_leader_pools2list(ins, pool_nr, pools); out: D_CDEBUG(rc != 0, DLOG_ERR, DLOG_INFO, @@ -3872,6 +3844,9 @@ chk_rank_event_cb(d_rank_t rank, uint64_t incarnation, enum crt_event_source src struct chk_dead_rank *cdr = NULL; int rc = 0; + if (ins->ci_ranks == NULL) + D_GOTO(out, rc = -DER_NOTAPPLICABLE); + /* Ignore the event that is not applicable to current rank. */ if (src != CRT_EVS_SWIM) @@ -3880,9 +3855,6 @@ chk_rank_event_cb(d_rank_t rank, uint64_t incarnation, enum crt_event_source src if (type != CRT_EVT_DEAD && type != CRT_EVT_ALIVE) D_GOTO(out, rc = -DER_NOTAPPLICABLE); - if (!ins->ci_sched_running) - D_GOTO(out, rc = -DER_NOTAPPLICABLE); - if (type == CRT_EVT_DEAD) { D_ALLOC_PTR(cdr); if (cdr == NULL) @@ -3917,6 +3889,34 @@ chk_rank_event_cb(d_rank_t rank, uint64_t incarnation, enum crt_event_source src DP_LEADER(ins), rank, type == CRT_EVT_DEAD ? 
"dead" : "alive", DP_RC(rc)); } +static void +chk_dead_rank_ult(void *args) +{ + struct chk_instance *ins = args; + struct chk_dead_rank *cdr; + + while (ins->ci_inited) { + cdr = NULL; + if (!d_list_empty(&ins->ci_dead_ranks)) { + ABT_mutex_lock(ins->ci_abt_mutex); + if (likely(!d_list_empty(&ins->ci_dead_ranks))) + cdr = d_list_pop_entry(&ins->ci_dead_ranks, struct chk_dead_rank, + cdr_link); + ABT_mutex_unlock(ins->ci_abt_mutex); + } + + if (cdr != NULL) + chk_leader_mark_rank_dead(ins, cdr); + + if (d_list_empty(&ins->ci_dead_ranks)) + dss_sleep(500); + } + + while ((cdr = d_list_pop_entry(&ins->ci_dead_ranks, struct chk_dead_rank, cdr_link)) != + NULL) + D_FREE(cdr); +} + int chk_leader_setup(void) { @@ -3932,6 +3932,8 @@ chk_leader_setup(void) * related local inconsistency firstly. */ + chk_report_seq_init(ins); + rc = chk_bk_fetch_leader(cbk); if (rc == -DER_NONEXIST) goto prop; @@ -3985,17 +3987,19 @@ chk_leader_setup(void) prop: rc = chk_prop_fetch(&ins->ci_prop, &ins->ci_ranks); - if (rc == 0 || rc == -DER_NONEXIST) + if (rc != 0 && rc != -DER_NONEXIST) + goto fini; + + ins->ci_inited = 1; + ins->ci_pause = 0; + + rc = dss_ult_create(chk_dead_rank_ult, ins, DSS_XS_SYS, 0, 0, &ins->ci_dead_rank_ult); + if (rc == 0) rc = crt_register_event_cb(chk_rank_event_cb, NULL); -fini: - if (rc != 0) { - chk_ins_fini(&ins); - } else { - chk_report_seq_init(ins); - ins->ci_inited = 1; - ins->ci_pause = 0; - } +fini: + if (rc != 0) + chk_leader_cleanup(); return rc; } @@ -4004,8 +4008,13 @@ chk_leader_cleanup(void) { struct chk_instance *ins = chk_leader; + crt_unregister_event_cb(chk_rank_event_cb, NULL); + chk_ins_cleanup(ins); D_ASSERT(d_list_empty(&ins->ci_rank_list)); + + if (ins->ci_dead_rank_ult != ABT_THREAD_NULL) + ABT_thread_free(&ins->ci_dead_rank_ult); } int @@ -4023,6 +4032,5 @@ chk_leader_init(void) void chk_leader_fini(void) { - crt_unregister_event_cb(chk_rank_event_cb, NULL); chk_ins_fini(&chk_leader); } diff --git a/src/chk/chk_rpc.c 
b/src/chk/chk_rpc.c index d81506e5c35..e250936dfc2 100644 --- a/src/chk/chk_rpc.c +++ b/src/chk/chk_rpc.c @@ -1,6 +1,6 @@ /** * (C) Copyright 2022-2024 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -527,8 +527,8 @@ chk_sg_rpc_prepare(d_rank_t rank, crt_opcode_t opc, crt_rpc_t **req) int chk_start_remote(d_rank_list_t *rank_list, uint64_t gen, uint32_t rank_nr, d_rank_t *ranks, - uint32_t policy_nr, struct chk_policy *policies, int pool_nr, - uuid_t pools[], uint32_t api_flags, int phase, d_rank_t leader, uint32_t flags, + uint32_t policy_nr, struct chk_policy *policies, int pool_nr, uuid_t pools[], + uint32_t api_flags, uint32_t ns_ver, d_rank_t leader, uint32_t flags, uuid_t iv_uuid, chk_co_rpc_cb_t start_cb, void *args) { struct chk_co_rpc_cb_args cb_args = { 0 }; @@ -544,12 +544,12 @@ chk_start_remote(d_rank_list_t *rank_list, uint64_t gen, uint32_t rank_nr, d_ran if (rc != 0) goto out; - csi = crt_req_get(req); - csi->csi_gen = gen; - csi->csi_flags = flags; - csi->csi_phase = phase; + csi = crt_req_get(req); + csi->csi_gen = gen; + csi->csi_flags = flags; + csi->csi_ns_ver = ns_ver; csi->csi_leader_rank = leader; - csi->csi_api_flags = api_flags; + csi->csi_api_flags = api_flags; uuid_copy(csi->csi_iv_uuid, iv_uuid); csi->csi_ranks.ca_count = rank_nr; csi->csi_ranks.ca_arrays = ranks; @@ -605,9 +605,9 @@ chk_start_remote(d_rank_list_t *rank_list, uint64_t gen, uint32_t rank_nr, d_ran crt_req_decref(req); } - D_CDEBUG(rc < 0, DLOG_ERR, DLOG_INFO, - "Rank %u start checker, gen "DF_X64", flags %x, phase %d, iv "DF_UUIDF":"DF_RC"\n", - leader, gen, flags, phase, DP_UUID(iv_uuid), DP_RC(rc)); + DL_CDEBUG(rc < 0, DLOG_ERR, DLOG_INFO, rc, + "Rank %u start checker, gen " DF_X64 ", flags %x, ns_ver %d, iv " DF_UUIDF, + leader, gen, flags, ns_ver, DP_UUID(iv_uuid)); return rc; } @@ -1019,7 +1019,7 @@ int 
chk_report_remote(d_rank_t leader, uint64_t gen, uint32_t cla, uint32_t act, int chk_rejoin_remote(d_rank_t leader, uint64_t gen, d_rank_t rank, uuid_t iv_uuid, uint32_t *flags, - uint32_t *pool_nr, uuid_t **pools) + uint32_t *ns_ver, uint32_t *pool_nr, uuid_t **pools, d_rank_list_t **ranks) { crt_rpc_t *req = NULL; struct chk_rejoin_in *cri; @@ -1042,8 +1042,22 @@ chk_rejoin_remote(d_rank_t leader, uint64_t gen, d_rank_t rank, uuid_t iv_uuid, cro = crt_reply_get(req); rc = cro->cro_status; - if (rc == 0 && cro->cro_pools.ca_count > 0) { - *flags = cro->cro_flags; + if (rc != 0) + goto out; + + *flags = cro->cro_flags; + *ns_ver = cro->cro_ns_ver; + + if (cro->cro_ranks.ca_count > 0) { + *ranks = d_rank_list_alloc(cro->cro_ranks.ca_count); + if (*ranks == NULL) + D_GOTO(out, rc = -DER_NOMEM); + + memcpy((*ranks)->rl_ranks, cro->cro_ranks.ca_arrays, + sizeof(d_rank_t) * cro->cro_ranks.ca_count); + } + + if (cro->cro_pools.ca_count > 0) { D_ALLOC(tmp, cro->cro_pools.ca_count); if (tmp == NULL) D_GOTO(out, rc = -DER_NOMEM); diff --git a/src/chk/chk_srv.c b/src/chk/chk_srv.c index 84d6f3a21bc..d50e3b59657 100644 --- a/src/chk/chk_srv.c +++ b/src/chk/chk_srv.c @@ -27,7 +27,7 @@ ds_chk_start_hdlr(crt_rpc_t *rpc) rc = chk_engine_start(csi->csi_gen, csi->csi_ranks.ca_count, csi->csi_ranks.ca_arrays, csi->csi_policies.ca_count, csi->csi_policies.ca_arrays, csi->csi_uuids.ca_count, csi->csi_uuids.ca_arrays, csi->csi_api_flags, - csi->csi_phase, csi->csi_leader_rank, csi->csi_flags, + csi->csi_ns_ver, csi->csi_leader_rank, csi->csi_flags, csi->csi_iv_uuid, &clues); if (rc > 0) { D_ALLOC_PTR(rank); @@ -249,18 +249,21 @@ ds_chk_report_hdlr(crt_rpc_t *rpc) static void ds_chk_rejoin_hdlr(crt_rpc_t *rpc) { - struct chk_rejoin_in *cri = crt_req_get(rpc); - struct chk_rejoin_out *cro = crt_reply_get(rpc); - uuid_t *pools = NULL; - int pool_nr = 0; - int rc; + struct chk_rejoin_in *cri = crt_req_get(rpc); + struct chk_rejoin_out *cro = crt_reply_get(rpc); + uuid_t *pools = NULL; + 
d_rank_list_t *ranks = NULL; + int pool_nr = 0; + int rc; rc = chk_leader_rejoin(cri->cri_gen, cri->cri_rank, cri->cri_iv_uuid, &cro->cro_flags, - &pool_nr, &pools); + &cro->cro_ns_ver, &pool_nr, &pools, &ranks); cro->cro_status = rc; if (rc == 0) { - cro->cro_pools.ca_count = pool_nr; + cro->cro_ranks.ca_count = ranks->rl_nr; + cro->cro_ranks.ca_arrays = ranks->rl_ranks; + cro->cro_pools.ca_count = pool_nr; cro->cro_pools.ca_arrays = pools; } diff --git a/src/engine/server_iv.c b/src/engine/server_iv.c index 2f3ea5f46a2..a369e06538e 100644 --- a/src/engine/server_iv.c +++ b/src/engine/server_iv.c @@ -1,6 +1,6 @@ /** * (C) Copyright 2017-2024 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -1129,7 +1129,7 @@ _iv_op(struct ds_iv_ns *ns, struct ds_iv_key *key, d_sg_list_t *value, rc = iv_op_internal(ns, key, value, sync, shortcut, opc); if (retry && !ns->iv_stop && (daos_rpc_retryable_rc(rc) || rc == -DER_NOTLEADER || rc == -DER_BUSY)) { - if (rc == -DER_GRPVER && engine_in_check()) { + if ((rc == -DER_GRPVER || rc == -DER_OOG) && engine_in_check()) { /* * Under check mode, the pool shard on peer rank/target does * not exist, then it will reply "-DER_GRPVER" that is normal diff --git a/src/gurt/misc.c b/src/gurt/misc.c index 094b3cc1a56..afca34ab918 100644 --- a/src/gurt/misc.c +++ b/src/gurt/misc.c @@ -1,6 +1,6 @@ /* * (C) Copyright 2016-2024 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -550,9 +550,24 @@ d_rank_list_shuffle(d_rank_list_t *rank_list) } /** - * Must be previously sorted or not modified at all in order to guarantee - * consistent indexes. - **/ + * Binary search \a rank in the sorted \a rank_list. 
+ */ + +bool +d_rank_list_bsearch(d_rank_list_t *rank_list, d_rank_t rank, int *idx) +{ + d_rank_t *pos = NULL; + + if (rank_list != NULL) { + pos = bsearch(&rank, rank_list->rl_ranks, rank_list->rl_nr, sizeof(rank), + rank_compare); + if (pos != NULL && idx != NULL) + *idx = ((void *)pos - (void *)rank_list->rl_ranks) / sizeof(rank); + } + + return pos != NULL; +} + bool d_rank_list_find(d_rank_list_t *rank_list, d_rank_t rank, int *idx) { diff --git a/src/include/daos_srv/daos_chk.h b/src/include/daos_srv/daos_chk.h index 5756c84232e..9c363c86c9c 100644 --- a/src/include/daos_srv/daos_chk.h +++ b/src/include/daos_srv/daos_chk.h @@ -1,6 +1,6 @@ /** * (C) Copyright 2022-2024 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -74,9 +74,9 @@ typedef int (*chk_query_pool_cb_t)(struct chk_query_pool_shard *shard, uint32_t typedef int (*chk_prop_cb_t)(void *buf, uint32_t policies[], int cnt, uint32_t flags); -int chk_leader_start(uint32_t rank_nr, d_rank_t *ranks, uint32_t policy_nr, - struct chk_policy *policies, int pool_nr, uuid_t pools[], - uint32_t api_flags, int phase); +int +chk_leader_start(uint32_t rank_nr, d_rank_t *ranks, uint32_t policy_nr, struct chk_policy *policies, + int pool_nr, uuid_t pools[], uint32_t api_flags); int chk_leader_stop(int pool_nr, uuid_t pools[]); diff --git a/src/include/gurt/common.h b/src/include/gurt/common.h index 4f50f02e323..6963fa8f7f4 100644 --- a/src/include/gurt/common.h +++ b/src/include/gurt/common.h @@ -1,6 +1,6 @@ /* * (C) Copyright 2016-2024 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -457,6 +457,7 @@ d_power2_nbits(unsigned int val) return val == LOWEST_BIT_SET(val) ? 
shift - 1 : shift; } +/* clang-format off */ int d_rank_list_dup(d_rank_list_t **dst, const d_rank_list_t *src); int d_rank_list_dup_sort_uniq(d_rank_list_t **dst, const d_rank_list_t *src); void d_rank_list_filter(d_rank_list_t *src_set, d_rank_list_t *dst_set, @@ -468,6 +469,7 @@ void d_rank_list_free(d_rank_list_t *rank_list); int d_rank_list_copy(d_rank_list_t *dst, d_rank_list_t *src); void d_rank_list_shuffle(d_rank_list_t *rank_list); void d_rank_list_sort(d_rank_list_t *rank_list); +bool d_rank_list_bsearch(d_rank_list_t *rank_list, d_rank_t rank, int *idx); bool d_rank_list_find(d_rank_list_t *rank_list, d_rank_t rank, int *idx); void d_rank_list_del_at(d_rank_list_t *list, int idx); int d_rank_list_del(d_rank_list_t *rank_list, d_rank_t rank); @@ -479,15 +481,13 @@ int d_rank_list_append(d_rank_list_t *rank_list, d_rank_t rank); int d_rank_list_dump(d_rank_list_t *rank_list, d_string_t name, int name_len); d_rank_list_t *uint32_array_to_rank_list(uint32_t *ints, size_t len); int rank_list_to_uint32_array(d_rank_list_t *rl, uint32_t **ints, size_t *len); -int - d_rank_list_to_str(d_rank_list_t *rank_list, char **rank_str); - +int d_rank_list_to_str(d_rank_list_t *rank_list, char **rank_str); d_rank_range_list_t *d_rank_range_list_alloc(uint32_t size); d_rank_range_list_t *d_rank_range_list_realloc(d_rank_range_list_t *range_list, uint32_t size); d_rank_range_list_t *d_rank_range_list_create_from_ranks(d_rank_list_t *rank_list); -int - d_rank_range_list_str(d_rank_range_list_t *list, char **ranks_str); +int d_rank_range_list_str(d_rank_range_list_t *list, char **ranks_str); void d_rank_range_list_free(d_rank_range_list_t *range_list); +/* clang-format on */ #ifdef FAULT_INJECTION diff --git a/src/mgmt/srv_chk.c b/src/mgmt/srv_chk.c index 705f4f0609e..3dd937d9bff 100644 --- a/src/mgmt/srv_chk.c +++ b/src/mgmt/srv_chk.c @@ -1,6 +1,6 @@ /** * (C) Copyright 2022 Intel Corporation. 
- * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -48,7 +48,7 @@ ds_mgmt_chk_parse_uuid(int pool_nr, char **pools, uuid_t **p_uuids) int ds_mgmt_check_start(uint32_t rank_nr, d_rank_t *ranks, uint32_t policy_nr, Mgmt__CheckInconsistPolicy **policies, int32_t pool_nr, char **pools, - uint32_t flags, int32_t phase) + uint32_t flags) { uuid_t *uuids = NULL; struct chk_policy *ply = NULL; @@ -70,7 +70,7 @@ ds_mgmt_check_start(uint32_t rank_nr, d_rank_t *ranks, uint32_t policy_nr, } } - rc = chk_leader_start(rank_nr, ranks, policy_nr, ply, pool_nr, uuids, flags, phase); + rc = chk_leader_start(rank_nr, ranks, policy_nr, ply, pool_nr, uuids, flags); out: D_FREE(uuids); diff --git a/src/mgmt/srv_drpc.c b/src/mgmt/srv_drpc.c index 47202fce0a2..2fedf826efd 100644 --- a/src/mgmt/srv_drpc.c +++ b/src/mgmt/srv_drpc.c @@ -1,6 +1,6 @@ /* * (C) Copyright 2019-2024 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -2789,7 +2789,7 @@ ds_mgmt_drpc_check_start(Drpc__Call *drpc_req, Drpc__Response *drpc_resp) D_INFO("Received request to start check\n"); rc = ds_mgmt_check_start(req->n_ranks, req->ranks, req->n_policies, req->policies, - req->n_uuids, req->uuids, req->flags, -1 /* phase */); + req->n_uuids, req->uuids, req->flags); if (rc < 0) D_ERROR("Failed to start check: "DF_RC"\n", DP_RC(rc)); diff --git a/src/mgmt/srv_internal.h b/src/mgmt/srv_internal.h index 511cae1b7c4..5c18c47817c 100644 --- a/src/mgmt/srv_internal.h +++ b/src/mgmt/srv_internal.h @@ -1,6 +1,6 @@ /* * (C) Copyright 2016-2024 Intel Corporation. 
- * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -139,9 +139,10 @@ int const char *user, const char *group); /** srv_chk.c */ -int ds_mgmt_check_start(uint32_t rank_nr, d_rank_t *ranks, uint32_t policy_nr, +int + ds_mgmt_check_start(uint32_t rank_nr, d_rank_t *ranks, uint32_t policy_nr, Mgmt__CheckInconsistPolicy **policies, int pool_nr, char **pools, - uint32_t flags, int phase); + uint32_t flags); int ds_mgmt_check_stop(int pool_nr, char **pools); int ds_mgmt_check_query(int pool_nr, char **pools, chk_query_head_cb_t head_cb, chk_query_pool_cb_t pool_cb, void *buf); diff --git a/src/mgmt/tests/mocks.c b/src/mgmt/tests/mocks.c index 9d93e8697e7..382616db41f 100644 --- a/src/mgmt/tests/mocks.c +++ b/src/mgmt/tests/mocks.c @@ -1,6 +1,6 @@ /* * (C) Copyright 2019-2024 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -694,7 +694,7 @@ mock_ds_mgmt_dev_set_faulty_setup(void) int ds_mgmt_check_start(uint32_t rank_nr, d_rank_t *ranks, uint32_t policy_nr, Mgmt__CheckInconsistPolicy **policies, int pool_nr, char **pools, - uint32_t flags, int phase) + uint32_t flags) { return 0; } diff --git a/src/object/cli_coll.c b/src/object/cli_coll.c index d517e3269d6..ef10ed439a5 100644 --- a/src/object/cli_coll.c +++ b/src/object/cli_coll.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2024 Intel Corporation. 
+ * (C) Copyright 2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -724,8 +725,8 @@ dc_obj_coll_punch(tse_task_t *task, struct dc_object *obj, struct dtx_epoch *epo if (rc == 0) { if (!shard->do_rebuilding && !shard->do_reintegrating) { tmp_tgt.dct_rank = shard->do_target_rank; - dct = bsearch(&tmp_tgt, coa->coa_dcts, coa->coa_dct_nr, - sizeof(tmp_tgt), &dc_coll_sort_cmp); + dct = bsearch(&tmp_tgt, coa->coa_dcts, coa->coa_dct_nr, + sizeof(tmp_tgt), dc_coll_sort_cmp); D_ASSERT(dct != NULL); goto gen_mbs; diff --git a/src/tests/ftest/recovery/cat_recov_core.yaml b/src/tests/ftest/recovery/cat_recov_core.yaml index 039ab7b130a..73a5a583283 100644 --- a/src/tests/ftest/recovery/cat_recov_core.yaml +++ b/src/tests/ftest/recovery/cat_recov_core.yaml @@ -8,6 +8,7 @@ server_config: engines_per_host: 2 engines: 0: + targets: 4 pinned_numa_node: 0 nr_xs_helpers: 0 log_file: daos_server0.log @@ -22,6 +23,7 @@ server_config: storage: auto 1: + targets: 4 pinned_numa_node: 1 nr_xs_helpers: 0 log_file: daos_server1.log @@ -37,7 +39,10 @@ server_config: transport_config: allow_insecure: true - system_ram_reserved: 64 + +pool: + scm_size: 6G + nvme_size: 80G agent_config: transport_config: diff --git a/src/tests/ftest/recovery/check_start_corner_case.py b/src/tests/ftest/recovery/check_start_corner_case.py index 5bdf4973b3b..546dabb53d0 100644 --- a/src/tests/ftest/recovery/check_start_corner_case.py +++ b/src/tests/ftest/recovery/check_start_corner_case.py @@ -1,5 +1,5 @@ """ - (C) Copyright 2025 Hewlett Packard Enterprise Development LP + (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -233,7 +233,7 @@ def test_two_pools_healthy(self): # Wait for the checker to detect the inconsistent container label. 
query_reports = None for _ in range(8): - check_query_out = dmg_command.check_query() + check_query_out = dmg_command.check_query(pool=pool_3.uuid) # Status becomes RUNNING immediately, but it may take a while to detect the # inconsistency. If detected, "reports" field is filled. if check_query_out["response"]["status"] == "RUNNING": diff --git a/src/tests/ftest/recovery/container_cleanup.yaml b/src/tests/ftest/recovery/container_cleanup.yaml index d207e4193e5..5a2a97b4619 100644 --- a/src/tests/ftest/recovery/container_cleanup.yaml +++ b/src/tests/ftest/recovery/container_cleanup.yaml @@ -14,7 +14,7 @@ server_config: storage: auto pool: - size: 5G + size: 15G container: type: POSIX diff --git a/src/tests/ftest/recovery/container_list_consolidation.yaml b/src/tests/ftest/recovery/container_list_consolidation.yaml index d207e4193e5..5a2a97b4619 100644 --- a/src/tests/ftest/recovery/container_list_consolidation.yaml +++ b/src/tests/ftest/recovery/container_list_consolidation.yaml @@ -14,7 +14,7 @@ server_config: storage: auto pool: - size: 5G + size: 15G container: type: POSIX diff --git a/src/tests/ftest/recovery/ms_membership.yaml b/src/tests/ftest/recovery/ms_membership.yaml index 04665b2b39c..017a461e06b 100644 --- a/src/tests/ftest/recovery/ms_membership.yaml +++ b/src/tests/ftest/recovery/ms_membership.yaml @@ -7,12 +7,17 @@ server_config: engines_per_host: 2 engines: 0: + targets: 4 pinned_numa_node: 0 nr_xs_helpers: 1 log_file: daos_server0.log storage: auto 1: + targets: 4 pinned_numa_node: 1 nr_xs_helpers: 1 log_file: daos_server1.log storage: auto + +pool: + size: 80G diff --git a/src/tests/ftest/recovery/pool_cleanup.yaml b/src/tests/ftest/recovery/pool_cleanup.yaml index cbefae8b3f4..3ca798f822b 100644 --- a/src/tests/ftest/recovery/pool_cleanup.yaml +++ b/src/tests/ftest/recovery/pool_cleanup.yaml @@ -13,4 +13,4 @@ server_config: storage: auto pool: - size: 5G + size: 15G diff --git a/src/tests/ftest/recovery/pool_list_consolidation.yaml 
b/src/tests/ftest/recovery/pool_list_consolidation.yaml index 3abd589e99f..2ea68fd69e4 100644 --- a/src/tests/ftest/recovery/pool_list_consolidation.yaml +++ b/src/tests/ftest/recovery/pool_list_consolidation.yaml @@ -23,7 +23,7 @@ setup: start_servers_once: False pool: - size: 60G + size: 100G container: control_method: daos diff --git a/src/tests/suite/daos_cr.c b/src/tests/suite/daos_cr.c index 9821740cf55..f2be8fbc056 100644 --- a/src/tests/suite/daos_cr.c +++ b/src/tests/suite/daos_cr.c @@ -1350,7 +1350,7 @@ cr_engine_interaction(void **state) rc = cr_system_start(); assert_rc_equal(rc, 0); - /* Former connection for the pool has been evicted by checkre. Let's re-connect the pool. */ + /* Former connection for the pool has been evicted by checker. Let's re-connect the pool. */ rc = cr_cont_get_label(state, &pool, &cont, true, &label); assert_rc_equal(rc, 0); @@ -1732,7 +1732,7 @@ cr_stop_engine_interaction(void **state) rc = cr_system_start(); assert_rc_equal(rc, 0); - /* Former connection for the pool has been evicted by checkre. Let's re-connect the pool. */ + /* Former connection for the pool has been evicted by checker. Let's re-connect the pool. */ rc = cr_cont_get_label(state, &pool, &cont, true, &label); assert_rc_equal(rc, 0); @@ -3848,6 +3848,61 @@ cr_maintenance_mode(void **state) cr_cleanup(arg, &pool, 1); } +/* + * 1. Exclude rank 0. + * 2. Create pool without inconsistency. + * 3. Start checker without options. + * 4. Query checker, it should be completed instead of being blocked. + * 5. Switch to normal mode and cleanup. 
+ */ +static void +cr_lost_rank0(void **state) +{ + test_arg_t *arg = *state; + struct test_pool pool = {0}; + struct daos_check_info dci = {0}; + int rc; + + print_message("CR29: CR with rank 0 excluded at the beginning\n"); + + print_message("CR: excluding the rank 0 ...\n"); + rc = dmg_system_exclude_rank(dmg_config_file, 0); + assert_rc_equal(rc, 0); + + rc = cr_pool_create(state, &pool, false, TCC_NONE); + assert_rc_equal(rc, 0); + + rc = cr_system_stop(false); + assert_rc_equal(rc, 0); + + rc = cr_mode_switch(true); + assert_rc_equal(rc, 0); + + rc = cr_check_start(TCSF_RESET, 0, NULL, NULL); + assert_rc_equal(rc, 0); + + cr_ins_wait(1, &pool.pool_uuid, &dci); + + rc = cr_ins_verify(&dci, TCIS_COMPLETED); + assert_rc_equal(rc, 0); + + rc = cr_pool_verify(&dci, pool.pool_uuid, TCPS_CHECKED, 0, NULL, NULL, NULL); + assert_rc_equal(rc, 0); + + /* Reint the rank for subsequent test. */ + rc = cr_rank_reint(0, true); + assert_rc_equal(rc, 0); + + rc = cr_mode_switch(false); + assert_rc_equal(rc, 0); + + rc = cr_system_start(); + assert_rc_equal(rc, 0); + + cr_dci_fini(&dci); + cr_cleanup(arg, &pool, 1); +} + /* clang-format off */ static const struct CMUnitTest cr_tests[] = { { "CR1: start checker for specified pools", @@ -3906,6 +3961,8 @@ static const struct CMUnitTest cr_tests[] = { cr_handle_fail_pool2, async_disable, test_case_teardown}, { "CR28: maintenance mode after dry-run check", cr_maintenance_mode, async_disable, test_case_teardown}, + { "CR29: CR with rank 0 excluded at the beginning", + cr_lost_rank0, async_disable, test_case_teardown}, }; /* clang-format on */ From 0374aaa72bb7a87e0baa996e4e13b6dc17d44065 Mon Sep 17 00:00:00 2001 From: Makito Kano Date: Thu, 29 Jan 2026 00:49:19 +0900 Subject: [PATCH 157/253] DAOS-18481 test: recovery/check_start_corner_case.yaml - Reset checker state (#17449) test_start_back_to_back interferes with test_two_pools_healthy. 
The repaired fault in test_start_back_to_back (orphan container) appears in the check query output during test_two_pools_healthy. The test expects the query output to be clean at the beginning of the test, so reset the checker state by stopping (dmg check stop) and restarting with --reset (dmg check start --reset) after confirming the repair result. Signed-off-by: Makito Kano --- .../ftest/recovery/check_start_corner_case.py | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/tests/ftest/recovery/check_start_corner_case.py b/src/tests/ftest/recovery/check_start_corner_case.py index 546dabb53d0..150e1a540e4 100644 --- a/src/tests/ftest/recovery/check_start_corner_case.py +++ b/src/tests/ftest/recovery/check_start_corner_case.py @@ -76,8 +76,9 @@ def test_start_back_to_back(self): result in Operation already performed error. In that case, repeat. When the first pool is fixed, the second start should work. 6. Query checker and verify that they’re fixed. - 7. Disable checker and start system. - 8. Verify that the faults are actually fixed. + 7. Clear the checker inconsistency reports. + 8. Disable checker and start system. + 9. Verify that the faults are actually fixed. Jira ID: DAOS-17860 @@ -86,14 +87,12 @@ def test_start_back_to_back(self): :avocado: tags=recovery,cat_recov :avocado: tags=DMGCheckStartCornerCaseTest,test_start_back_to_back """ - # 1. Create two pools and a container. self.log_step("Create two pools and a container.") pool_1 = self.get_pool(connect=False) pool_2 = self.get_pool(connect=False) container_1 = self.get_container(pool=pool_1) container_2 = self.get_container(pool=pool_2) - # 2. Inject fault on both containers. self.log_step("Inject fault on both containers.") daos_command = self.get_daos_command() daos_command.faults_container( @@ -103,16 +102,13 @@ def test_start_back_to_back(self): pool=pool_2.identifier, cont=container_2.identifier, location="DAOS_CHK_CONT_ORPHAN") - # 3. Enable checker. 
self.log_step("Enable checker.") dmg_command = self.get_dmg_command() dmg_command.check_enable() - # 4. Start with the first pool. self.log_step("Start with the first pool.") dmg_command.check_start(pool=pool_1.identifier) - # 5. Immediately after starting the first pool, start the second pool. self.log_step("Immediately after starting the first pool, start the second pool.") pool_2_started = False for count in range(8): @@ -130,15 +126,19 @@ def test_start_back_to_back(self): time.sleep(5) self.assertTrue(pool_2_started, "dmg check start pool_2 failed after 40 sec!") - # 6. Query checker and verify that they’re fixed. self.log_step("Query checker and verify that they’re fixed.") wait_for_check_complete(dmg=dmg_command) - # 7. Disable checker and start system. + self.log_step("Clear the checker inconsistency reports.") + dmg_command.check_stop() + # Start with --reset clears the inconsistency reports. Old inconsistency reports + # may cause subsequent tests to fail because the tests don't expect them. The + # tests expect the system to be clean at the beginning. + dmg_command.check_start(reset=True) + self.log_step("Disable checker and start system.") dmg_command.check_disable() - # 8. Verify that the faults are actually fixed. self.log_step("Verify that the faults are actually fixed.") # In this case, check that the containers were removed. 
container_list_out_1 = daos_command.pool_list_containers(pool=pool_1.identifier) From 6bb50dd49b9475bcd6fe3c668262bcb52e9cc287 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 28 Jan 2026 07:59:00 -0800 Subject: [PATCH 158/253] DAOS-18522 cq: update GHA versions (#17447) Updates `actions/checkout` from 6.0.1 to 6.0.2 Updates `actions/setup-python` from 6.1.0 to 6.2.0 Updates `github/codeql-action` from 4.31.10 to 4.31.11 Signed-off-by: dependabot[bot] --- .github/workflows/bash_unit_testing.yml | 4 ++-- .github/workflows/bullseye-coverage.yml | 8 +++---- .github/workflows/ci2.yml | 4 ++-- .github/workflows/create_release.yml | 2 +- .github/workflows/landing-builds.yml | 10 ++++---- .github/workflows/linting.yml | 30 ++++++++++++------------ .github/workflows/ossf-scorecard.yml | 4 ++-- .github/workflows/pr-metadata.yml | 2 +- .github/workflows/rpm-build-and-test.yml | 8 +++---- .github/workflows/trivy.yml | 4 ++-- .github/workflows/unit-testing.yml | 2 +- 11 files changed, 39 insertions(+), 39 deletions(-) diff --git a/.github/workflows/bash_unit_testing.yml b/.github/workflows/bash_unit_testing.yml index 6210d3e99e6..434f27fe1c3 100644 --- a/.github/workflows/bash_unit_testing.yml +++ b/.github/workflows/bash_unit_testing.yml @@ -20,11 +20,11 @@ jobs: runs-on: [self-hosted, light] steps: - name: Checkout code - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: ref: ${{ github.event.pull_request.head.sha }} - name: Checkout bash_unit project - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: repository: 'pgrange/bash_unit' path: bash_unit diff --git a/.github/workflows/bullseye-coverage.yml b/.github/workflows/bullseye-coverage.yml index 52c9b5dea81..a4139fca908 100644 --- 
a/.github/workflows/bullseye-coverage.yml +++ b/.github/workflows/bullseye-coverage.yml @@ -109,7 +109,7 @@ jobs: matrix: ${{ steps.matrix.outputs.text }} steps: - name: Checkout code - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: ref: ${{ github.event.pull_request.head.sha }} - name: Import commit pragmas @@ -235,7 +235,7 @@ jobs: COMMIT_STATUS_DISTRO_VERSION: steps: - name: Checkout code - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: submodules: 'recursive' fetch-depth: 500 @@ -409,7 +409,7 @@ jobs: matrix: ${{ steps.matrix.outputs.text }} steps: - name: Checkout code - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: ref: ${{ github.event.pull_request.head.sha }} - name: Import commit pragmas @@ -519,7 +519,7 @@ jobs: SIZE: steps: - name: Checkout code - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: submodules: 'recursive' fetch-depth: 500 diff --git a/.github/workflows/ci2.yml b/.github/workflows/ci2.yml index 80a3ade7968..ab377fe0624 100644 --- a/.github/workflows/ci2.yml +++ b/.github/workflows/ci2.yml @@ -34,7 +34,7 @@ jobs: DOCKER_BASE: ${{ matrix.base }} steps: - name: Checkout code - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: submodules: true fetch-depth: 500 @@ -100,7 +100,7 @@ jobs: COMPILER: ${{ matrix.compiler }} steps: - name: Checkout code - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: submodules: true 
fetch-depth: 500 diff --git a/.github/workflows/create_release.yml b/.github/workflows/create_release.yml index 9d3795f8c26..b8c8c74d4a9 100644 --- a/.github/workflows/create_release.yml +++ b/.github/workflows/create_release.yml @@ -18,7 +18,7 @@ jobs: permissions: contents: write steps: - - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: fetch-depth: 2 - uses: ./.github/actions/make_release diff --git a/.github/workflows/landing-builds.yml b/.github/workflows/landing-builds.yml index f126cca4465..11e7e9723ca 100644 --- a/.github/workflows/landing-builds.yml +++ b/.github/workflows/landing-builds.yml @@ -64,7 +64,7 @@ jobs: DOCKER_BASE: ${{ matrix.base }} steps: - name: Checkout code - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: submodules: 'recursive' fetch-depth: 500 @@ -112,7 +112,7 @@ jobs: COMPILER: clang steps: - name: Checkout code - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: submodules: 'recursive' fetch-depth: 500 @@ -181,7 +181,7 @@ jobs: COMPILER: ${{ matrix.compiler }} steps: - name: Checkout code - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: submodules: 'recursive' fetch-depth: 500 @@ -255,7 +255,7 @@ jobs: BASE_DISTRO: ${{ matrix.with }} steps: - name: Checkout code - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: submodules: 'recursive' fetch-depth: 500 @@ -344,7 +344,7 @@ jobs: COMPILER: ${{ matrix.compiler }} steps: - name: Checkout code - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 
+ uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: submodules: 'recursive' fetch-depth: 500 diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 5d4442e0ab4..59c01678ed1 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -25,11 +25,11 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout code - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: ref: ${{ github.event.pull_request.head.sha }} - name: Set up Python environment - uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: python-version: '3' - name: Install extra python packages @@ -48,7 +48,7 @@ jobs: runs-on: ubuntu-24.04 steps: - name: Checkout code - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: ref: ${{ github.event.pull_request.head.sha }} - name: Run @@ -66,7 +66,7 @@ jobs: runs-on: ubuntu-24.04 steps: - name: Checkout code - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: ref: ${{ github.event.pull_request.head.sha }} - name: Check DAOS logging macro use. @@ -77,7 +77,7 @@ jobs: runs-on: ubuntu-24.04 steps: - name: Checkout code - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Check DAOS ftest tags. run: \[ ! 
-x src/tests/ftest/tags.py \] || ./src/tests/ftest/tags.py lint --verbose @@ -86,11 +86,11 @@ jobs: name: Flake8 check steps: - name: Check out source repository - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: ref: ${{ github.event.pull_request.head.sha }} - name: Set up Python environment - uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: python-version: '3' - name: Add parser @@ -119,7 +119,7 @@ jobs: runs-on: ubuntu-24.04 steps: - name: Checkout code - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: ref: ${{ github.event.pull_request.head.sha }} - name: Install doxygen @@ -140,10 +140,10 @@ jobs: runs-on: ubuntu-24.04 steps: - name: Checkout code - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: ref: ${{ github.event.pull_request.head.sha }} - - uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 + - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: python-version: '3.11' - name: Install python packages @@ -160,7 +160,7 @@ jobs: runs-on: ubuntu-24.04 steps: - name: Checkout code - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Install extra python packages run: python3 -m pip install --requirement utils/cq/requirements.txt - name: Run check @@ -175,7 +175,7 @@ jobs: runs-on: ubuntu-24.04 steps: - name: Check out source repository - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # 
v6.0.2 with: ref: ${{ github.event.pull_request.head.sha }} fetch-depth: 0 @@ -202,11 +202,11 @@ jobs: runs-on: ubuntu-24.04 steps: - name: Check out source repository - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: ref: ${{ github.event.pull_request.head.sha }} - name: Set up Python environment - uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: python-version: '3' - name: Install extra python packages @@ -219,7 +219,7 @@ jobs: runs-on: ubuntu-24.04 steps: - name: Check out source repository - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: ref: ${{ github.event.pull_request.head.sha }} fetch-depth: 0 diff --git a/.github/workflows/ossf-scorecard.yml b/.github/workflows/ossf-scorecard.yml index 2151192fcd1..6ce378da7da 100644 --- a/.github/workflows/ossf-scorecard.yml +++ b/.github/workflows/ossf-scorecard.yml @@ -33,7 +33,7 @@ jobs: steps: - name: "Checkout code" - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: persist-credentials: false @@ -71,6 +71,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard (optional). 
# Commenting out will disable upload of results to your repo's Code Scanning dashboard - name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@cdefb33c0f6224e58673d9004f47f7cb3e328b89 # v4.31.10 + uses: github/codeql-action/upload-sarif@19b2f06db2b6f5108140aeb04014ef02b648f789 # v4.31.11 with: sarif_file: results.sarif diff --git a/.github/workflows/pr-metadata.yml b/.github/workflows/pr-metadata.yml index d563f31162e..edca6d12300 100644 --- a/.github/workflows/pr-metadata.yml +++ b/.github/workflows/pr-metadata.yml @@ -19,7 +19,7 @@ jobs: name: Report Jira data to PR comment steps: - name: Checkout - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: install jira run: python3 -m pip install jira - name: Load jira metadata diff --git a/.github/workflows/rpm-build-and-test.yml b/.github/workflows/rpm-build-and-test.yml index 00eb9b021a8..2483fbbaa7b 100644 --- a/.github/workflows/rpm-build-and-test.yml +++ b/.github/workflows/rpm-build-and-test.yml @@ -118,7 +118,7 @@ jobs: matrix: ${{ steps.matrix.outputs.text }} steps: - name: Checkout code - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: ref: ${{ github.event.pull_request.head.sha }} - name: Import commit pragmas @@ -244,7 +244,7 @@ jobs: COMMIT_STATUS_DISTRO_VERSION: steps: - name: Checkout code - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: submodules: recursive fetch-depth: 500 @@ -418,7 +418,7 @@ jobs: matrix: ${{ steps.matrix.outputs.text }} steps: - name: Checkout code - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: ref: ${{ github.event.pull_request.head.sha }} - 
name: Import commit pragmas @@ -528,7 +528,7 @@ jobs: SIZE: steps: - name: Checkout code - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: submodules: recursive fetch-depth: 500 diff --git a/.github/workflows/trivy.yml b/.github/workflows/trivy.yml index 0e5937aac62..4da0b87d1fb 100644 --- a/.github/workflows/trivy.yml +++ b/.github/workflows/trivy.yml @@ -33,7 +33,7 @@ jobs: security-events: write steps: - name: Checkout code - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Run Trivy vulnerability scanner in filesystem mode (table format) uses: aquasecurity/trivy-action@b6643a29fecd7f34b3597bc6acb0a98b03d33ff8 # 0.33.1 @@ -68,7 +68,7 @@ jobs: trivy-config: 'utils/trivy/trivy.yaml' - name: Upload Trivy scan results to GitHub Security tab - uses: github/codeql-action/upload-sarif@cdefb33c0f6224e58673d9004f47f7cb3e328b89 # v4.31.10 + uses: github/codeql-action/upload-sarif@19b2f06db2b6f5108140aeb04014ef02b648f789 # v4.31.11 with: sarif_file: 'trivy-results.sarif' diff --git a/.github/workflows/unit-testing.yml b/.github/workflows/unit-testing.yml index a2b5e2ddedf..ca7ed7a2733 100644 --- a/.github/workflows/unit-testing.yml +++ b/.github/workflows/unit-testing.yml @@ -15,7 +15,7 @@ jobs: runs-on: [self-hosted, docker] steps: - name: Checkout code - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: submodules: 'recursive' - name: Build deps in Docker From 5dd17be752a0c7cfddb2335e9758bea86e79030d Mon Sep 17 00:00:00 2001 From: Niu Yawei Date: Thu, 29 Jan 2026 09:14:40 +0800 Subject: [PATCH 159/253] DAOS-18495 object: use deep stack for IV involved ULT (#17422) Use deep stack for IV involved ULTs. 
Signed-off-by: Niu Yawei --- src/container/srv_container.c | 5 +++-- src/container/srv_target.c | 9 ++++----- src/object/srv_ec_aggregate.c | 3 ++- src/object/srv_obj.c | 3 ++- 4 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/container/srv_container.c b/src/container/srv_container.c index 578784d3d3c..c2d39f5ea1d 100644 --- a/src/container/srv_container.c +++ b/src/container/srv_container.c @@ -1,6 +1,6 @@ /* * (C) Copyright 2016-2024 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -2266,7 +2266,8 @@ cont_svc_eph_track_leader_start(struct cont_svc *svc) D_ASSERT(svc->cs_cont_ephs_leader_req == NULL); uuid_clear(anonym_uuid); sched_req_attr_init(&attr, SCHED_REQ_ANONYM, &anonym_uuid); - svc->cs_cont_ephs_leader_req = sched_create_ult(&attr, cont_track_eph_leader_ult, svc, 0); + svc->cs_cont_ephs_leader_req = + sched_create_ult(&attr, cont_track_eph_leader_ult, svc, DSS_DEEP_STACK_SZ); if (svc->cs_cont_ephs_leader_req == NULL) { D_ERROR(DF_UUID" Failed to create EC leader eph ULT.\n", DP_UUID(svc->cs_pool_uuid)); diff --git a/src/container/srv_target.c b/src/container/srv_target.c index 3b8aedfa404..6bfce2f6397 100644 --- a/src/container/srv_target.c +++ b/src/container/srv_target.c @@ -1,7 +1,7 @@ /** * (C) Copyright 2016-2024 Intel Corporation. 
* (C) Copyright 2025 Google LLC - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -2358,10 +2358,9 @@ ds_cont_tgt_snapshot_notify_handler(crt_rpc_t *rpc) args.snap_opts = in->tsi_opts; args.oit_oid = in->tsi_oit_oid; - out->tso_rc = ds_pool_thread_collective(in->tsi_pool_uuid, - PO_COMP_ST_NEW | PO_COMP_ST_DOWN | - PO_COMP_ST_DOWNOUT, cont_snap_notify_one, - &args, 0); + out->tso_rc = ds_pool_thread_collective( + in->tsi_pool_uuid, PO_COMP_ST_NEW | PO_COMP_ST_DOWN | PO_COMP_ST_DOWNOUT, + cont_snap_notify_one, &args, DSS_ULT_DEEP_STACK); if (out->tso_rc != 0) D_ERROR(DF_CONT": Snapshot notify failed: "DF_RC"\n", DP_CONT(in->tsi_pool_uuid, in->tsi_cont_uuid), diff --git a/src/object/srv_ec_aggregate.c b/src/object/srv_ec_aggregate.c index 67d3c4aaee5..1baef1660a1 100644 --- a/src/object/srv_ec_aggregate.c +++ b/src/object/srv_ec_aggregate.c @@ -2687,7 +2687,8 @@ ec_agg_param_init(struct ds_cont_child *cont, struct agg_param *param) agg_param->ap_credits_max = EC_AGG_ITERATION_MAX; D_INIT_LIST_HEAD(&agg_param->ap_agg_entry.ae_cur_stripe.as_dextents); - rc = dss_ult_execute(ec_agg_init_ult, agg_param, NULL, NULL, DSS_XS_SYS, 0, 0); + rc = dss_ult_execute(ec_agg_init_ult, agg_param, NULL, NULL, DSS_XS_SYS, 0, + DSS_DEEP_STACK_SZ); if (rc != 0) D_GOTO(out, rc); diff --git a/src/object/srv_obj.c b/src/object/srv_obj.c index b08b8981dee..2ecd52f1a0f 100644 --- a/src/object/srv_obj.c +++ b/src/object/srv_obj.c @@ -1405,7 +1405,8 @@ obj_fetch_ec_agg_boundary(struct obj_io_context *ioc, daos_unit_oid_t *uoid) arg.eab_pool = ioc->ioc_coc->sc_pool->spc_pool; uuid_copy(arg.eab_co_uuid, ioc->ioc_coc->sc_uuid); - rc = dss_ult_execute(obj_fetch_ec_agg_boundary_ult, &arg, NULL, NULL, DSS_XS_SYS, 0, 0); + rc = dss_ult_execute(obj_fetch_ec_agg_boundary_ult, &arg, NULL, NULL, DSS_XS_SYS, 0, + DSS_DEEP_STACK_SZ); if (rc) { DL_ERROR(rc, DF_CONT ", " 
DF_UOID " fetch ec_agg_boundary failed.", DP_CONT(ioc->ioc_coc->sc_pool_uuid, ioc->ioc_coc->sc_uuid), From 9f5ac621e533cd958f810ed23c6c4e56af297471 Mon Sep 17 00:00:00 2001 From: Niu Yawei Date: Thu, 29 Jan 2026 14:44:33 +0800 Subject: [PATCH 160/253] DAOS-18488 sched: adjust default settings (#17416) - Change default sched_inactive_max from 40s to 5m - Print symbols when problematic ULT is detected Signed-off-by: Niu Yawei --- src/engine/sched.c | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/src/engine/sched.c b/src/engine/sched.c index 0cc388c3085..cea0d52ba4a 100644 --- a/src/engine/sched.c +++ b/src/engine/sched.c @@ -1,6 +1,6 @@ /** * (C) Copyright 2016-2024 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -179,7 +179,7 @@ unsigned int sched_relax_intvl = SCHED_RELAX_INTVL_DEFAULT; unsigned int sched_relax_mode; unsigned int sched_unit_runtime_max = 32; /* ms */ bool sched_watchdog_all; -unsigned int sched_inactive_max = 40000; /* ms */ +unsigned int sched_inactive_max = 300000; /* ms, 5 mins */ bool sched_monitor_kill = true; enum { @@ -2115,6 +2115,7 @@ sched_xs_monitor(struct dss_xstream *cur_dx) struct sched_info *info, *cur_info; struct sched_hist_seq *hist; unsigned int gap; + char **strings = NULL; int rc, i, inactive_tgt, inactive_id = -1; D_ASSERT(is_monitor_xs(cur_dx)); @@ -2154,6 +2155,7 @@ sched_xs_monitor(struct dss_xstream *cur_dx) inactive_id = dx->dx_xs_id; inactive_tgt = dx->dx_tgt_id; gap = cur_info->si_cur_ts - hist->sm_last_ts; + strings = backtrace_symbols(&info->si_ult_func, 1); break; } } @@ -2161,8 +2163,10 @@ sched_xs_monitor(struct dss_xstream *cur_dx) dss_sched_monitor_exit(); if (inactive_id >= 0) { - D_WARN("SCHED_MONITOR: xs %d (tgt:%d) is inactive for more than %u ms!\n", - inactive_id, inactive_tgt, gap); + 
D_WARN("SCHED_MONITOR: xs %d (tgt:%d) is inactive for more than %u ms! symbol:%s\n", + inactive_id, inactive_tgt, gap, strings != NULL ? strings[0] : NULL); + free(strings); + if (sched_monitor_kill) { D_ERROR("SCHED_MONITOR: Killing engine...\n"); rc = kill(getpid(), SIGKILL); @@ -2262,6 +2266,16 @@ sched_exec_time(uint64_t *msecs, const char *ult_name) return 0; } +static inline bool +sched_monitor_enabled(struct dss_xstream *dx) +{ + if (sched_inactive_max == 0) + return false; + + /* Monitor SYS & VOS xstreams only */ + return dx->dx_xs_id == 0 || dx->dx_main_xs; +} + static void sched_watchdog_prep(struct dss_xstream *dx, ABT_unit unit) { @@ -2270,10 +2284,12 @@ sched_watchdog_prep(struct dss_xstream *dx, ABT_unit unit) void (*thread_func)(void *); int rc; - if (!watchdog_enabled(dx)) + if (!watchdog_enabled(dx) && !sched_monitor_enabled(dx)) return; - info->si_ult_start = daos_getmtime_coarse(); + if (watchdog_enabled(dx)) + info->si_ult_start = daos_getmtime_coarse(); + rc = ABT_unit_get_thread(unit, &thread); D_ASSERT(rc == ABT_SUCCESS); rc = ABT_thread_get_thread_func(thread, &thread_func); From 11cc0313b1886fb3424cc0be6822849661f6359a Mon Sep 17 00:00:00 2001 From: Jan Michalski Date: Thu, 29 Jan 2026 15:43:46 +0000 Subject: [PATCH 161/253] SRE-3562 ci: update MLNX_VER_NUM to 3.2.1 (#17463) Required for EL9.7 Signed-off-by: Jan Michalski --- ci/provisioning/post_provision_config.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/provisioning/post_provision_config.sh b/ci/provisioning/post_provision_config.sh index 0e98332a4d0..345d38b336f 100755 --- a/ci/provisioning/post_provision_config.sh +++ b/ci/provisioning/post_provision_config.sh @@ -1,7 +1,7 @@ #!/bin/bash # # Copyright 2020-2023 Intel Corporation. 
-# Copyright 2025 Hewlett Packard Enterprise Development LP +# Copyright 2025-2026 Hewlett Packard Enterprise Development LP # # SPDX-License-Identifier: BSD-2-Clause-Patent # @@ -29,7 +29,7 @@ source ci/junit.sh # Before running the script, environment variables may be needed for # the specific site. -: "${MLNX_VER_NUM:=3.0.0}" +: "${MLNX_VER_NUM:=3.2.1}" # This is tangled and needs a better fix as it has DISTRO being passed # as EL_8 for EL_9, yet other places expect DISTRO to really be EL_8 and From 8080040ec87594592a06dba5b126e5fbcc43e307 Mon Sep 17 00:00:00 2001 From: Makito Kano Date: Fri, 30 Jan 2026 00:59:13 +0900 Subject: [PATCH 162/253] =?UTF-8?q?DAOS-18402=20test:=20Fix=20test=5Flost?= =?UTF-8?q?=5Fmajority=5Fps=5Freplicas=20to=20make=20sure=20it=20r?= =?UTF-8?q?=E2=80=A6=20(#17339)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, the algorithm removes rbd-pool only from /mnt/daos0/. rdb-pool is created in 3 out of 4 ranks randomly. Thus, if it’s not created in /mnt/daos0/ of one of the nodes, the test will only remove one rdb-pool and the test will fail. Fix it so that it removes two rdb-pool. Signed-off-by: Makito Kano --- .../ftest/recovery/pool_list_consolidation.py | 87 +++++++++++-------- 1 file changed, 49 insertions(+), 38 deletions(-) diff --git a/src/tests/ftest/recovery/pool_list_consolidation.py b/src/tests/ftest/recovery/pool_list_consolidation.py index 41be76bd5cf..0a82463eb49 100644 --- a/src/tests/ftest/recovery/pool_list_consolidation.py +++ b/src/tests/ftest/recovery/pool_list_consolidation.py @@ -1,6 +1,6 @@ """ (C) Copyright 2024 Intel Corporation. - (C) Copyright 2025 Hewlett Packard Enterprise Development LP + (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -265,13 +265,12 @@ def test_orphan_pool_trust_ms(self): def test_lost_majority_ps_replicas(self): """Test lost the majority of PS replicas. - 1. 
Create a pool with --nsvc=3. Rank 0, 1, and 2 will be pool service replicas. + 1. Create a pool with --nsvc=3. There will be three ranks with rdb-pool. 2. Stop servers. - 3. Remove //rdb-pool from rank 0 and 2. - 4. Start servers. - 5. Run DAOS checker under kinds of mode. - 6. Try creating a container. The pool can be started now, so create should succeed. - 7. Show that rdb-pool are recovered. i.e., at least three out of four ranks + 3. Remove //rdb-pool from two ranks. + 4. Run DAOS checker under kinds of mode. + 5. Try creating a container. The pool can be started now, so create should succeed. + 6. Show that rdb-pool are recovered. i.e., at least three out of four ranks should have rdb-pool. Jira ID: DAOS-12029 @@ -281,7 +280,17 @@ def test_lost_majority_ps_replicas(self): :avocado: tags=recovery,cat_recov,pool_list_consolidation :avocado: tags=PoolListConsolidationTest,test_lost_majority_ps_replicas """ + if self.server_managers[0].manager.job.using_control_metadata: + self.log.info("MD-on-SSD cluster. It will be supported later.") + self.cancelForTicket('DAOS-18395') + self.log_step("Create a pool with --nsvc=3.") + # We can generalize this test more. For example, use + # svcn = self.server_managers[0].engines - 1 + # Then remove (svcn / 2 + 1) count of rdb-pool, etc. However, I don't think it's + # necessary to increase the number of servers for this test. Also, I'm not sure + # if --nsvc > 3 will work. Thus, we keep the numbers hard-coded to make the code + # simple. 
pool = self.get_pool(svcn=3) self.log_step("Stop servers") @@ -289,31 +298,31 @@ def test_lost_majority_ps_replicas(self): dmg_command.system_stop() self.log_step("Remove //rdb-pool from two ranks.") - rdb_pool_path = f"{self.server_managers[0].get_vos_path(pool)}/rdb-pool" - command = f"sudo rm {rdb_pool_path}" + rdb_pool_paths = [] + for engine_params in self.server_managers[0].manager.job.yaml.engine_params: + scm_mount = engine_params.get_value('scm_mount') + rdb_pool_path = f"{scm_mount}/{pool.uuid.lower()}/rdb-pool" + rdb_pool_paths.append(rdb_pool_path) + self.log.info("rdb_pool_paths = %s", rdb_pool_paths) hosts = list(set(self.server_managers[0].ranks.values())) count = 0 + # Iterate both pool mount points of both ranks. I.e., 4 ranks total. for host in hosts: - node = NodeSet(host) - check_out = check_file_exists(hosts=node, filename=rdb_pool_path, sudo=True) - if check_out[0]: - if not run_remote(log=self.log, hosts=node, command=command).passed: - self.fail(f'Failed to remove {rdb_pool_path} on {host}') - self.log.info("rm rdb-pool from %s", str(node)) - count += 1 - if count > 1: - break - using_control_metadata = self.server_managers[0].manager.job.using_control_metadata - if count == 0 or using_control_metadata: - msg = ("MD-on-SSD cluster. Contents under mount point are removed by control plane " - "after system stop.") - self.log.info(msg) - dmg_command.system_start() - # return results in PASS. 
- return - - self.log_step("Start servers.") - dmg_command.system_start() + for rdb_pool_path in rdb_pool_paths: + node = NodeSet(host) + check_out = check_file_exists( + hosts=node, filename=rdb_pool_path, sudo=True) + if check_out[0]: + command = f"rm {rdb_pool_path}" + command_root = command_as_user(command=command, user="root") + if not run_remote(log=self.log, hosts=node, command=command_root).passed: + self.fail(f'Failed to remove {rdb_pool_path} on {host}') + self.log.info("Remove %s from %s", rdb_pool_path, str(node)) + count += 1 + if count == 2: + break + if count == 2: + break self.log_step("Run DAOS checker under kinds of mode.") errors = [] @@ -329,27 +338,29 @@ def test_lost_majority_ps_replicas(self): cont_create_success = True break except TestFail as error: - msg = f"## Container create failed after running checker! error = {error}" + msg = f"Container create failed after running checker! error = {error}" self.log.debug(msg) if not cont_create_success: errors.append("Container create failed after running checker!") - msg = ("Show that rdb-pool are recovered. i.e., at least three out of four ranks should " + msg = ("Show that rdb-pool are recovered. i.e., three out of four ranks should " "have rdb-pool.") self.log_step(msg) hosts = list(set(self.server_managers[0].ranks.values())) count = 0 for host in hosts: - node = NodeSet(host) - check_out = check_file_exists(hosts=node, filename=rdb_pool_path, sudo=True) - if check_out[0]: - count += 1 - self.log.info("rdb-pool found at %s", str(node)) + for rdb_pool_path in rdb_pool_paths: + node = NodeSet(host) + check_out = check_file_exists( + hosts=node, filename=rdb_pool_path, sudo=True) + if check_out[0]: + count += 1 + self.log.info("rdb-pool found at %s: %s", str(node), rdb_pool_path) self.log.info("rdb-pool count = %d", count) - if count < len(hosts) - 1: - errors.append(f"Not enough rdb-pool has been recovered! 
- {count} ranks") + if count != 3: + errors.append(f"Unexpected number of rdb-pool after repair! - {count} ranks") report_errors(test=self, errors=errors) From 22397f3573a48c657eebaaf9523d5d0ac601f121 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 29 Jan 2026 08:03:47 -0800 Subject: [PATCH 163/253] DAOS-18530 java: bump assertj-core from 3.19.0 to 3.27.7 (#17455) Bumps [org.assertj:assertj-core](https://github.com/assertj/assertj) from 3.19.0 to 3.27.7. Signed-off-by: dependabot[bot] --- src/client/java/hadoop-daos/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/client/java/hadoop-daos/pom.xml b/src/client/java/hadoop-daos/pom.xml index 28576e3ec63..98fa7f00fc9 100644 --- a/src/client/java/hadoop-daos/pom.xml +++ b/src/client/java/hadoop-daos/pom.xml @@ -123,7 +123,7 @@ org.assertj assertj-core - 3.19.0 + 3.27.7 test From c2d11296c7ee81723930f4d78cadd3a1794f8a23 Mon Sep 17 00:00:00 2001 From: Ken Cain Date: Thu, 29 Jan 2026 13:22:26 -0500 Subject: [PATCH 164/253] DAOS-15993 rebuild: for manual rebuilds do not eval self_heal (#17345) Consider a quick maintenance scenario in which a daos_engine is stopped briefly, and the administrator does not wish to have the DAOS automatic recovery / rebuild mechanism occur. That is, a pool map update (targets from UP_IN to DOWN) is to occur, the pool to enter a degraded mode (still allowing ongoing I/O), and NO rebuild to be triggered during this brief time window. The above can be arranged by modifying the system or pool-specific self_heal property value (to not set the rebuild bit), and then stopping the engine. Now also consider the conclusion of the maintenance that involes re-starting the engine, and reintegrating that rank back into the pool. It is most convenient to directly issue a dmg pool reintegrate command from the maintenance state. 
Before this change, manual administration commands such as dmg pool exclude/reintegrate were prevented from triggering rebuilds due to the pool self_heal property setting. However, the intention of the self_heal (aka auto recovery) feature is to only apply to automatic rebuilds. With this change, the is_pool_rebuild_allowed() function is updated to accept an indication of whether the self_heal checks are applicable. Manual pool map update and rebuild cases supply false for this argument (allowing those cases to result in a rebuild being scheduled). Signed-off-by: Kenneth Cain --- src/include/daos_srv/pool.h | 13 ++++++------ src/include/daos_srv/rebuild.h | 4 ++-- src/pool/srv_pool.c | 35 +++++++++++++++++-------------- src/rebuild/srv.c | 18 ++++++++-------- src/tests/suite/daos_degrade_ec.c | 16 ++++++++++++++ src/tests/suite/daos_degraded.c | 6 ++++++ 6 files changed, 59 insertions(+), 33 deletions(-) diff --git a/src/include/daos_srv/pool.h b/src/include/daos_srv/pool.h index 9ad67d3e170..147e4bb3fc1 100644 --- a/src/include/daos_srv/pool.h +++ b/src/include/daos_srv/pool.h @@ -567,18 +567,19 @@ int ds_pool_prop_recov_cont_reset(struct rdb_tx *tx, struct ds_rsvc *rsvc); static inline bool -is_pool_rebuild_allowed(struct ds_pool *pool, bool check_delayed_rebuild) +is_pool_rebuild_allowed(struct ds_pool *pool, uint64_t self_heal, bool auto_recovery) { - uint64_t flags = DAOS_SELF_HEAL_AUTO_REBUILD; - - if (check_delayed_rebuild) - flags |= DAOS_SELF_HEAL_DELAY_REBUILD; + bool auto_rebuild_enabled = self_heal & DAOS_SELF_HEAL_AUTO_REBUILD; + bool delay_rebuild_enabled = self_heal & DAOS_SELF_HEAL_DELAY_REBUILD; if (pool->sp_disable_rebuild) return false; - if (!(pool->sp_self_heal & flags)) + + /* If auto recovery is requested, only allow if self_heal enables auto or delay_rebuild */ + if (auto_recovery && !(auto_rebuild_enabled || delay_rebuild_enabled)) return false; + /* Otherwise, rebuild is allowed */ return true; } diff --git a/src/include/daos_srv/rebuild.h 
b/src/include/daos_srv/rebuild.h index b513d04c21a..a161ba8d5ad 100644 --- a/src/include/daos_srv/rebuild.h +++ b/src/include/daos_srv/rebuild.h @@ -1,6 +1,6 @@ /** * (C) Copyright 2017-2023 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -96,7 +96,7 @@ void ds_rebuild_running_query(uuid_t pool_uuid, uint32_t opc, uint32_t *rebuild_ daos_epoch_t *current_eph, uint32_t *rebuild_gen); int ds_rebuild_regenerate_task(struct ds_pool *pool, daos_prop_t *prop, uint64_t sys_self_heal, - uint64_t delay_sec); + bool auto_recovery, uint64_t delay_sec); void ds_rebuild_leader_stop_all(void); void ds_rebuild_abort(uuid_t pool_uuid, unsigned int version, uint32_t rebuild_gen, uint64_t term); diff --git a/src/pool/srv_pool.c b/src/pool/srv_pool.c index c31c3358914..f3c7e62ddfc 100644 --- a/src/pool/srv_pool.c +++ b/src/pool/srv_pool.c @@ -1,6 +1,6 @@ /* * (C) Copyright 2016-2024 Intel Corporation. 
- * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * (C) Copyright 2025 Google LLC * * SPDX-License-Identifier: BSD-2-Clause-Patent @@ -2577,7 +2577,8 @@ pool_svc_step_up_cb(struct ds_rsvc *rsvc) if (rc != 0) goto out; - rc = ds_rebuild_regenerate_task(svc->ps_pool, prop, sys_self_heal, 0); + rc = ds_rebuild_regenerate_task(svc->ps_pool, prop, sys_self_heal, true /* auto_recovery */, + 0 /* delay_sec */); if (rc != 0) goto out; @@ -7746,23 +7747,24 @@ pool_svc_update_map(struct pool_svc *svc, crt_opcode_t opc, bool exclude_rank, struct pool_target_addr_list *inval_list_out, uint32_t *map_version, struct rsvc_hint *hint, enum map_update_source src, uint32_t flags) { - struct pool_target_id_list target_list = {0}; - uint32_t tgt_map_ver = 0; - bool updated; - int rc; - char *env; - daos_epoch_t rebuild_eph = d_hlc_get(); - uint64_t delay = 2; - bool sys_self_heal_applicable; - uint64_t sys_self_heal = 0; + struct pool_target_id_list target_list = {0}; + uint32_t tgt_map_ver = 0; + bool updated; + int rc; + char *env; + daos_epoch_t rebuild_eph = d_hlc_get(); + uint64_t delay = 2; + bool auto_recovery; + uint64_t sys_self_heal = 0; /* - * The system self-heal policy only applies to automatic pool exclude + * The pool and system self-heal policies only apply to automatic pool exclude * and rebuild operations. */ - sys_self_heal_applicable = (opc == MAP_EXCLUDE && src == MUS_SWIM); + auto_recovery = (opc == MAP_EXCLUDE && src == MUS_SWIM); - if (sys_self_heal_applicable) { + /* If applicable, check system self-heal policy. */ + if (auto_recovery) { rc = ds_mgmt_get_self_heal_policy(pool_svc_abort_gshp, svc, &sys_self_heal); if (rc != 0) { DL_ERROR(rc, DF_UUID ": failed to get self-heal policy", @@ -7784,6 +7786,7 @@ pool_svc_update_map(struct pool_svc *svc, crt_opcode_t opc, bool exclude_rank, } } + /* Pool self-heal policy is checked in this call. 
*/ rc = pool_svc_update_map_internal(svc, opc, exclude_rank, extend_rank_list, extend_domains_nr, extend_domains, &target_list, list, hint, &updated, map_version, &tgt_map_ver, inval_list_out, @@ -7804,14 +7807,14 @@ pool_svc_update_map(struct pool_svc *svc, crt_opcode_t opc, bool exclude_rank, } d_freeenv_str(&env); - if (sys_self_heal_applicable && !(sys_self_heal & DS_MGMT_SELF_HEAL_POOL_REBUILD)) { + if (auto_recovery && !(sys_self_heal & DS_MGMT_SELF_HEAL_POOL_REBUILD)) { D_DEBUG(DB_MD, DF_UUID ": pool_rebuild disabled in system property self_heal\n", DP_UUID(svc->ps_uuid)); rc = 0; goto out; } - if (!is_pool_rebuild_allowed(svc->ps_pool, true)) { + if (!is_pool_rebuild_allowed(svc->ps_pool, svc->ps_pool->sp_self_heal, auto_recovery)) { D_DEBUG(DB_MD, DF_UUID ": rebuild disabled for pool\n", DP_UUID(svc->ps_pool->sp_uuid)); D_GOTO(out, rc); diff --git a/src/rebuild/srv.c b/src/rebuild/srv.c index 5dad600030e..24598ab68cf 100644 --- a/src/rebuild/srv.c +++ b/src/rebuild/srv.c @@ -2614,11 +2614,12 @@ regenerate_task_of_type(struct ds_pool *pool, pool_comp_state_t match_states, ui return rc; } - -/* Regenerate the rebuild tasks when changing the leader. */ +/* Regenerate rebuild tasks when changing the leader, or manually starting rebuilds. + * auto_recovery (true for leader change, false for manual) applies to both sys_self_heal and prop. 
+ */ int ds_rebuild_regenerate_task(struct ds_pool *pool, daos_prop_t *prop, uint64_t sys_self_heal, - uint64_t delay_sec) + bool auto_recovery, uint64_t delay_sec) { struct daos_prop_entry *entry; char *env; @@ -2626,7 +2627,7 @@ ds_rebuild_regenerate_task(struct ds_pool *pool, daos_prop_t *prop, uint64_t sys rebuild_gst.rg_abort = 0; - if (!(sys_self_heal & DS_MGMT_SELF_HEAL_POOL_REBUILD)) { + if (auto_recovery && !(sys_self_heal & DS_MGMT_SELF_HEAL_POOL_REBUILD)) { D_DEBUG(DB_REBUILD, DF_UUID ": pool_rebuild disabled in sys_self_heal\n", DP_UUID(pool->sp_uuid)); return DER_SUCCESS; @@ -2648,10 +2649,8 @@ ds_rebuild_regenerate_task(struct ds_pool *pool, daos_prop_t *prop, uint64_t sys } entry = daos_prop_entry_get(prop, DAOS_PROP_PO_SELF_HEAL); - D_ASSERT(entry != NULL); - if (entry->dpe_val & (DAOS_SELF_HEAL_AUTO_REBUILD | DAOS_SELF_HEAL_DELAY_REBUILD) && - !pool->sp_disable_rebuild) { + if (is_pool_rebuild_allowed(pool, entry->dpe_val /* self_heal */, auto_recovery)) { rc = regenerate_task_of_type( pool, PO_COMP_ST_DOWN, entry->dpe_val & DAOS_SELF_HEAL_DELAY_REBUILD ? 
-1 : delay_sec); @@ -2662,7 +2661,7 @@ ds_rebuild_regenerate_task(struct ds_pool *pool, daos_prop_t *prop, uint64_t sys if (rc != 0) return rc; } else { - D_DEBUG(DB_REBUILD, DF_UUID" self healing is disabled\n", + D_DEBUG(DB_REBUILD, "Pool " DF_UUID " self healing is disabled\n", DP_UUID(pool->sp_uuid)); } @@ -2699,7 +2698,8 @@ ds_rebuild_admin_start(struct ds_pool *pool) goto out; } - rc = ds_rebuild_regenerate_task(pool, &prop, DS_MGMT_SELF_HEAL_ALL, 0); + rc = ds_rebuild_regenerate_task(pool, &prop, DS_MGMT_SELF_HEAL_ALL /* sys_self_heal */, + false /* auto_recovery */, 0 /* delay_sec */); daos_prop_fini(&prop); if (rc) DL_ERROR(rc, DF_UUID ": regenerate rebuild task failed", DP_UUID(pool->sp_uuid)); diff --git a/src/tests/suite/daos_degrade_ec.c b/src/tests/suite/daos_degrade_ec.c index c22204a62d5..09a68609f64 100644 --- a/src/tests/suite/daos_degrade_ec.c +++ b/src/tests/suite/daos_degrade_ec.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2016-2023 Intel Corporation. + * (C) Copyright 2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -38,6 +39,11 @@ degrade_small_sub_setup(void **state) arg = *state; arg->no_rebuild = 1; + + /* Disable manual rebuilds */ + test_set_engine_fail_loc(arg, CRT_NO_RANK, DAOS_REBUILD_DISABLE | DAOS_FAIL_ALWAYS); + + /* Disable automatic rebuilds */ rc = daos_pool_set_prop(arg->pool.pool_uuid, "self_heal", "exclude"); return rc; @@ -56,6 +62,11 @@ degrade_sub_setup(void **state) arg = *state; arg->no_rebuild = 1; + + /* Disable manual rebuilds */ + test_set_engine_fail_loc(arg, CRT_NO_RANK, DAOS_REBUILD_DISABLE | DAOS_FAIL_ALWAYS); + + /* Disable automatic rebuilds */ rc = daos_pool_set_prop(arg->pool.pool_uuid, "self_heal", "exclude"); return rc; @@ -74,6 +85,11 @@ degrade_sub_rf1_setup(void **state) arg = *state; arg->no_rebuild = 1; + + /* Disable manual rebuilds */ + test_set_engine_fail_loc(arg, CRT_NO_RANK, DAOS_REBUILD_DISABLE | DAOS_FAIL_ALWAYS); + + /* Disable automatic rebuilds 
*/ rc = daos_pool_set_prop(arg->pool.pool_uuid, "self_heal", "exclude"); return rc; diff --git a/src/tests/suite/daos_degraded.c b/src/tests/suite/daos_degraded.c index c1328553abb..c26ae442b1f 100644 --- a/src/tests/suite/daos_degraded.c +++ b/src/tests/suite/daos_degraded.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2016-2023 Intel Corporation. + * (C) Copyright 2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -258,6 +259,11 @@ degraded_setup(void **state) arg = *state; arg->no_rebuild = 1; + + /* Disable manual rebuilds */ + test_set_engine_fail_loc(arg, CRT_NO_RANK, DAOS_REBUILD_DISABLE | DAOS_FAIL_ALWAYS); + + /* Disable automatic rebuilds */ rc = daos_pool_set_prop(arg->pool.pool_uuid, "self_heal", "exclude"); return rc; From f59cba323c45cec1f353d005d7e07afabd806a12 Mon Sep 17 00:00:00 2001 From: Phil Henderson Date: Fri, 30 Jan 2026 10:31:41 -0500 Subject: [PATCH 165/253] DAOS-18519 test: Update test pool size for new defaults (#17477) The default BS cluster size was increased from 32MB -> 128MB in MD on SSD mode. Updating the test pool sizes to work with this new default. 
Signed-off-by: Phil Henderson --- src/tests/ftest/aggregation/continuous_write.yaml | 2 +- src/tests/ftest/datamover/obj_large_posix.yaml | 2 +- src/tests/ftest/datamover/serial_large_posix.yaml | 2 +- src/tests/ftest/deployment/io_sys_admin.yaml | 2 +- src/tests/ftest/fault_injection/pool.yaml | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/tests/ftest/aggregation/continuous_write.yaml b/src/tests/ftest/aggregation/continuous_write.yaml index 713359c460f..56ff036a86f 100644 --- a/src/tests/ftest/aggregation/continuous_write.yaml +++ b/src/tests/ftest/aggregation/continuous_write.yaml @@ -15,7 +15,7 @@ server_config: storage: auto pool: - scm_size: 100MB + scm_size: 128MB nvme_size: 1GiB container: diff --git a/src/tests/ftest/datamover/obj_large_posix.yaml b/src/tests/ftest/datamover/obj_large_posix.yaml index b15370b6e65..245883e765b 100644 --- a/src/tests/ftest/datamover/obj_large_posix.yaml +++ b/src/tests/ftest/datamover/obj_large_posix.yaml @@ -22,7 +22,7 @@ server_config: storage: auto pool: - size: 60G + size: 100G container: type: POSIX diff --git a/src/tests/ftest/datamover/serial_large_posix.yaml b/src/tests/ftest/datamover/serial_large_posix.yaml index 854317d19c0..2f587af3b46 100644 --- a/src/tests/ftest/datamover/serial_large_posix.yaml +++ b/src/tests/ftest/datamover/serial_large_posix.yaml @@ -22,7 +22,7 @@ server_config: storage: auto pool: - size: 60G + size: 100G container: type: POSIX diff --git a/src/tests/ftest/deployment/io_sys_admin.yaml b/src/tests/ftest/deployment/io_sys_admin.yaml index 003b72bfe40..ce1a0b36667 100644 --- a/src/tests/ftest/deployment/io_sys_admin.yaml +++ b/src/tests/ftest/deployment/io_sys_admin.yaml @@ -25,7 +25,7 @@ dmg: storage_sub_command: scan pool_1: - scm_size: 256MiB + scm_size: 2GiB nvme_size: 16GiB pool_2: diff --git a/src/tests/ftest/fault_injection/pool.yaml b/src/tests/ftest/fault_injection/pool.yaml index a9a04d63ad7..dc31c0bc6f5 100644 --- 
a/src/tests/ftest/fault_injection/pool.yaml +++ b/src/tests/ftest/fault_injection/pool.yaml @@ -22,7 +22,7 @@ server_config: storage: auto pool: - size: 20G + size: 40G nranks: 4 rebuild_timeout: 120 pool_query_timeout: 60 From bdfdf732e189f04bd5fb6af9b18791e254a7fd69 Mon Sep 17 00:00:00 2001 From: Nasf-Fan Date: Sun, 1 Feb 2026 20:26:17 +0800 Subject: [PATCH 166/253] DAOS-18238 chk: handle CRT_EVS_GRPMOD event from CaRT PG (#17459) To guarantee that the rank deatch event will not be omitted, related CR logic needs handle the event from both SWIM and CaRT PG, although there will be a lot of useless event callback. Signed-off-by: Fan Yong --- src/chk/chk_iv.c | 8 +++++++- src/chk/chk_leader.c | 41 ++++++++++++++++++++++++++++++++++++++--- 2 files changed, 45 insertions(+), 4 deletions(-) diff --git a/src/chk/chk_iv.c b/src/chk/chk_iv.c index 712fd474c21..467648520bb 100644 --- a/src/chk/chk_iv.c +++ b/src/chk/chk_iv.c @@ -288,10 +288,16 @@ chk_iv_update(struct chk_instance *ins, struct chk_iv *iv, uint32_t shortcut, ui /* Wait chk_deak_rank_ult to sync the IV namespace. */ while (ver == ins->ci_ns_ver && ins->ci_skip_oog == 0 && ins->ci_pause == 0) { dss_sleep(500); - if (++wait_cnt % 40 == 0) + if (++wait_cnt % 40 == 0) { D_WARN("CHK iv " DF_X64 "/" DF_X64 " is blocked because of DER_OOG " "for %d seconds.\n", iv->ci_gen, iv->ci_seq, wait_cnt / 2); + /* + * Let's retry IV in case of related dead rank recovered back before + * being handled by chk_dead_rank_ult, although it is rare. 
+ */ + break; + } } if (ins->ci_pause || ins->ci_skip_oog) diff --git a/src/chk/chk_leader.c b/src/chk/chk_leader.c index 3c381346663..31cfab811f6 100644 --- a/src/chk/chk_leader.c +++ b/src/chk/chk_leader.c @@ -3326,10 +3326,17 @@ chk_leader_query(int pool_nr, uuid_t pools[], chk_query_head_cb_t head_cb, while (ver == ins->ci_ns_ver && ins->ci_skip_oog == 0 && ins->ci_pause == 0) { dss_sleep(500); - if (++wait_cnt % 40 == 0) + if (++wait_cnt % 40 == 0) { D_WARN("Leader (" DF_X64 ") query is blocked because of %d for " "about %d seconds.\n", gen, rc, wait_cnt / 2); + /* + * Let's retry query in case of related dead rank recovered back + * before being handled by chk_dead_rank_ult, although it is rare. + */ + break; + } + if (rc != -DER_OOG) break; } @@ -3849,7 +3856,7 @@ chk_rank_event_cb(d_rank_t rank, uint64_t incarnation, enum crt_event_source src /* Ignore the event that is not applicable to current rank. */ - if (src != CRT_EVS_SWIM) + if (src != CRT_EVS_SWIM && src != CRT_EVS_GRPMOD) D_GOTO(out, rc = -DER_NOTAPPLICABLE); if (type != CRT_EVT_DEAD && type != CRT_EVT_ALIVE) @@ -3861,14 +3868,37 @@ chk_rank_event_cb(d_rank_t rank, uint64_t incarnation, enum crt_event_source src D_GOTO(out, rc = -DER_NOMEM); cdr->cdr_rank = rank; + } else if (d_list_empty(&ins->ci_dead_ranks)) { + D_GOTO(out, rc = -DER_NOTAPPLICABLE); } ABT_mutex_lock(ins->ci_abt_mutex); if (cdr != NULL) { + struct chk_dead_rank *tmp; + /* - * The event may be triggered on non-system SX. Let's notify the leader scheduler + * The event may be triggered on non-system SX (SWIM). Let's ask chk_dead_rank_ult * to handle that on system XS. + * + * The callback for one rank dead event maybe triggered twice from multiple source: + * SWIM and PG memberskip changes. Let's only add once into the ins->ci_dead_ranks. + * + * Generally, ins->ci_dead_ranks is very short. Then it is very fast to go through + * the whole list. 
*/ + d_list_for_each_entry(tmp, &ins->ci_dead_ranks, cdr_link) { + if (tmp->cdr_rank == rank) { + /* Repeated one, ignore it. */ + D_FREE(cdr); + D_GOTO(unlock, rc = -DER_NOTAPPLICABLE); + } + + if (tmp->cdr_rank > rank) { + d_list_add(&cdr->cdr_link, &tmp->cdr_link); + D_GOTO(unlock, rc = 0); + } + } + d_list_add_tail(&cdr->cdr_link, &ins->ci_dead_ranks); } else { /* Remove former non-handled dead rank from the list. */ @@ -3878,8 +3908,13 @@ chk_rank_event_cb(d_rank_t rank, uint64_t incarnation, enum crt_event_source src D_FREE(cdr); break; } + + if (cdr->cdr_rank > rank) + D_GOTO(unlock, rc = -DER_NOTAPPLICABLE); } } + +unlock: ABT_mutex_unlock(ins->ci_abt_mutex); out: From eea376fbd39d90523d31d00f0be9a67d8e34c136 Mon Sep 17 00:00:00 2001 From: "John E. Malmberg" Date: Mon, 2 Feb 2026 10:03:27 -0600 Subject: [PATCH 167/253] SRE-3404 Hardware pre-checks, Leap fixes (#17254) Use Mellanox script for all distros as part of enabling tests on Leap/SLES. Jenkinsfile: rpm_test_post(): String node can be a comma delimited list of nodes. Must only want the first node in that case. ci/functional/test_main_prep_node.sh: Handle udev rules for ib_xx named devices. Handle that PMEM may not be present. ci/junit.sh: Fix a syntax error that sometimes generate log noise. ci/provisioning/post_provision_config_nodes.sh: Remove hack to limit NVMe devices used. ci/provisioning/post_provision_config_common_functions.sh ci/provisioning/post_provision_config_nodes_EL.sh: Move the install_mofed function to common functions and clean it up. src/tests/ftest/scripts/main.sh: Minor Spelling fixes. ci/storage/test_main_storage_prepare_node.sh: Fix to not fail on the test controller node with no PMEM. Signed-off-by: John E. 
Malmberg --- Jenkinsfile | 12 +- ci/functional/test_main_prep_node.sh | 213 ++++++++++-------- ci/junit.sh | 2 +- .../post_provision_config_common_functions.sh | 31 ++- .../post_provision_config_nodes.sh | 75 +----- .../post_provision_config_nodes_EL.sh | 65 +----- .../post_provision_config_nodes_LEAP.sh | 2 +- ci/storage/test_main_storage_prepare_node.sh | 25 +- src/tests/ftest/scripts/main.sh | 8 +- 9 files changed, 176 insertions(+), 257 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 756892b901d..8c52a25ae2a 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -5,7 +5,7 @@ /* groovylint-disable ParameterName, VariableName */ /* Copyright 2019-2024 Intel Corporation /* Copyright 2025 Google LLC - * Copyright 2025 Hewlett Packard Enterprise Development LP + * Copyright 2025-2026 Hewlett Packard Enterprise Development LP * All rights reserved. * * This file is part of the DAOS Project. It is subject to the license terms @@ -152,9 +152,12 @@ String vm9_label(String distro) { } void rpm_test_post(String stageName, String node) { + // Extract first node from comma-delimited list + String firstNode = node.split(',')[0].trim() sh label: 'Fetch and stage artifacts', - script: 'hostname; ssh -i ci_key jenkins@' + node + ' ls -ltar /tmp; mkdir -p "' + env.STAGE_NAME + '/" && ' + - 'scp -i ci_key jenkins@' + node + + script: 'hostname; ssh -i ci_key jenkins@' + firstNode + + ' ls -ltar /tmp; mkdir -p "' + env.STAGE_NAME + '/" && ' + + 'scp -i ci_key jenkins@' + firstNode + ':/tmp/{{suite_dmg,daos_{server_helper,{control,agent}}}.log,daos_server.log.*} "' + stageName + '/"' archiveArtifacts artifacts: env.STAGE_NAME + '/**' @@ -591,7 +594,6 @@ pipeline { ' --build-arg DAOS_KEEP_SRC=yes ' + ' --build-arg REPOS="' + prRepos() + '"' + ' --build-arg POINT_RELEASE=.6 ' - } } steps { @@ -644,7 +646,6 @@ pipeline { ' --build-arg DAOS_KEEP_SRC=yes ' + " -t ${sanitized_JOB_NAME()}-leap15" + ' --build-arg POINT_RELEASE=.5 ' - } } steps { @@ -695,7 +696,6 @@ pipeline { ' 
--build-arg DAOS_PACKAGES_BUILD=no ' + ' --build-arg COMPILER=icc' + ' --build-arg POINT_RELEASE=.5 ' - } } steps { diff --git a/ci/functional/test_main_prep_node.sh b/ci/functional/test_main_prep_node.sh index fec054efb54..71ca727af1a 100755 --- a/ci/functional/test_main_prep_node.sh +++ b/ci/functional/test_main_prep_node.sh @@ -1,7 +1,7 @@ #!/bin/bash # # Copyright 2020-2023 Intel Corporation. -# Copyright 2025 Hewlett Packard Enterprise Development LP +# Copyright 2025-2026 Hewlett Packard Enterprise Development LP # # SPDX-License-Identifier: BSD-2-Clause-Patent # @@ -16,6 +16,9 @@ domain1="${JENKINS_URL#https://}" mail_domain="${domain1%%/*}" : "${EMAIL_DOMAIN:=$mail_domain}" : "${DAOS_DEVOPS_EMAIL:="$HOSTNAME"@"$EMAIL_DOMAIN"}" +: "${DAOS_INFINIBAND:=}" +: "${DAOS_PMEM:=0}" +: "${DAOS_NVME:=0}" result=0 mail_message='' @@ -58,7 +61,7 @@ if ! command -v lspci; then if command -v dnf; then dnf -y install pciutils else - echo "pciutils not installed, can not test for Infiniband devices" + echo "pciutils not installed, can not test for hardware devices" fi fi @@ -106,17 +109,27 @@ The Omni-Path adapters will not be used." mail_message+="${nl}${ib_message}${nl}" echo "$ib_message" fi +if [ -z "$DAOS_INFINIBAND" ]; then + DAOS_INFINIBAND=$ib_count +fi set -x # Wait for at least the expected IB devices to show up. -# in the case of dual port HBAs, not all IB devices will -# show up. +# in the case of dual port HBAs, only the ports that are connected may show up. # For some unknown reason, sometimes IB devices will not show up # except in the lspci output unless an ip link set up command for # at least one device that should be present shows up. good_ibs=() function do_wait_for_ib { - local ib_devs=("$@") + # The problem is that we do not know the actual device names + # ahead of time. So we try to bring up all possible devices + # and see if at least the expected number show up with IP + # addresses. 
+ local ib_devs=("ib0" "ib1" "ib2" "ib3" "ib4") + # Udev rule convention, first digit is the numa node + # second digit should be an index of the HBA on that numa node. + ib_devs+=("ib_00" "ib_01" "ib_02" "ib_03") + ib_devs+=("ib_10" "ib_11" "ib_12" "ib_13") local working_ib ib_timeout=300 # 5 minutes retry_wait=10 # seconds @@ -147,15 +160,14 @@ function do_wait_for_ib { return 1 } -# Migrating to using udev rules for network devices -if [ -e /etc/udev/rules.d/70-persistent-ipoib.rules ]; then - ib_list=('ib_cpu0_0' 'ib_cpu1_0') -else - ib_list=('ib0') - if [ "$ib_count" -gt 1 ]; then - ib_list+=('ib1') +# Get list of actual InfiniBand devices from /sys/class/net/ +ib_list=() +for iface in /sys/class/net/ib*; do + if [ -e "$iface" ]; then + iface_name=$(basename "$iface") + ib_list+=("$iface_name") fi -fi +done function check_ib_devices { local ib_devs=("$@") @@ -165,11 +177,10 @@ function check_ib_devices { set +x if ! ip addr show "$iface" | grep "inet "; then ib_message="$({ - echo "Found interface $iface down after reboot on $HOSTNAME." + echo "Found interface $iface with no ip address after reboot on $HOSTNAME." ip addr show "$iface" || true cat /sys/class/net/"$iface"/mode || true ip link set up "$iface" || true - cat /etc/sysconfig/network-scripts/ifcfg-"$iface" || true } 2>&1)" mail_message+="${nl}${ib_message}${nl}" echo "$ib_message" @@ -190,11 +201,10 @@ function check_ib_devices { done } - # First check for InfiniBand devices if [ "$ib_count" -gt 0 ]; then - if do_wait_for_ib "${ib_list[@]}"; then - echo "Found at least $ib_count working devices in" "${ib_list[@]}" + if do_wait_for_ib; then + echo "Found at least $ib_count working devices on $HOSTNAME" # All good, generate Junit report check_ib_devices "${good_ibs[@]}" else @@ -205,106 +215,111 @@ fi # having -x just makes the console log harder to read. # set +x -if [ "$ib_count" -ge 2 ]; then - # now check for pmem & NVMe drives when multiple ib are present. 
- # ipmctl show -dimm should show an even number of drives, all healthy - dimm_count=$(ipmctl show -dimm | grep Healthy -c) - if [ "$dimm_count" -eq 0 ] || [ $((dimm_count%2)) -ne 0 ]; then - # May not be fatal, the PMEM DIMM should be replaced when downtime can be - # scheduled for this system. - dimm_message="FAIL: Wrong number $dimm_count healthy PMEM DIMMs seen" - dimm_message+=" on $HOSTNAME." +if [ "$ib_count" -ge 2 ] ; then + if [ "$DAOS_PMEM" -gt 0 ]; then + # now check for pmem & NVMe drives when multiple ib are present. + # ipmctl show -dimm should show an even number of drives, all healthy + dimm_count=$(ipmctl show -dimm | grep Healthy -c) + if [ "$dimm_count" -eq 0 ] || [ $((dimm_count%2)) -ne 0 ]; then + # May not be fatal, the PMEM DIMM should be replaced when downtime + # can be # scheduled for this system. + dimm_message="FAIL: Wrong number $dimm_count healthy PMEM DIMMs seen" + dimm_message+=" on $HOSTNAME." - mail_message+="$nl$dimm_message$nl$(ipmctl show -dimm)$nl" - else - echo "OK: Found $dimm_count PMEM DIMMs." - fi - # Should have 2 regions 0x0000 and 0x0001, type AppDirect - dimm_rcount=0 - while IFS= read -r line; do - if [[ "$line" != *"| AppDirect"*"| Healthy"* ]]; then continue; fi - ((dimm_rcount++)) || true - done < <(ipmctl show -region) + mail_message+="$nl$dimm_message$nl$(ipmctl show -dimm)$nl" + else + echo "OK: Found $dimm_count PMEM DIMMs." + fi + # Should have 2 regions 0x0000 and 0x0001, type AppDirect + dimm_rcount=0 + while IFS= read -r line; do + if [[ "$line" != *"| AppDirect"*"| Healthy"* ]]; then continue; fi + ((dimm_rcount++)) || true + done < <(ipmctl show -region) - ((testruns++)) || true - testcases+=" ${nl}" - if [ "$dimm_rcount" -ne 2 ]; then - pmem_message="FAIL: Found $dimm_rcount of DIMM PMEM regions, need 2" - pmem_message+=" on $HOSTNAME." 
- pmem_message+="$nl$(ipmctl show -region)" - mail_message+="$nl$pmem_message$nl" - ((testfails++)) || true - testcases+=" - + ((testruns++)) || true + testcases+=" ${nl}" + if [ "$dimm_rcount" -ne 2 ]; then + pmem_message="FAIL: Found $dimm_rcount of DIMM PMEM regions, need 2" + pmem_message+=" on $HOSTNAME." + pmem_message+="$nl$(ipmctl show -region)" + mail_message+="$nl$pmem_message$nl" + ((testfails++)) || true + testcases+=" + $nl" result=3 - else - echo "OK: Found $dimm_rcount DIMM PMEM regions." - fi - testcases+=" $nl" - - # While this gets more data than needed, it is the same search that - # DAOS tests do and records it in the console log. - nvme_devices="$(lspci -vmm -D | grep -E '^(Slot|Class|Device|NUMANode):' | - grep -E 'Class:\s+Non-Volatile memory controller' -B 1 -A 2)" - nvme_count=0 - while IFS= read -r line; do - if [[ "$line" != *"Class:"*"Non-Volatile memory controller"* ]];then - continue + else + echo "OK: Found $dimm_rcount DIMM PMEM regions." fi - ((nvme_count++)) || true - done < <(printf %s "$nvme_devices") + testcases+=" $nl" + fi + if [ "$DAOS_NVME" -gt 0 ]; then + # While this gets more data than needed, it is the same search that + # DAOS tests do and records it in the console log. + nvme_devices="$(lspci -vmm -D | grep -E '^(Slot|Class|Device|NUMANode):' | + grep -E 'Class:\s+Non-Volatile memory controller' -B 1 -A 2)" + nvme_count=0 + while IFS= read -r line; do + if [[ "$line" != *"Class:"*"Non-Volatile memory controller"* ]];then + continue + fi + ((nvme_count++)) || true + done < <(printf %s "$nvme_devices") - ((testruns++)) || true - testcases+=" ${nl}" - if [ $((nvme_count%2)) -ne 0 ]; then - nvme_message="Fail: Odd number ($nvme_count) of NVMe devices seen." - mail_message+="$nl$nvme_message$nl$nvme_devices$nl" - ((testfails++)) || true - testcases+=" + ((testruns++)) || true + testcases+=" ${nl}" + if [ $((nvme_count%2)) -ne 0 ]; then + nvme_message="Fail: Odd number ($nvme_count) of NVMe devices seen." 
+ mail_message+="$nl$nvme_message$nl$nvme_devices$nl" + ((testfails++)) || true + testcases+=" $nl" - result=4 - else - echo "OK: Even number ($nvme_count) of NVMe devices seen." + result=4 + else + echo "OK: Even number ($nvme_count) of NVMe devices seen." + fi + testcases+=" $nl" fi - testcases+=" $nl" - # All storage found by lspci should also be in lsblk report lsblk_nvme=$(lsblk | grep nvme -c) lsblk_pmem=$(lsblk | grep pmem -c) - ((testruns++)) || true - testcases+=" ${nl}" - if [ "$lsblk_nvme" -ne "$nvme_count" ]; then - lsblk_nvme_msg="Fail: Only $lsblk_nvme of $nvme_count NVMe devices seen" - lsblk_nvme_msg+=" on $HOSTNAME." - mail_message+="$nl$lsblk_nvme_msg$nl$(lsblk)$nl" - ((testfails++)) || true - testcases+=" + if [ "$DAOS_NVME" -gt 0 ]; then + ((testruns++)) || true + testcases+=" ${nl}" + if [ "$lsblk_nvme" -ne "$nvme_count" ]; then + lsblk_nvme_msg="Fail: Only $lsblk_nvme of $nvme_count NVMe devices seen" + lsblk_nvme_msg+=" on $HOSTNAME." + mail_message+="$nl$lsblk_nvme_msg$nl$(lsblk)$nl" + ((testfails++)) || true + testcases+=" $nl" - result=5 - else - echo "OK: All $nvme_count NVMe devices are in lsblk report." + result=5 + else + echo "OK: All $nvme_count NVMe devices are in lsblk report." + fi + testcases+=" $nl" fi - testcases+=" $nl" - - ((testruns++)) || true - testcases+=" ${nl}" - if [ "$lsblk_pmem" -ne "$dimm_rcount" ]; then - lsblk_pmem_msg="Only $lsblk_pmem of $dimm_rcount PMEM devices seen" - lsblk_pmem_msg+=" on $HOSTNAME." - mail_message+="$nl$lsblk_pmem_msg$nl$(lsblk)$nl" - ((testfails++)) || true - testcases+=" + if [ "$DAOS_PMEM" -gt 0 ]; then + ((testruns++)) || true + testcases+=" ${nl}" + if [ "$lsblk_pmem" -ne "$dimm_rcount" ]; then + lsblk_pmem_msg="Only $lsblk_pmem of $dimm_rcount PMEM devices seen" + lsblk_pmem_msg+=" on $HOSTNAME." + mail_message+="$nl$lsblk_pmem_msg$nl$(lsblk)$nl" + ((testfails++)) || true + testcases+=" $nl" - result=6 - else - echo "OK: All $dimm_rcount PMEM devices are in lsblk report." 
+ result=6 + else + echo "OK: All $dimm_rcount PMEM devices are in lsblk report." + fi + testcases+=" $nl" fi - testcases+=" $nl" fi # Additional information if any check failed diff --git a/ci/junit.sh b/ci/junit.sh index e0051b819cd..c7e3a2aee3b 100644 --- a/ci/junit.sh +++ b/ci/junit.sh @@ -24,7 +24,7 @@ report_junit() { clush -o '-i ci_key' -l root -w "$nodes" --rcopy "$results" local results_files - results_files=$(find . -maxdepth 1 -name "$results.*") + readarray -t results_files < <(find . -maxdepth 1 -name "$results.*") if [ ${#results_files[@]} -eq 0 ]; then echo "No results found to report as JUnit results" diff --git a/ci/provisioning/post_provision_config_common_functions.sh b/ci/provisioning/post_provision_config_common_functions.sh index c3b11439d47..0b3d9413ab2 100755 --- a/ci/provisioning/post_provision_config_common_functions.sh +++ b/ci/provisioning/post_provision_config_common_functions.sh @@ -2,7 +2,7 @@ # # Copyright 2022-2023 Intel Corporation. # Copyright 2025 Google LLC -# Copyright 2025 Hewlett Packard Enterprise Development LP +# Copyright 2025-2026 Hewlett Packard Enterprise Development LP # # SPDX-License-Identifier: BSD-2-Clause-Patent # @@ -438,3 +438,32 @@ post_provision_config_nodes() { return 0 } + +install_mofed() { + if [ -z "$MLNX_VER_NUM" ]; then + echo "MLNX_VER_NUM is not set" + env + exit 1 + fi + + : "${ARTIFACTORY_URL:=}" + if [ -z "$ARTIFACTORY_URL" ]; then + return + fi + + # Install Mellanox OFED or DOCA RPMS + install_mellanox="install_mellanox.sh" + script_url="${ARTIFACTORY_URL}/raw-internal/sre_tools/$install_mellanox" + install_target="/usr/local/sbin/$install_mellanox" + + if [ ! -e "$install_target" ]; then + if ! 
curl --silent --show-error --fail \ + -o "/usr/local/sbin/$install_mellanox" "$script_url"; then + echo "Failed to fetch $script_url" + return 1 + fi + chmod 0755 "$install_target" + fi + + MELLANOX_VERSION="$MLNX_VER_NUM" "$install_mellanox" +} diff --git a/ci/provisioning/post_provision_config_nodes.sh b/ci/provisioning/post_provision_config_nodes.sh index 37ac6f23aaa..c62c7064cad 100644 --- a/ci/provisioning/post_provision_config_nodes.sh +++ b/ci/provisioning/post_provision_config_nodes.sh @@ -1,7 +1,7 @@ #!/bin/bash # # Copyright 2020-2023 Intel Corporation. -# Copyright 2025 Hewlett Packard Enterprise Development LP +# Copyright 2025-2026 Hewlett Packard Enterprise Development LP # # SPDX-License-Identifier: BSD-2-Clause-Patent # @@ -78,79 +78,6 @@ if lspci | grep -i nvme; then daos_server nvme reset && rmmod vfio_pci && modprobe vfio_pci fi -# FOR now limit to 2 devices per CPU NUMA node -: "${DAOS_CI_NVME_NUMA_LIMIT:=2}" - -function mount_nvme_drive { - local drive="$1" - file_system=$(file -sL "/dev/$drive") - if [[ "$file_system" != *"ext4 filesystem"* ]]; then - yes | mkfs -t ext4 "/dev/$drive" - fi - mkdir -p "/mnt/$drive" - mount "/dev/$drive" "/mnt/$drive" -} - - -nvme_class="/sys/class/nvme/" -function nvme_limit { - set +x - if [ ! 
-d "${nvme_class}" ] || [ -z "$(ls -A "${nvme_class}")" ]; then - echo "No NVMe devices found" - return - fi - local numa0_devices=() - local numa1_devices=() - for nvme_path in "$nvme_class"*; do - nvme="$(basename "$nvme_path")n1" - numa_node="$(cat "${nvme_path}/numa_node")" - if mount | grep "$nvme"; then - continue - fi - if [ "$numa_node" -eq 0 ]; then - numa0_devices+=("$nvme") - else - numa1_devices+=("$nvme") - fi - done - echo numa0 "${numa0_devices[@]}" - echo numa1 "${numa1_devices[@]}" - if [ "${#numa0_devices[@]}" -gt 0 ] && [ "${#numa1_devices[@]}" -gt 0 ]; then - echo "balanced NVMe configuration possible" - nvme_count=0 - for nvme in "${numa0_devices[@]}"; do - if [ "$nvme_count" -ge "${DAOS_CI_NVME_NUMA_LIMIT}" ]; then - mount_nvme_drive "$nvme" - else - ((nvme_count++)) || true - fi - done - nvme_count=0 - for nvme in "${numa1_devices[@]}"; do - if [ "$nvme_count" -ge "${DAOS_CI_NVME_NUMA_LIMIT}" ]; then - mount_nvme_drive "$nvme" - else - ((nvme_count++)) || true - fi - done - else - echo "balanced NVMe configuration not possible" - for nvme in "${numa0_devices[@]}" "${numa1_devices[@]}"; do - ((needed = "$DAOS_CI_NVME_NUMA_LIMIT" + 1)) || true - nvme_count=0 - if [ "$nvme_count" -ge "$needed" ]; then - mount_nvme_drive "$nvme" - else - ((nvme_count++)) || true - fi - done - fi - set -x -} - -# Force only the desired number of NVMe devices to be seen by DAOS tests -# by mounting the extra ones. -nvme_limit systemctl enable nfs-server.service systemctl start nfs-server.service diff --git a/ci/provisioning/post_provision_config_nodes_EL.sh b/ci/provisioning/post_provision_config_nodes_EL.sh index 75e1d7934e3..c9257d87c22 100644 --- a/ci/provisioning/post_provision_config_nodes_EL.sh +++ b/ci/provisioning/post_provision_config_nodes_EL.sh @@ -1,7 +1,7 @@ #!/bin/bash # # Copyright 2021-2024 Intel Corporation. 
-# Copyright 2025 Hewlett Packard Enterprise Development LP +# Copyright 2025-2026 Hewlett Packard Enterprise Development LP # # SPDX-License-Identifier: BSD-2-Clause-Patent @@ -33,66 +33,3 @@ distro_custom() { dnf -y install python39 python39-devel dnf -y install python3.11 python3.11-devel } - -install_mofed() { - if [ -z "$MLNX_VER_NUM" ]; then - echo "MLNX_VER_NUM is not set" - env - exit 1 - fi - - # Remove Omni-Path software - # shellcheck disable=SC2046 - time dnf -y remove $(rpm -q opa-address-resolution \ - opa-basic-tools \ - opa-fastfabric \ - opa-libopamgt \ - compat-openmpi16 \ - compat-openmpi16-devel \ - openmpi \ - openmpi-devel \ - ompi \ - ompi-debuginfo \ - ompi-devel | grep -v 'is not installed') - - - stream=false - gversion="$VERSION_ID" - if [ "$gversion" == "8" ]; then - # Mellanox does not have a release for 8.9 yet. - gversion="8.8" - stream=true - elif [[ $gversion = *.*.* ]]; then - gversion="${gversion%.*}" - fi - - : "${ARTIFACTORY_URL:=}" - if [ -z "$ARTIFACTORY_URL" ]; then - return - fi - - # Install Mellanox OFED or DOCA RPMS - install_mellanox="install_mellanox.sh" - script_url="${ARTIFACTORY_URL}/raw-internal/sre_tools/$install_mellanox" - install_target="/usr/local/sbin/$install_mellanox" - - if [ ! -e "$install_target" ]; then - if ! 
curl --silent --show-error --fail \ - -o "/usr/local/sbin/$install_mellanox" "$script_url"; then - echo "Failed to fetch $script_url" - return 1 - fi - chmod 0755 "$install_target" - fi - - MELLANOX_VERSION="$MLNX_VER_NUM" "$install_mellanox" - - dnf list --showduplicates perftest - if [ "$gversion" == "8.5" ]; then - dnf remove -y perftest || true - fi - if $stream; then - dnf list --showduplicates ucx-knem - dnf remove -y ucx-knem || true - fi -} diff --git a/ci/provisioning/post_provision_config_nodes_LEAP.sh b/ci/provisioning/post_provision_config_nodes_LEAP.sh index 5a2a553e56f..4fbc3739f0f 100644 --- a/ci/provisioning/post_provision_config_nodes_LEAP.sh +++ b/ci/provisioning/post_provision_config_nodes_LEAP.sh @@ -1,7 +1,7 @@ #!/bin/bash # # Copyright 2021-2024 Intel Corporation. -# Copyright 2025 Hewlett Packard Enterprise Development LP +# Copyright 2025-2026 Hewlett Packard Enterprise Development LP # # SPDX-License-Identifier: BSD-2-Clause-Patent diff --git a/ci/storage/test_main_storage_prepare_node.sh b/ci/storage/test_main_storage_prepare_node.sh index a0b6811b5e8..489baa21006 100755 --- a/ci/storage/test_main_storage_prepare_node.sh +++ b/ci/storage/test_main_storage_prepare_node.sh @@ -1,7 +1,7 @@ #!/bin/bash # # Copyright 2021-2023 Intel Corporation. -# Copyright 2025 Hewlett Packard Enterprise Development LP +# Copyright 2025-2026 Hewlett Packard Enterprise Development LP # # SPDX-License-Identifier: BSD-2-Clause-Patent # @@ -44,6 +44,22 @@ if command -v ibv_devinfo; then ibv_devinfo || true; fi lspci | grep -i "Non-Volatile memory controller" || true +ib_count=0 +for ib_path in /sys/class/net/ib*; do + if [ ! -e "$ib_path" ]; then + continue + fi + ((ib_count++)) || true + ip addr show "$(basename "$ib_path")" +done + +# Skip test controller +if [ "$ib_count" -le 1 ]; then + echo "Less than 2 Infiniband devices found ($ib_count)." + echo "Assuming this is a test controller node. Skipping PMEM setup." 
+ exit +fi + if ipmctl show -dimm; then ipmctl show -goal ipmctl show -region @@ -60,12 +76,7 @@ if ipmctl show -dimm; then fi fi else - counter=0 - for ib_path in /sys/class/net/ib*; do - ((counter++)) || true - ip addr show "$(basename "$ib_path")" - done - if [ $counter -ge 2 ]; then + if [ "$ib_count" -ge 2 ]; then # All of our CI nodes with two ib adapters should have PMEM DIMMs echo 'No PMEM DIMM devices found on CI node!' exit 1 diff --git a/src/tests/ftest/scripts/main.sh b/src/tests/ftest/scripts/main.sh index 80bf68070cf..f6b4ebb4500 100755 --- a/src/tests/ftest/scripts/main.sh +++ b/src/tests/ftest/scripts/main.sh @@ -1,8 +1,8 @@ #!/bin/bash # shellcheck disable=SC1113 # /* -# * (C) Copyright 2016-2024 Intel Corporation. -# * Copyright 2025 Hewlett Packard Enterprise Development LP +# * Copyright 2016-2024 Intel Corporation. +# * Copyright 2025-2026 Hewlett Packard Enterprise Development LP # * # * SPDX-License-Identifier: BSD-2-Clause-Patent # */ @@ -58,7 +58,7 @@ unset D_PROVIDER # Disable D_INTERFACE to allow launch.py to pick the fastest interface unset D_INTERFACE -# At Oct2018 Longmond F2F it was decided that per-server logs are preferred +# At Oct2018 Longmont F2F it was decided that per-server logs are preferred # But now we need to collect them! Avoid using 'client_daos.log' due to # conflicts with the daos_test log renaming. # shellcheck disable=SC2153 @@ -74,7 +74,7 @@ if ${SETUP_ONLY:-false}; then exit 0 fi -# need to increase the number of oopen files (on EL8 at least) +# need to increase the number of open files (on EL8 at least) ulimit -n 4096 # Clean stale job results From 517c475456229fdba80aaf7f90378e0ce57a2fc2 Mon Sep 17 00:00:00 2001 From: Makito Kano Date: Tue, 3 Feb 2026 01:05:47 +0900 Subject: [PATCH 168/253] DAOS-18403 test: util/server_utils.py Update get_vos_path (#17360) Update get_vos_path to get_vos_paths so that it returns all vos paths such as /mnt/daos0, /mnt/daos1, and so on. Update the tests calling get_vos_path. 
Signed-off-by: Makito Kano --- src/tests/ftest/recovery/check_start_options.py | 4 ++-- src/tests/ftest/recovery/ddb.py | 10 +++++----- .../ftest/recovery/pool_list_consolidation.py | 2 +- src/tests/ftest/recovery/pool_membership.py | 4 ++-- src/tests/ftest/util/server_utils.py | 16 ++++++++++------ 5 files changed, 20 insertions(+), 16 deletions(-) diff --git a/src/tests/ftest/recovery/check_start_options.py b/src/tests/ftest/recovery/check_start_options.py index e7468d53309..7d91e1ec8be 100644 --- a/src/tests/ftest/recovery/check_start_options.py +++ b/src/tests/ftest/recovery/check_start_options.py @@ -1,5 +1,5 @@ """ - (C) Copyright 2025 Hewlett Packard Enterprise Development LP + (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -321,7 +321,7 @@ def test_check_start_failout(self): # 6. Remove the pool directory from the mount point. self.log_step("Remove the pool directory from the mount point.") - pool_path = self.server_managers[0].get_vos_path(pool) + pool_path = self.server_managers[0].get_vos_paths(pool)[0] pool_out = check_file_exists( hosts=self.hostlist_servers, filename=pool_path, sudo=True) if not pool_out[0]: diff --git a/src/tests/ftest/recovery/ddb.py b/src/tests/ftest/recovery/ddb.py index 17993cfba05..137f21ca5f3 100644 --- a/src/tests/ftest/recovery/ddb.py +++ b/src/tests/ftest/recovery/ddb.py @@ -1,6 +1,6 @@ """ (C) Copyright 2022-2024 Intel Corporation. - (C) Copyright 2025 Hewlett Packard Enterprise Development LP + (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -149,7 +149,7 @@ def test_recovery_ddb_ls(self): # Find the vos file name. e.g., /mnt/daos0//vos-0. 
vos_paths = self.server_managers[0].get_vos_files(pool) if not vos_paths: - self.fail(f"vos file wasn't found in {self.server_managers[0].get_vos_path(pool)}") + self.fail(f"vos file wasn't found in {self.server_managers[0].get_vos_paths(pool)[0]}") ddb_command = DdbCommand(self.server_managers[0].hosts[0:1], self.bin, vos_paths[0]) errors = [] @@ -339,7 +339,7 @@ def test_recovery_ddb_rm(self): # 3. Find the vos file name. vos_paths = self.server_managers[0].get_vos_files(pool) if not vos_paths: - self.fail(f"vos file wasn't found in {self.server_managers[0].get_vos_path(pool)}") + self.fail(f"vos file wasn't found in {self.server_managers[0].get_vos_paths(pool)[0]}") ddb_command = DdbCommand(self.server_managers[0].hosts[0:1], self.bin, vos_paths[0]) # 4. Call ddb rm to remove the akey. @@ -481,7 +481,7 @@ def test_recovery_ddb_load(self): host = self.server_managers[0].hosts[0:1] vos_paths = self.server_managers[0].get_vos_files(pool) if not vos_paths: - self.fail(f"vos file wasn't found in {self.server_managers[0].get_vos_path(pool)}") + self.fail(f"vos file wasn't found in {self.server_managers[0].get_vos_paths(pool)[0]}") ddb_command = DdbCommand(host, self.bin, vos_paths[0]) # 5. Load new data into [0]/[0]/[0]/[0] @@ -562,7 +562,7 @@ def test_recovery_ddb_dump_value(self): # 4. Find the vos file name. vos_paths = self.server_managers[0].get_vos_files(pool) if not vos_paths: - self.fail(f"vos file wasn't found in {self.server_managers[0].get_vos_path(pool)}") + self.fail(f"vos file wasn't found in {self.server_managers[0].get_vos_paths(pool)[0]}") ddb_command = DdbCommand(self.server_managers[0].hosts[0:1], self.bin, vos_paths[0]) # 5. Dump the two akeys to files. 
diff --git a/src/tests/ftest/recovery/pool_list_consolidation.py b/src/tests/ftest/recovery/pool_list_consolidation.py index 0a82463eb49..6b11a9cc386 100644 --- a/src/tests/ftest/recovery/pool_list_consolidation.py +++ b/src/tests/ftest/recovery/pool_list_consolidation.py @@ -192,7 +192,7 @@ def verify_pool_dir_removed(self, pool, errors): list: Error list. """ - pool_path = self.server_managers[0].get_vos_path(pool) + pool_path = self.server_managers[0].get_vos_paths(pool)[0] check_out = check_file_exists( hosts=self.hostlist_servers, filename=pool_path, directory=True) if check_out[0]: diff --git a/src/tests/ftest/recovery/pool_membership.py b/src/tests/ftest/recovery/pool_membership.py index 22385866e10..3bf3a9a9769 100644 --- a/src/tests/ftest/recovery/pool_membership.py +++ b/src/tests/ftest/recovery/pool_membership.py @@ -251,7 +251,7 @@ def test_dangling_pool_map(self): self.log_step("Manually remove ///vos-0 from rank 0 node.") rank_0_host = NodeSet(self.server_managers[0].get_host(0)) - vos_0_path = f"{self.server_managers[0].get_vos_path(pool)}/vos-0" + vos_0_path = f"{self.server_managers[0].get_vos_paths(pool)[0]}/vos-0" vos_0_result = check_file_exists(hosts=self.hostlist_servers, filename=vos_0_path) if not vos_0_result[0]: msg = ("MD-on-SSD cluster. 
Contents under mount point are removed by control plane " @@ -337,7 +337,7 @@ def test_dangling_rank_entry(self): self.log_step("Remove pool directory from one of the mount points.") rank_1_host = NodeSet(self.server_managers[0].get_host(1)) - pool_directory = self.server_managers[0].get_vos_path(self.pool) + pool_directory = self.server_managers[0].get_vos_paths(self.pool)[0] pool_directory_result = check_file_exists( hosts=self.hostlist_servers, filename=pool_directory, directory=True) if not pool_directory_result[0]: diff --git a/src/tests/ftest/util/server_utils.py b/src/tests/ftest/util/server_utils.py index 42581f0d43e..d36900dc54f 100644 --- a/src/tests/ftest/util/server_utils.py +++ b/src/tests/ftest/util/server_utils.py @@ -1,6 +1,6 @@ """ (C) Copyright 2018-2024 Intel Corporation. - (C) Copyright 2025 Hewlett Packard Enterprise Development LP + (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -1164,16 +1164,20 @@ def get_daos_metrics(self, verbose=False, timeout=60): engines.append(result) return engines - def get_vos_path(self, pool): - """Get the VOS file path. + def get_vos_paths(self, pool): + """Get the VOS file paths. Args: pool (TestPool): the pool containing the vos file Returns: - str: the full path to the vos file + list: the full path list to the vos file """ - return os.path.join(self.get_config_value("scm_mount"), pool.uuid.lower()) + vos_paths = [] + for engine_params in self.manager.job.yaml.engine_params: + scm_mount = engine_params.get_value("scm_mount") + vos_paths.append(os.path.join(scm_mount, pool.uuid.lower())) + return vos_paths def get_vos_files(self, pool, pattern="vos"): """Get all the VOS file paths containing the pattern. @@ -1187,7 +1191,7 @@ def get_vos_files(self, pool, pattern="vos"): /mnt/daos0//vos-0. If no matches are found the list will be empty. 
""" vos_files = [] - vos_path = self.get_vos_path(pool) + vos_path = self.get_vos_paths(pool)[0] command = command_as_user(f"ls {vos_path}", "root") result = run_remote(self.log, self.hosts[0:1], command) if result.passed: From a18e896819f83b657f441465220c6f7879796f80 Mon Sep 17 00:00:00 2001 From: Li Wei Date: Tue, 3 Feb 2026 02:22:32 +0900 Subject: [PATCH 169/253] DAOS-18478 tests: Escape self_heal value in POOL13 (#17475) When calling daos_pool_set_prop for self_heal, we have to escape the ';'s in the value. With this fix POOL16 and POOL17 rebuild tasks will start properly. And, due to #17028, each rebuild task takes 10 s longer now. Hence, we have to increase the test timeout. Signed-off-by: Li Wei --- src/tests/ftest/daos_test/suite.yaml | 2 +- src/tests/suite/daos_pool.c | 9 ++++--- src/tests/suite/daos_test.h | 3 +++ src/tests/suite/daos_test_common.c | 38 ++++++++++++++++++++++++++++ 4 files changed, 48 insertions(+), 4 deletions(-) diff --git a/src/tests/ftest/daos_test/suite.yaml b/src/tests/ftest/daos_test/suite.yaml index c9810280ecc..0a3f6a19fe0 100644 --- a/src/tests/ftest/daos_test/suite.yaml +++ b/src/tests/ftest/daos_test/suite.yaml @@ -10,7 +10,7 @@ timeout: 600 timeouts: test_daos_degraded_mode: 450 test_daos_management: 110 - test_daos_pool: 180 + test_daos_pool: 240 test_daos_container: 700 test_daos_epoch: 125 test_daos_verify_consistency: 105 diff --git a/src/tests/suite/daos_pool.c b/src/tests/suite/daos_pool.c index 0286a467241..31582d78921 100644 --- a/src/tests/suite/daos_pool.c +++ b/src/tests/suite/daos_pool.c @@ -738,12 +738,15 @@ pool_op_retry(void **state) rc = daos_pool_get_prop(arg->pool.pool_uuid, "self_heal", &orig_self_heal); assert_rc_equal(rc, 0); test_set_engine_fail_loc(arg, leader_rank, DAOS_MD_OP_PASS_NOREPLY | DAOS_FAIL_ONCE); - print_message("set pool prop (retry / dup rpc detection)... "); + print_message("set pool prop self_heal from %s to none (retry / dup rpc detection)... 
", + orig_self_heal); rc = daos_pool_set_prop(arg->pool.pool_uuid, "self_heal", "none"); assert_rc_equal(rc, 0); - print_message("success\n"); - rc = daos_pool_set_prop(arg->pool.pool_uuid, "self_heal", orig_self_heal); + char *orig_self_heal_escaped = test_escape_self_heal(orig_self_heal); + rc = daos_pool_set_prop(arg->pool.pool_uuid, "self_heal", orig_self_heal_escaped); + free(orig_self_heal_escaped); assert_rc_equal(rc, 0); + print_message("success (restored self_heal to %s)\n", orig_self_heal); free(orig_self_heal); /* pool evict success committed, "lost" reply - duplicate RPC retry */ diff --git a/src/tests/suite/daos_test.h b/src/tests/suite/daos_test.h index 002c54e9473..33353a3cb2f 100644 --- a/src/tests/suite/daos_test.h +++ b/src/tests/suite/daos_test.h @@ -766,6 +766,9 @@ void void test_set_engine_fail_value(test_arg_t *arg, d_rank_t engine_rank, uint64_t fail_value); void test_set_engine_fail_num(test_arg_t *arg, d_rank_t engine_rank, uint64_t fail_num); +char * +test_escape_self_heal(const char *value); + void test_verify_cont(test_arg_t *arg, struct test_pool *pool, struct test_cont *conts, int cont_nr); diff --git a/src/tests/suite/daos_test_common.c b/src/tests/suite/daos_test_common.c index 7f823126552..a68a564b8b3 100644 --- a/src/tests/suite/daos_test_common.c +++ b/src/tests/suite/daos_test_common.c @@ -1709,3 +1709,41 @@ test_set_engine_fail_num(test_arg_t *arg, d_rank_t engine_rank, uint64_t fail_nu rc = daos_debug_set_params(arg->group, engine_rank, DMG_KEY_FAIL_NUM, fail_num, 0, NULL); assert_rc_equal(rc, 0); } + +/** + * Duplicate unescaped \a value, escaping every ';' with '\\'. The caller is + * responsible for freeing the returned string. 
+ * + * \param[in] value self_heal value to escape + */ +char * +test_escape_self_heal(const char *value) +{ + size_t len = 0; + char *new_value; + const char *src; + char *dst; + + for (src = value; *src != '\0'; src++) { + D_ASSERT(*src != '\\'); + len++; + if (*src == ';') + len++; /* for '\\' */ + } + + D_ALLOC(new_value, len + 1 /* '\0' */); + D_ASSERT(new_value != NULL); + + dst = new_value; + for (src = value; *src != '\0'; src++) { + if (*src == ';') { + *dst++ = '\\'; + *dst++ = ';'; + } else { + *dst++ = *src; + } + } + *dst = '\0'; + + return new_value; +} From 2b86199e5cd2fa80dfe0033d92b5721f7baeda8a Mon Sep 17 00:00:00 2001 From: Li Wei Date: Tue, 3 Feb 2026 02:28:40 +0900 Subject: [PATCH 170/253] DAOS-17938 pool: Add degraded to query output (#17371) Add the following to pool query output (shown as dmg pool query output): - Data redundancy: degraded When data redundancy is intact, "normal" is shown instead of "degraded". Signed-off-by: Li Wei --- src/client/pydaos/raw/daos_cref.py | 5 +- src/control/cmd/daos/pretty/pool.go | 5 + src/control/cmd/daos/pretty/pool_test.go | 64 +++- src/control/common/proto/mgmt/pool.pb.go | 342 +++++++++--------- src/control/lib/daos/api/pool.go | 9 +- src/control/lib/daos/pool.go | 1 + src/include/daos_pool.h | 14 +- src/mgmt/pool.pb-c.c | 19 +- src/mgmt/pool.pb-c.h | 6 +- src/mgmt/srv_drpc.c | 2 + src/pool/srv_pool.c | 32 ++ src/proto/mgmt/pool.proto | 1 + src/rebuild/README.md | 6 +- .../ftest/control/dmg_pool_query_test.py | 3 +- .../ftest/control/dmg_pool_query_test.yaml | 1 + src/tests/ftest/pool/list_verbose.py | 36 +- src/tests/ftest/util/test_utils_pool.py | 6 +- 17 files changed, 339 insertions(+), 213 deletions(-) diff --git a/src/client/pydaos/raw/daos_cref.py b/src/client/pydaos/raw/daos_cref.py index 3878912b698..f86faaa937d 100644 --- a/src/client/pydaos/raw/daos_cref.py +++ b/src/client/pydaos/raw/daos_cref.py @@ -1,6 +1,6 @@ """ (C) Copyright 2018-2023 Intel Corporation. 
- (C) Copyright 2025 Hewlett Packard Enterprise Development LP + (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -53,7 +53,8 @@ class RebuildStatus(ctypes.Structure): ("rs_errno", ctypes.c_uint32), ("rs_state", ctypes.c_uint32), ("rs_max_supported_layout_ver", ctypes.c_uint16), - ("rs_padding16", ctypes.c_uint16), + ("rs_flags", ctypes.c_uint8), + ("rs_reserved_", ctypes.c_uint8), ("rs_fail_rank", ctypes.c_uint32), ("rs_toberb_obj_nr", ctypes.c_uint64), ("rs_obj_nr", ctypes.c_uint64), diff --git a/src/control/cmd/daos/pretty/pool.go b/src/control/cmd/daos/pretty/pool.go index e6665cedc4d..c2650137f44 100644 --- a/src/control/cmd/daos/pretty/pool.go +++ b/src/control/cmd/daos/pretty/pool.go @@ -139,6 +139,11 @@ func PrintPoolInfo(pi *daos.PoolInfo, out io.Writer) error { fmt.Fprintf(w, "- Rebuild %s (state=%s, status=%d)\n", pi.Rebuild.DerivedState, pi.Rebuild.State, pi.Rebuild.Status) } + s := "normal" + if pi.Rebuild.Degraded { + s = "degraded" + } + fmt.Fprintf(w, "- Data redundancy: %s\n", s) } else { fmt.Fprintln(w, "- No rebuild status available.") } diff --git a/src/control/cmd/daos/pretty/pool_test.go b/src/control/cmd/daos/pretty/pool_test.go index 8bb07a787f1..0c20b247ae8 100644 --- a/src/control/cmd/daos/pretty/pool_test.go +++ b/src/control/cmd/daos/pretty/pool_test.go @@ -50,9 +50,10 @@ Pool health info: PoolLayoutVer: 1, UpgradeLayoutVer: 2, Rebuild: &daos.PoolRebuildStatus{ - State: daos.PoolRebuildStateBusy, - Objects: 42, - Records: 21, + State: daos.PoolRebuildStateBusy, + Objects: 42, + Records: 21, + Degraded: true, }, TierStats: []*daos.StorageUsageStats{ { @@ -72,6 +73,7 @@ Pool %s, ntarget=2, disabled=1, leader=42, version=100, state=TargetsExcluded Pool layout out of date (1 < 2) -- see `+backtickStr+` for details. 
Pool health info: - Rebuild busy, 42 objs, 21 recs +- Data redundancy: degraded Pool space info: - Target count:1 - Storage tier 0 (SCM): @@ -96,9 +98,10 @@ Pool space info: UpgradeLayoutVer: 2, EnabledRanks: ranklist.MustCreateRankSet("[0,1,2]"), Rebuild: &daos.PoolRebuildStatus{ - State: daos.PoolRebuildStateBusy, - Objects: 42, - Records: 21, + State: daos.PoolRebuildStateBusy, + Objects: 42, + Records: 21, + Degraded: true, }, TierStats: []*daos.StorageUsageStats{ { @@ -119,6 +122,7 @@ Pool layout out of date (1 < 2) -- see `+backtickStr+` for details. Pool health info: - Enabled ranks: 0-2 - Rebuild busy, 42 objs, 21 recs +- Data redundancy: degraded Pool space info: - Target count:1 - Storage tier 0 (SCM): @@ -144,9 +148,10 @@ Pool space info: DisabledRanks: ranklist.MustCreateRankSet("[0,1,3]"), DeadRanks: ranklist.MustCreateRankSet("[2]"), Rebuild: &daos.PoolRebuildStatus{ - State: daos.PoolRebuildStateBusy, - Objects: 42, - Records: 21, + State: daos.PoolRebuildStateBusy, + Objects: 42, + Records: 21, + Degraded: true, }, TierStats: []*daos.StorageUsageStats{ { @@ -166,6 +171,7 @@ Pool health info: - Disabled ranks: 0-1,3 - Dead ranks: 2 - Rebuild busy, 42 objs, 21 recs +- Data redundancy: degraded `, poolUUID.String()), }, "normal response; disabled ranks": { @@ -182,9 +188,10 @@ Pool health info: UpgradeLayoutVer: 2, DisabledRanks: ranklist.MustCreateRankSet("[0,1,3]"), Rebuild: &daos.PoolRebuildStatus{ - State: daos.PoolRebuildStateBusy, - Objects: 42, - Records: 21, + State: daos.PoolRebuildStateBusy, + Objects: 42, + Records: 21, + Degraded: true, }, TierStats: []*daos.StorageUsageStats{ { @@ -205,6 +212,7 @@ Pool layout out of date (1 < 2) -- see `+backtickStr+` for details. 
Pool health info: - Disabled ranks: 0-1,3 - Rebuild busy, 42 objs, 21 recs +- Data redundancy: degraded Pool space info: - Target count:1 - Storage tier 0 (SCM): @@ -229,9 +237,10 @@ Pool space info: UpgradeLayoutVer: 2, DisabledRanks: ranklist.MustCreateRankSet("[0,1,3]"), Rebuild: &daos.PoolRebuildStatus{ - State: 42, - Objects: 42, - Records: 21, + State: 42, + Objects: 42, + Records: 21, + Degraded: false, }, TierStats: []*daos.StorageUsageStats{ { @@ -252,6 +261,7 @@ Pool layout out of date (1 < 2) -- see `+backtickStr+` for details. Pool health info: - Disabled ranks: 0-1,3 - Rebuild unknown, 42 objs, 21 recs +- Data redundancy: normal Pool space info: - Target count:1 - Storage tier 0 (SCM): @@ -280,6 +290,7 @@ Pool space info: DerivedState: daos.PoolRebuildStateFailing, Objects: 42, Records: 21, + Degraded: true, }, TierStats: []*daos.StorageUsageStats{ { @@ -300,6 +311,7 @@ Pool %s, ntarget=2, disabled=1, leader=42, version=100, state=TargetsExcluded Pool layout out of date (1 < 2) -- see `+backtickStr+` for details. Pool health info: - Rebuild failing (state=busy, status=-2) +- Data redundancy: degraded Pool space info: - Target count:1 - Storage tier 0 (SCM): @@ -323,9 +335,10 @@ Pool space info: PoolLayoutVer: 1, UpgradeLayoutVer: 2, Rebuild: &daos.PoolRebuildStatus{ - State: daos.PoolRebuildStateBusy, - Objects: 42, - Records: 21, + State: daos.PoolRebuildStateBusy, + Objects: 42, + Records: 21, + Degraded: true, }, TierStats: []*daos.StorageUsageStats{ { @@ -347,6 +360,7 @@ Pool %s, ntarget=2, disabled=1, leader=42, version=100, state=TargetsExcluded Pool layout out of date (1 < 2) -- see `+backtickStr+` for details. 
Pool health info: - Rebuild busy, 42 objs, 21 recs +- Data redundancy: degraded Pool space info: - Target count:1 - Total memory-file size: 1.1 GB @@ -370,12 +384,14 @@ Pool space info: Status: 0, Objects: 0, Records: 0, + Degraded: true, }, }, expPrintStr: fmt.Sprintf(` Pool %s, ntarget=8, disabled=0, leader=0, version=0, state=Ready Pool health info: - Rebuild idle, 0 objs, 0 recs +- Data redundancy: degraded `, poolUUID.String()), }, "rebuild state stopped": { @@ -390,12 +406,14 @@ Pool health info: Status: int32(daos.OpCanceled), Objects: 0, Records: 0, + Degraded: true, }, }, expPrintStr: fmt.Sprintf(` Pool %s, ntarget=8, disabled=0, leader=0, version=0, state=Ready Pool health info: - Rebuild stopped (state=done, status=-2027) +- Data redundancy: degraded `, poolUUID.String()), }, "rebuild state done": { @@ -410,12 +428,14 @@ Pool health info: Status: 0, Objects: 200, Records: 1000, + Degraded: false, }, }, expPrintStr: fmt.Sprintf(` Pool %s, ntarget=8, disabled=0, leader=0, version=0, state=Ready Pool health info: - Rebuild done, 200 objs, 1000 recs +- Data redundancy: normal `, poolUUID.String()), }, "rebuild state failed": { @@ -428,12 +448,14 @@ Pool health info: State: daos.PoolRebuildStateDone, DerivedState: daos.PoolRebuildStateFailed, Status: -1, + Degraded: true, }, }, expPrintStr: fmt.Sprintf(` Pool %s, ntarget=8, disabled=0, leader=0, version=0, state=Ready Pool health info: - Rebuild failed (state=done, status=-1) +- Data redundancy: degraded `, poolUUID.String()), }, "rebuild state busy": { @@ -448,12 +470,14 @@ Pool health info: Status: 0, Objects: 150, Records: 750, + Degraded: true, }, }, expPrintStr: fmt.Sprintf(` Pool %s, ntarget=8, disabled=0, leader=0, version=0, state=Ready Pool health info: - Rebuild busy, 150 objs, 750 recs +- Data redundancy: degraded `, poolUUID.String()), }, "rebuild state stopping": { @@ -468,12 +492,14 @@ Pool health info: Status: int32(daos.OpCanceled), Objects: 100, Records: 500, + Degraded: true, }, }, 
expPrintStr: fmt.Sprintf(` Pool %s, ntarget=8, disabled=0, leader=0, version=0, state=Ready Pool health info: - Rebuild stopping (state=busy, status=-2027) +- Data redundancy: degraded `, poolUUID.String()), }, "rebuild state failing": { @@ -488,12 +514,14 @@ Pool health info: Status: -1, Objects: 75, Records: 300, + Degraded: true, }, }, expPrintStr: fmt.Sprintf(` Pool %s, ntarget=8, disabled=0, leader=0, version=0, state=Ready Pool health info: - Rebuild failing (state=busy, status=-1) +- Data redundancy: degraded `, poolUUID.String()), }, } { diff --git a/src/control/common/proto/mgmt/pool.pb.go b/src/control/common/proto/mgmt/pool.pb.go index d3f73fda719..12286b528fb 100644 --- a/src/control/common/proto/mgmt/pool.pb.go +++ b/src/control/common/proto/mgmt/pool.pb.go @@ -1747,6 +1747,7 @@ type PoolRebuildStatus struct { Objects uint64 `protobuf:"varint,3,opt,name=objects,proto3" json:"objects,omitempty"` Records uint64 `protobuf:"varint,4,opt,name=records,proto3" json:"records,omitempty"` DerivedState PoolRebuildStatus_State `protobuf:"varint,5,opt,name=derived_state,json=derivedState,proto3,enum=mgmt.PoolRebuildStatus_State" json:"derived_state,omitempty"` + Degraded bool `protobuf:"varint,6,opt,name=degraded,proto3" json:"degraded,omitempty"` // data redundancy degraded } func (x *PoolRebuildStatus) Reset() { @@ -1816,6 +1817,13 @@ func (x *PoolRebuildStatus) GetDerivedState() PoolRebuildStatus_State { return PoolRebuildStatus_BUSY } +func (x *PoolRebuildStatus) GetDegraded() bool { + if x != nil { + return x.Degraded + } + return false +} + // PoolQueryResp represents a pool query response. 
type PoolQueryResp struct { state protoimpl.MessageState @@ -3237,7 +3245,7 @@ var file_mgmt_pool_proto_rawDesc = []byte{ 0x35, 0x0a, 0x0a, 0x6d, 0x65, 0x64, 0x69, 0x61, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x16, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x4d, 0x65, 0x64, 0x69, 0x61, 0x54, 0x79, 0x70, 0x65, 0x52, 0x09, 0x6d, 0x65, 0x64, - 0x69, 0x61, 0x54, 0x79, 0x70, 0x65, 0x22, 0xb3, 0x02, 0x0a, 0x11, 0x50, 0x6f, 0x6f, 0x6c, 0x52, + 0x69, 0x61, 0x54, 0x79, 0x70, 0x65, 0x22, 0xcf, 0x02, 0x0a, 0x11, 0x50, 0x6f, 0x6f, 0x6c, 0x52, 0x65, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x33, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x02, 0x20, @@ -3251,179 +3259,181 @@ var file_mgmt_pool_proto_rawDesc = []byte{ 0x20, 0x01, 0x28, 0x0e, 0x32, 0x1d, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x52, 0x65, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x0c, 0x64, 0x65, 0x72, 0x69, 0x76, 0x65, 0x64, 0x53, 0x74, 0x61, 0x74, - 0x65, 0x22, 0x59, 0x0a, 0x05, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x08, 0x0a, 0x04, 0x42, 0x55, - 0x53, 0x59, 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, 0x49, 0x44, 0x4c, 0x45, 0x10, 0x01, 0x12, 0x08, - 0x0a, 0x04, 0x44, 0x4f, 0x4e, 0x45, 0x10, 0x02, 0x12, 0x0c, 0x0a, 0x08, 0x53, 0x54, 0x4f, 0x50, - 0x50, 0x49, 0x4e, 0x47, 0x10, 0x03, 0x12, 0x0b, 0x0a, 0x07, 0x53, 0x54, 0x4f, 0x50, 0x50, 0x45, - 0x44, 0x10, 0x04, 0x12, 0x0b, 0x0a, 0x07, 0x46, 0x41, 0x49, 0x4c, 0x49, 0x4e, 0x47, 0x10, 0x05, - 0x12, 0x0a, 0x0a, 0x06, 0x46, 0x41, 0x49, 0x4c, 0x45, 0x44, 0x10, 0x06, 0x22, 0x89, 0x07, 0x0a, - 0x0d, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, - 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 
0x20, 0x01, 0x28, 0x05, 0x52, 0x06, - 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x75, 0x75, 0x69, 0x64, 0x18, 0x02, - 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x75, 0x75, 0x69, 0x64, 0x12, 0x14, 0x0a, 0x05, 0x6c, 0x61, - 0x62, 0x65, 0x6c, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6c, 0x61, 0x62, 0x65, 0x6c, - 0x12, 0x23, 0x0a, 0x0d, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, - 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0c, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x54, 0x61, - 0x72, 0x67, 0x65, 0x74, 0x73, 0x12, 0x25, 0x0a, 0x0e, 0x61, 0x63, 0x74, 0x69, 0x76, 0x65, 0x5f, - 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0d, 0x61, - 0x63, 0x74, 0x69, 0x76, 0x65, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 0x12, 0x29, 0x0a, 0x10, - 0x64, 0x69, 0x73, 0x61, 0x62, 0x6c, 0x65, 0x64, 0x5f, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, - 0x18, 0x06, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0f, 0x64, 0x69, 0x73, 0x61, 0x62, 0x6c, 0x65, 0x64, - 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 0x12, 0x31, 0x0a, 0x07, 0x72, 0x65, 0x62, 0x75, 0x69, - 0x6c, 0x64, 0x18, 0x07, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x17, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, - 0x50, 0x6f, 0x6f, 0x6c, 0x52, 0x65, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x53, 0x74, 0x61, 0x74, 0x75, - 0x73, 0x52, 0x07, 0x72, 0x65, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x12, 0x36, 0x0a, 0x0a, 0x74, 0x69, - 0x65, 0x72, 0x5f, 0x73, 0x74, 0x61, 0x74, 0x73, 0x18, 0x08, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x17, - 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x55, 0x73, 0x61, - 0x67, 0x65, 0x53, 0x74, 0x61, 0x74, 0x73, 0x52, 0x09, 0x74, 0x69, 0x65, 0x72, 0x53, 0x74, 0x61, - 0x74, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x0a, 0x20, - 0x01, 0x28, 0x0d, 0x52, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x16, 0x0a, 0x06, - 0x6c, 0x65, 0x61, 0x64, 0x65, 0x72, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x06, 0x6c, 0x65, - 
0x61, 0x64, 0x65, 0x72, 0x12, 0x23, 0x0a, 0x0d, 0x65, 0x6e, 0x61, 0x62, 0x6c, 0x65, 0x64, 0x5f, - 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x0c, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0c, 0x65, 0x6e, 0x61, - 0x62, 0x6c, 0x65, 0x64, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x25, 0x0a, 0x0e, 0x64, 0x69, 0x73, - 0x61, 0x62, 0x6c, 0x65, 0x64, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x0d, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x0d, 0x64, 0x69, 0x73, 0x61, 0x62, 0x6c, 0x65, 0x64, 0x52, 0x61, 0x6e, 0x6b, 0x73, - 0x12, 0x23, 0x0a, 0x0d, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x65, 0x6e, 0x67, 0x69, 0x6e, 0x65, - 0x73, 0x18, 0x0e, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0c, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x45, 0x6e, - 0x67, 0x69, 0x6e, 0x65, 0x73, 0x12, 0x26, 0x0a, 0x0f, 0x70, 0x6f, 0x6f, 0x6c, 0x5f, 0x6c, 0x61, - 0x79, 0x6f, 0x75, 0x74, 0x5f, 0x76, 0x65, 0x72, 0x18, 0x0f, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0d, - 0x70, 0x6f, 0x6f, 0x6c, 0x4c, 0x61, 0x79, 0x6f, 0x75, 0x74, 0x56, 0x65, 0x72, 0x12, 0x2c, 0x0a, - 0x12, 0x75, 0x70, 0x67, 0x72, 0x61, 0x64, 0x65, 0x5f, 0x6c, 0x61, 0x79, 0x6f, 0x75, 0x74, 0x5f, - 0x76, 0x65, 0x72, 0x18, 0x10, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x10, 0x75, 0x70, 0x67, 0x72, 0x61, - 0x64, 0x65, 0x4c, 0x61, 0x79, 0x6f, 0x75, 0x74, 0x56, 0x65, 0x72, 0x12, 0x2c, 0x0a, 0x05, 0x73, - 0x74, 0x61, 0x74, 0x65, 0x18, 0x11, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x16, 0x2e, 0x6d, 0x67, 0x6d, - 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x53, 0x74, 0x61, - 0x74, 0x65, 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x12, 0x17, 0x0a, 0x07, 0x73, 0x76, 0x63, - 0x5f, 0x6c, 0x64, 0x72, 0x18, 0x12, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x06, 0x73, 0x76, 0x63, 0x4c, - 0x64, 0x72, 0x12, 0x19, 0x0a, 0x08, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x65, 0x70, 0x73, 0x18, 0x13, - 0x20, 0x03, 0x28, 0x0d, 0x52, 0x07, 0x73, 0x76, 0x63, 0x52, 0x65, 0x70, 0x73, 0x12, 0x1d, 0x0a, - 0x0a, 0x71, 0x75, 0x65, 0x72, 0x79, 0x5f, 0x6d, 0x61, 0x73, 0x6b, 0x18, 0x14, 0x20, 0x01, 0x28, - 0x04, 0x52, 0x09, 0x71, 0x75, 0x65, 
0x72, 0x79, 0x4d, 0x61, 0x73, 0x6b, 0x12, 0x24, 0x0a, 0x0e, - 0x6d, 0x65, 0x6d, 0x5f, 0x66, 0x69, 0x6c, 0x65, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x73, 0x18, 0x15, - 0x20, 0x01, 0x28, 0x04, 0x52, 0x0c, 0x6d, 0x65, 0x6d, 0x46, 0x69, 0x6c, 0x65, 0x42, 0x79, 0x74, - 0x65, 0x73, 0x12, 0x1d, 0x0a, 0x0a, 0x64, 0x65, 0x61, 0x64, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, - 0x18, 0x16, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x64, 0x65, 0x61, 0x64, 0x52, 0x61, 0x6e, 0x6b, - 0x73, 0x12, 0x27, 0x0a, 0x10, 0x6d, 0x64, 0x5f, 0x6f, 0x6e, 0x5f, 0x73, 0x73, 0x64, 0x5f, 0x61, - 0x63, 0x74, 0x69, 0x76, 0x65, 0x18, 0x17, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0d, 0x6d, 0x64, 0x4f, - 0x6e, 0x53, 0x73, 0x64, 0x41, 0x63, 0x74, 0x69, 0x76, 0x65, 0x12, 0x28, 0x0a, 0x10, 0x73, 0x65, - 0x6c, 0x66, 0x5f, 0x68, 0x65, 0x61, 0x6c, 0x5f, 0x70, 0x6f, 0x6c, 0x69, 0x63, 0x79, 0x18, 0x18, - 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x73, 0x65, 0x6c, 0x66, 0x48, 0x65, 0x61, 0x6c, 0x50, 0x6f, - 0x6c, 0x69, 0x63, 0x79, 0x12, 0x2f, 0x0a, 0x14, 0x73, 0x79, 0x73, 0x5f, 0x73, 0x65, 0x6c, 0x66, - 0x5f, 0x68, 0x65, 0x61, 0x6c, 0x5f, 0x70, 0x6f, 0x6c, 0x69, 0x63, 0x79, 0x18, 0x19, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x11, 0x73, 0x79, 0x73, 0x53, 0x65, 0x6c, 0x66, 0x48, 0x65, 0x61, 0x6c, 0x50, - 0x6f, 0x6c, 0x69, 0x63, 0x79, 0x4a, 0x04, 0x08, 0x09, 0x10, 0x0a, 0x52, 0x0b, 0x74, 0x6f, 0x74, - 0x61, 0x6c, 0x5f, 0x6e, 0x6f, 0x64, 0x65, 0x73, 0x22, 0x63, 0x0a, 0x0c, 0x50, 0x6f, 0x6f, 0x6c, - 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x79, 0x12, 0x16, 0x0a, 0x06, 0x6e, 0x75, 0x6d, 0x62, - 0x65, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x06, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, - 0x12, 0x18, 0x0a, 0x06, 0x73, 0x74, 0x72, 0x76, 0x61, 0x6c, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, - 0x48, 0x00, 0x52, 0x06, 0x73, 0x74, 0x72, 0x76, 0x61, 0x6c, 0x12, 0x18, 0x0a, 0x06, 0x6e, 0x75, - 0x6d, 0x76, 0x61, 0x6c, 0x18, 0x03, 0x20, 0x01, 0x28, 0x04, 0x48, 0x00, 0x52, 0x06, 0x6e, 0x75, - 0x6d, 0x76, 0x61, 0x6c, 0x42, 0x07, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 
0x65, 0x22, 0x83, 0x01, - 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x53, 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, 0x71, - 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, - 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, - 0x69, 0x64, 0x12, 0x32, 0x0a, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, - 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, - 0x6f, 0x6c, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x79, 0x52, 0x0a, 0x70, 0x72, 0x6f, 0x70, - 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, - 0x6e, 0x6b, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, - 0x6e, 0x6b, 0x73, 0x22, 0x29, 0x0a, 0x0f, 0x50, 0x6f, 0x6f, 0x6c, 0x53, 0x65, 0x74, 0x50, 0x72, - 0x6f, 0x70, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, - 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x22, 0x83, - 0x01, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x47, 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, - 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, - 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x02, 0x69, 0x64, 0x12, 0x32, 0x0a, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, - 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, - 0x6f, 0x6f, 0x6c, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x79, 0x52, 0x0a, 0x70, 0x72, 0x6f, - 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, - 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, - 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x5d, 0x0a, 0x0f, 0x50, 0x6f, 0x6f, 0x6c, 0x47, 0x65, 0x74, 0x50, - 0x72, 0x6f, 
0x70, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, - 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, - 0x32, 0x0a, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x18, 0x02, 0x20, + 0x65, 0x12, 0x1a, 0x0a, 0x08, 0x64, 0x65, 0x67, 0x72, 0x61, 0x64, 0x65, 0x64, 0x18, 0x06, 0x20, + 0x01, 0x28, 0x08, 0x52, 0x08, 0x64, 0x65, 0x67, 0x72, 0x61, 0x64, 0x65, 0x64, 0x22, 0x59, 0x0a, + 0x05, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x08, 0x0a, 0x04, 0x42, 0x55, 0x53, 0x59, 0x10, 0x00, + 0x12, 0x08, 0x0a, 0x04, 0x49, 0x44, 0x4c, 0x45, 0x10, 0x01, 0x12, 0x08, 0x0a, 0x04, 0x44, 0x4f, + 0x4e, 0x45, 0x10, 0x02, 0x12, 0x0c, 0x0a, 0x08, 0x53, 0x54, 0x4f, 0x50, 0x50, 0x49, 0x4e, 0x47, + 0x10, 0x03, 0x12, 0x0b, 0x0a, 0x07, 0x53, 0x54, 0x4f, 0x50, 0x50, 0x45, 0x44, 0x10, 0x04, 0x12, + 0x0b, 0x0a, 0x07, 0x46, 0x41, 0x49, 0x4c, 0x49, 0x4e, 0x47, 0x10, 0x05, 0x12, 0x0a, 0x0a, 0x06, + 0x46, 0x41, 0x49, 0x4c, 0x45, 0x44, 0x10, 0x06, 0x22, 0x89, 0x07, 0x0a, 0x0d, 0x50, 0x6f, 0x6f, + 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, + 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, + 0x75, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x75, 0x75, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x04, 0x75, 0x75, 0x69, 0x64, 0x12, 0x14, 0x0a, 0x05, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x18, + 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x12, 0x23, 0x0a, 0x0d, + 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 0x18, 0x04, 0x20, + 0x01, 0x28, 0x0d, 0x52, 0x0c, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, + 0x73, 0x12, 0x25, 0x0a, 0x0e, 0x61, 0x63, 0x74, 0x69, 0x76, 0x65, 0x5f, 0x74, 0x61, 0x72, 0x67, + 0x65, 0x74, 0x73, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0d, 0x61, 0x63, 0x74, 0x69, 0x76, + 0x65, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 
0x12, 0x29, 0x0a, 0x10, 0x64, 0x69, 0x73, 0x61, + 0x62, 0x6c, 0x65, 0x64, 0x5f, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 0x18, 0x06, 0x20, 0x01, + 0x28, 0x0d, 0x52, 0x0f, 0x64, 0x69, 0x73, 0x61, 0x62, 0x6c, 0x65, 0x64, 0x54, 0x61, 0x72, 0x67, + 0x65, 0x74, 0x73, 0x12, 0x31, 0x0a, 0x07, 0x72, 0x65, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x18, 0x07, + 0x20, 0x01, 0x28, 0x0b, 0x32, 0x17, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, + 0x52, 0x65, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x07, 0x72, + 0x65, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x12, 0x36, 0x0a, 0x0a, 0x74, 0x69, 0x65, 0x72, 0x5f, 0x73, + 0x74, 0x61, 0x74, 0x73, 0x18, 0x08, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x17, 0x2e, 0x6d, 0x67, 0x6d, + 0x74, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x55, 0x73, 0x61, 0x67, 0x65, 0x53, 0x74, + 0x61, 0x74, 0x73, 0x52, 0x09, 0x74, 0x69, 0x65, 0x72, 0x53, 0x74, 0x61, 0x74, 0x73, 0x12, 0x18, + 0x0a, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x0a, 0x20, 0x01, 0x28, 0x0d, 0x52, + 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x16, 0x0a, 0x06, 0x6c, 0x65, 0x61, 0x64, + 0x65, 0x72, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x06, 0x6c, 0x65, 0x61, 0x64, 0x65, 0x72, + 0x12, 0x23, 0x0a, 0x0d, 0x65, 0x6e, 0x61, 0x62, 0x6c, 0x65, 0x64, 0x5f, 0x72, 0x61, 0x6e, 0x6b, + 0x73, 0x18, 0x0c, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0c, 0x65, 0x6e, 0x61, 0x62, 0x6c, 0x65, 0x64, + 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x25, 0x0a, 0x0e, 0x64, 0x69, 0x73, 0x61, 0x62, 0x6c, 0x65, + 0x64, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x0d, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x64, + 0x69, 0x73, 0x61, 0x62, 0x6c, 0x65, 0x64, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x23, 0x0a, 0x0d, + 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x65, 0x6e, 0x67, 0x69, 0x6e, 0x65, 0x73, 0x18, 0x0e, 0x20, + 0x01, 0x28, 0x0d, 0x52, 0x0c, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x45, 0x6e, 0x67, 0x69, 0x6e, 0x65, + 0x73, 0x12, 0x26, 0x0a, 0x0f, 0x70, 0x6f, 0x6f, 0x6c, 0x5f, 0x6c, 0x61, 0x79, 0x6f, 
0x75, 0x74, + 0x5f, 0x76, 0x65, 0x72, 0x18, 0x0f, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0d, 0x70, 0x6f, 0x6f, 0x6c, + 0x4c, 0x61, 0x79, 0x6f, 0x75, 0x74, 0x56, 0x65, 0x72, 0x12, 0x2c, 0x0a, 0x12, 0x75, 0x70, 0x67, + 0x72, 0x61, 0x64, 0x65, 0x5f, 0x6c, 0x61, 0x79, 0x6f, 0x75, 0x74, 0x5f, 0x76, 0x65, 0x72, 0x18, + 0x10, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x10, 0x75, 0x70, 0x67, 0x72, 0x61, 0x64, 0x65, 0x4c, 0x61, + 0x79, 0x6f, 0x75, 0x74, 0x56, 0x65, 0x72, 0x12, 0x2c, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, + 0x18, 0x11, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x16, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, + 0x6f, 0x6c, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x05, + 0x73, 0x74, 0x61, 0x74, 0x65, 0x12, 0x17, 0x0a, 0x07, 0x73, 0x76, 0x63, 0x5f, 0x6c, 0x64, 0x72, + 0x18, 0x12, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x06, 0x73, 0x76, 0x63, 0x4c, 0x64, 0x72, 0x12, 0x19, + 0x0a, 0x08, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x65, 0x70, 0x73, 0x18, 0x13, 0x20, 0x03, 0x28, 0x0d, + 0x52, 0x07, 0x73, 0x76, 0x63, 0x52, 0x65, 0x70, 0x73, 0x12, 0x1d, 0x0a, 0x0a, 0x71, 0x75, 0x65, + 0x72, 0x79, 0x5f, 0x6d, 0x61, 0x73, 0x6b, 0x18, 0x14, 0x20, 0x01, 0x28, 0x04, 0x52, 0x09, 0x71, + 0x75, 0x65, 0x72, 0x79, 0x4d, 0x61, 0x73, 0x6b, 0x12, 0x24, 0x0a, 0x0e, 0x6d, 0x65, 0x6d, 0x5f, + 0x66, 0x69, 0x6c, 0x65, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x73, 0x18, 0x15, 0x20, 0x01, 0x28, 0x04, + 0x52, 0x0c, 0x6d, 0x65, 0x6d, 0x46, 0x69, 0x6c, 0x65, 0x42, 0x79, 0x74, 0x65, 0x73, 0x12, 0x1d, + 0x0a, 0x0a, 0x64, 0x65, 0x61, 0x64, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x16, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x09, 0x64, 0x65, 0x61, 0x64, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x12, 0x27, 0x0a, + 0x10, 0x6d, 0x64, 0x5f, 0x6f, 0x6e, 0x5f, 0x73, 0x73, 0x64, 0x5f, 0x61, 0x63, 0x74, 0x69, 0x76, + 0x65, 0x18, 0x17, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0d, 0x6d, 0x64, 0x4f, 0x6e, 0x53, 0x73, 0x64, + 0x41, 0x63, 0x74, 0x69, 0x76, 0x65, 0x12, 0x28, 0x0a, 0x10, 0x73, 0x65, 0x6c, 0x66, 0x5f, 0x68, + 0x65, 0x61, 0x6c, 0x5f, 
0x70, 0x6f, 0x6c, 0x69, 0x63, 0x79, 0x18, 0x18, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x0e, 0x73, 0x65, 0x6c, 0x66, 0x48, 0x65, 0x61, 0x6c, 0x50, 0x6f, 0x6c, 0x69, 0x63, 0x79, + 0x12, 0x2f, 0x0a, 0x14, 0x73, 0x79, 0x73, 0x5f, 0x73, 0x65, 0x6c, 0x66, 0x5f, 0x68, 0x65, 0x61, + 0x6c, 0x5f, 0x70, 0x6f, 0x6c, 0x69, 0x63, 0x79, 0x18, 0x19, 0x20, 0x01, 0x28, 0x09, 0x52, 0x11, + 0x73, 0x79, 0x73, 0x53, 0x65, 0x6c, 0x66, 0x48, 0x65, 0x61, 0x6c, 0x50, 0x6f, 0x6c, 0x69, 0x63, + 0x79, 0x4a, 0x04, 0x08, 0x09, 0x10, 0x0a, 0x52, 0x0b, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x6e, + 0x6f, 0x64, 0x65, 0x73, 0x22, 0x63, 0x0a, 0x0c, 0x50, 0x6f, 0x6f, 0x6c, 0x50, 0x72, 0x6f, 0x70, + 0x65, 0x72, 0x74, 0x79, 0x12, 0x16, 0x0a, 0x06, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x0d, 0x52, 0x06, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x12, 0x18, 0x0a, 0x06, + 0x73, 0x74, 0x72, 0x76, 0x61, 0x6c, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x48, 0x00, 0x52, 0x06, + 0x73, 0x74, 0x72, 0x76, 0x61, 0x6c, 0x12, 0x18, 0x0a, 0x06, 0x6e, 0x75, 0x6d, 0x76, 0x61, 0x6c, + 0x18, 0x03, 0x20, 0x01, 0x28, 0x04, 0x48, 0x00, 0x52, 0x06, 0x6e, 0x75, 0x6d, 0x76, 0x61, 0x6c, + 0x42, 0x07, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x22, 0x83, 0x01, 0x0a, 0x0e, 0x50, 0x6f, + 0x6f, 0x6c, 0x53, 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, + 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, + 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x32, + 0x0a, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x18, 0x03, 0x20, 0x03, + 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x50, 0x72, + 0x6f, 0x70, 0x65, 0x72, 0x74, 0x79, 0x52, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, + 0x65, 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, + 0x04, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 
0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, + 0x29, 0x0a, 0x0f, 0x50, 0x6f, 0x6f, 0x6c, 0x53, 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, + 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x22, 0x83, 0x01, 0x0a, 0x0e, 0x50, + 0x6f, 0x6f, 0x6c, 0x47, 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, + 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, + 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, + 0x32, 0x0a, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x79, 0x52, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, - 0x69, 0x65, 0x73, 0x22, 0x4f, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x55, 0x70, 0x67, 0x72, 0x61, - 0x64, 0x65, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, - 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, - 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x81, 0x01, 0x0a, 0x12, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, - 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, - 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, - 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x12, 0x0a, - 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, - 0x6b, 0x12, 0x18, 0x0a, 0x07, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 0x18, 0x04, 0x20, 0x03, - 0x28, 
0x0d, 0x52, 0x07, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x73, - 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, - 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x75, 0x0a, 0x12, 0x53, 0x74, 0x6f, 0x72, - 0x61, 0x67, 0x65, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x55, 0x73, 0x61, 0x67, 0x65, 0x12, 0x14, - 0x0a, 0x05, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28, 0x04, 0x52, 0x05, 0x74, - 0x6f, 0x74, 0x61, 0x6c, 0x12, 0x12, 0x0a, 0x04, 0x66, 0x72, 0x65, 0x65, 0x18, 0x02, 0x20, 0x01, - 0x28, 0x04, 0x52, 0x04, 0x66, 0x72, 0x65, 0x65, 0x12, 0x35, 0x0a, 0x0a, 0x6d, 0x65, 0x64, 0x69, - 0x61, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x16, 0x2e, 0x6d, - 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x4d, 0x65, 0x64, 0x69, 0x61, - 0x54, 0x79, 0x70, 0x65, 0x52, 0x09, 0x6d, 0x65, 0x64, 0x69, 0x61, 0x54, 0x79, 0x70, 0x65, 0x22, - 0xb8, 0x02, 0x0a, 0x13, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, 0x72, - 0x67, 0x65, 0x74, 0x49, 0x6e, 0x66, 0x6f, 0x12, 0x3b, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, - 0x18, 0x02, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x25, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, - 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x49, 0x6e, 0x66, - 0x6f, 0x2e, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x05, 0x73, - 0x74, 0x61, 0x74, 0x65, 0x12, 0x2e, 0x0a, 0x05, 0x73, 0x70, 0x61, 0x63, 0x65, 0x18, 0x03, 0x20, - 0x03, 0x28, 0x0b, 0x32, 0x18, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x61, - 0x67, 0x65, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x55, 0x73, 0x61, 0x67, 0x65, 0x52, 0x05, 0x73, - 0x70, 0x61, 0x63, 0x65, 0x12, 0x24, 0x0a, 0x0e, 0x6d, 0x65, 0x6d, 0x5f, 0x66, 0x69, 0x6c, 0x65, - 0x5f, 0x62, 0x79, 0x74, 0x65, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x04, 0x52, 0x0c, 0x6d, 0x65, - 0x6d, 0x46, 0x69, 0x6c, 0x65, 0x42, 0x79, 
0x74, 0x65, 0x73, 0x12, 0x27, 0x0a, 0x10, 0x6d, 0x64, - 0x5f, 0x6f, 0x6e, 0x5f, 0x73, 0x73, 0x64, 0x5f, 0x61, 0x63, 0x74, 0x69, 0x76, 0x65, 0x18, 0x05, - 0x20, 0x01, 0x28, 0x08, 0x52, 0x0d, 0x6d, 0x64, 0x4f, 0x6e, 0x53, 0x73, 0x64, 0x41, 0x63, 0x74, - 0x69, 0x76, 0x65, 0x22, 0x5f, 0x0a, 0x0b, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x53, 0x74, 0x61, - 0x74, 0x65, 0x12, 0x11, 0x0a, 0x0d, 0x53, 0x54, 0x41, 0x54, 0x45, 0x5f, 0x55, 0x4e, 0x4b, 0x4e, - 0x4f, 0x57, 0x4e, 0x10, 0x00, 0x12, 0x0c, 0x0a, 0x08, 0x44, 0x4f, 0x57, 0x4e, 0x5f, 0x4f, 0x55, - 0x54, 0x10, 0x01, 0x12, 0x08, 0x0a, 0x04, 0x44, 0x4f, 0x57, 0x4e, 0x10, 0x02, 0x12, 0x06, 0x0a, - 0x02, 0x55, 0x50, 0x10, 0x03, 0x12, 0x09, 0x0a, 0x05, 0x55, 0x50, 0x5f, 0x49, 0x4e, 0x10, 0x04, - 0x12, 0x07, 0x0a, 0x03, 0x4e, 0x45, 0x57, 0x10, 0x05, 0x12, 0x09, 0x0a, 0x05, 0x44, 0x52, 0x41, - 0x49, 0x4e, 0x10, 0x06, 0x4a, 0x04, 0x08, 0x01, 0x10, 0x02, 0x22, 0x5e, 0x0a, 0x13, 0x50, 0x6f, - 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x52, 0x65, 0x73, - 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, - 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x2f, 0x0a, 0x05, 0x69, 0x6e, 0x66, - 0x6f, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x19, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, - 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x49, - 0x6e, 0x66, 0x6f, 0x52, 0x05, 0x69, 0x6e, 0x66, 0x6f, 0x73, 0x22, 0x54, 0x0a, 0x13, 0x50, 0x6f, - 0x6f, 0x6c, 0x52, 0x65, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x53, 0x74, 0x61, 0x72, 0x74, 0x52, 0x65, + 0x69, 0x65, 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, + 0x18, 0x04, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, + 0x22, 0x5d, 0x0a, 0x0f, 0x50, 0x6f, 0x6f, 0x6c, 0x47, 0x65, 0x74, 0x50, 0x72, 0x6f, 0x70, 0x52, + 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 
0x18, 0x01, 0x20, + 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x32, 0x0a, 0x0a, 0x70, + 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, + 0x12, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x50, 0x72, 0x6f, 0x70, 0x65, + 0x72, 0x74, 0x79, 0x52, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x22, + 0x4f, 0x0a, 0x0e, 0x50, 0x6f, 0x6f, 0x6c, 0x55, 0x70, 0x67, 0x72, 0x61, 0x64, 0x65, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, - 0x22, 0x69, 0x0a, 0x12, 0x50, 0x6f, 0x6f, 0x6c, 0x52, 0x65, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x53, - 0x74, 0x6f, 0x70, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, - 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x14, 0x0a, 0x05, 0x66, 0x6f, 0x72, 0x63, - 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x08, 0x52, 0x05, 0x66, 0x6f, 0x72, 0x63, 0x65, 0x12, 0x1b, - 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, - 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x76, 0x0a, 0x13, 0x50, - 0x6f, 0x6f, 0x6c, 0x53, 0x65, 0x6c, 0x66, 0x48, 0x65, 0x61, 0x6c, 0x45, 0x76, 0x61, 0x6c, 0x52, + 0x22, 0x81, 0x01, 0x0a, 0x12, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, + 0x72, 0x67, 0x65, 0x74, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, + 0x02, 0x20, 0x01, 0x28, 
0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x61, 0x6e, + 0x6b, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x72, 0x61, 0x6e, 0x6b, 0x12, 0x18, 0x0a, + 0x07, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x07, + 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, + 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, + 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x75, 0x0a, 0x12, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x54, + 0x61, 0x72, 0x67, 0x65, 0x74, 0x55, 0x73, 0x61, 0x67, 0x65, 0x12, 0x14, 0x0a, 0x05, 0x74, 0x6f, + 0x74, 0x61, 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28, 0x04, 0x52, 0x05, 0x74, 0x6f, 0x74, 0x61, 0x6c, + 0x12, 0x12, 0x0a, 0x04, 0x66, 0x72, 0x65, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x04, 0x52, 0x04, + 0x66, 0x72, 0x65, 0x65, 0x12, 0x35, 0x0a, 0x0a, 0x6d, 0x65, 0x64, 0x69, 0x61, 0x5f, 0x74, 0x79, + 0x70, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x16, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, + 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x4d, 0x65, 0x64, 0x69, 0x61, 0x54, 0x79, 0x70, 0x65, + 0x52, 0x09, 0x6d, 0x65, 0x64, 0x69, 0x61, 0x54, 0x79, 0x70, 0x65, 0x22, 0xb8, 0x02, 0x0a, 0x13, + 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x49, + 0x6e, 0x66, 0x6f, 0x12, 0x3b, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x02, 0x20, 0x01, + 0x28, 0x0e, 0x32, 0x25, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, + 0x65, 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x49, 0x6e, 0x66, 0x6f, 0x2e, 0x54, 0x61, + 0x72, 0x67, 0x65, 0x74, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, + 0x12, 0x2e, 0x0a, 0x05, 0x73, 0x70, 0x61, 0x63, 0x65, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, + 0x18, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x54, 0x61, + 0x72, 0x67, 0x65, 0x74, 0x55, 0x73, 0x61, 0x67, 0x65, 0x52, 
0x05, 0x73, 0x70, 0x61, 0x63, 0x65, + 0x12, 0x24, 0x0a, 0x0e, 0x6d, 0x65, 0x6d, 0x5f, 0x66, 0x69, 0x6c, 0x65, 0x5f, 0x62, 0x79, 0x74, + 0x65, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x04, 0x52, 0x0c, 0x6d, 0x65, 0x6d, 0x46, 0x69, 0x6c, + 0x65, 0x42, 0x79, 0x74, 0x65, 0x73, 0x12, 0x27, 0x0a, 0x10, 0x6d, 0x64, 0x5f, 0x6f, 0x6e, 0x5f, + 0x73, 0x73, 0x64, 0x5f, 0x61, 0x63, 0x74, 0x69, 0x76, 0x65, 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, + 0x52, 0x0d, 0x6d, 0x64, 0x4f, 0x6e, 0x53, 0x73, 0x64, 0x41, 0x63, 0x74, 0x69, 0x76, 0x65, 0x22, + 0x5f, 0x0a, 0x0b, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x11, + 0x0a, 0x0d, 0x53, 0x54, 0x41, 0x54, 0x45, 0x5f, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, + 0x00, 0x12, 0x0c, 0x0a, 0x08, 0x44, 0x4f, 0x57, 0x4e, 0x5f, 0x4f, 0x55, 0x54, 0x10, 0x01, 0x12, + 0x08, 0x0a, 0x04, 0x44, 0x4f, 0x57, 0x4e, 0x10, 0x02, 0x12, 0x06, 0x0a, 0x02, 0x55, 0x50, 0x10, + 0x03, 0x12, 0x09, 0x0a, 0x05, 0x55, 0x50, 0x5f, 0x49, 0x4e, 0x10, 0x04, 0x12, 0x07, 0x0a, 0x03, + 0x4e, 0x45, 0x57, 0x10, 0x05, 0x12, 0x09, 0x0a, 0x05, 0x44, 0x52, 0x41, 0x49, 0x4e, 0x10, 0x06, + 0x4a, 0x04, 0x08, 0x01, 0x10, 0x02, 0x22, 0x5e, 0x0a, 0x13, 0x50, 0x6f, 0x6f, 0x6c, 0x51, 0x75, + 0x65, 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, + 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, + 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x2f, 0x0a, 0x05, 0x69, 0x6e, 0x66, 0x6f, 0x73, 0x18, 0x02, + 0x20, 0x03, 0x28, 0x0b, 0x32, 0x19, 0x2e, 0x6d, 0x67, 0x6d, 0x74, 0x2e, 0x50, 0x6f, 0x6f, 0x6c, + 0x51, 0x75, 0x65, 0x72, 0x79, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x49, 0x6e, 0x66, 0x6f, 0x52, + 0x05, 0x69, 0x6e, 0x66, 0x6f, 0x73, 0x22, 0x54, 0x0a, 0x13, 0x50, 0x6f, 0x6f, 0x6c, 0x52, 0x65, + 0x62, 0x75, 0x69, 0x6c, 0x64, 0x53, 0x74, 0x61, 0x72, 0x74, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, + 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, + 
0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, + 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x03, 0x20, 0x03, + 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x69, 0x0a, 0x12, + 0x50, 0x6f, 0x6f, 0x6c, 0x52, 0x65, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x53, 0x74, 0x6f, 0x70, 0x52, 0x65, 0x71, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, - 0x52, 0x02, 0x69, 0x64, 0x12, 0x20, 0x0a, 0x0c, 0x73, 0x79, 0x73, 0x5f, 0x70, 0x72, 0x6f, 0x70, - 0x5f, 0x76, 0x61, 0x6c, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x73, 0x79, 0x73, 0x50, - 0x72, 0x6f, 0x70, 0x56, 0x61, 0x6c, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, - 0x6e, 0x6b, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, - 0x6e, 0x6b, 0x73, 0x2a, 0x25, 0x0a, 0x10, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x4d, 0x65, - 0x64, 0x69, 0x61, 0x54, 0x79, 0x70, 0x65, 0x12, 0x07, 0x0a, 0x03, 0x53, 0x43, 0x4d, 0x10, 0x00, - 0x12, 0x08, 0x0a, 0x04, 0x4e, 0x56, 0x4d, 0x45, 0x10, 0x01, 0x2a, 0x5d, 0x0a, 0x10, 0x50, 0x6f, - 0x6f, 0x6c, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x0c, - 0x0a, 0x08, 0x43, 0x72, 0x65, 0x61, 0x74, 0x69, 0x6e, 0x67, 0x10, 0x00, 0x12, 0x09, 0x0a, 0x05, - 0x52, 0x65, 0x61, 0x64, 0x79, 0x10, 0x01, 0x12, 0x0e, 0x0a, 0x0a, 0x44, 0x65, 0x73, 0x74, 0x72, - 0x6f, 0x79, 0x69, 0x6e, 0x67, 0x10, 0x02, 0x12, 0x13, 0x0a, 0x0f, 0x54, 0x61, 0x72, 0x67, 0x65, - 0x74, 0x73, 0x45, 0x78, 0x63, 0x6c, 0x75, 0x64, 0x65, 0x64, 0x10, 0x03, 0x12, 0x0b, 0x0a, 0x07, - 0x55, 0x6e, 0x6b, 0x6e, 0x6f, 0x77, 0x6e, 0x10, 0x04, 0x42, 0x3a, 0x5a, 0x38, 0x67, 0x69, 0x74, - 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2d, 0x73, 0x74, 0x61, - 0x63, 0x6b, 0x2f, 0x64, 0x61, 0x6f, 0x73, 
0x2f, 0x73, 0x72, 0x63, 0x2f, 0x63, 0x6f, 0x6e, 0x74, - 0x72, 0x6f, 0x6c, 0x2f, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, - 0x2f, 0x6d, 0x67, 0x6d, 0x74, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x52, 0x02, 0x69, 0x64, 0x12, 0x14, 0x0a, 0x05, 0x66, 0x6f, 0x72, 0x63, 0x65, 0x18, 0x03, 0x20, + 0x01, 0x28, 0x08, 0x52, 0x05, 0x66, 0x6f, 0x72, 0x63, 0x65, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, + 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, + 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x22, 0x76, 0x0a, 0x13, 0x50, 0x6f, 0x6f, 0x6c, 0x53, + 0x65, 0x6c, 0x66, 0x48, 0x65, 0x61, 0x6c, 0x45, 0x76, 0x61, 0x6c, 0x52, 0x65, 0x71, 0x12, 0x10, + 0x0a, 0x03, 0x73, 0x79, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x79, 0x73, + 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, + 0x12, 0x20, 0x0a, 0x0c, 0x73, 0x79, 0x73, 0x5f, 0x70, 0x72, 0x6f, 0x70, 0x5f, 0x76, 0x61, 0x6c, + 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x73, 0x79, 0x73, 0x50, 0x72, 0x6f, 0x70, 0x56, + 0x61, 0x6c, 0x12, 0x1b, 0x0a, 0x09, 0x73, 0x76, 0x63, 0x5f, 0x72, 0x61, 0x6e, 0x6b, 0x73, 0x18, + 0x05, 0x20, 0x03, 0x28, 0x0d, 0x52, 0x08, 0x73, 0x76, 0x63, 0x52, 0x61, 0x6e, 0x6b, 0x73, 0x2a, + 0x25, 0x0a, 0x10, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x4d, 0x65, 0x64, 0x69, 0x61, 0x54, + 0x79, 0x70, 0x65, 0x12, 0x07, 0x0a, 0x03, 0x53, 0x43, 0x4d, 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, + 0x4e, 0x56, 0x4d, 0x45, 0x10, 0x01, 0x2a, 0x5d, 0x0a, 0x10, 0x50, 0x6f, 0x6f, 0x6c, 0x53, 0x65, + 0x72, 0x76, 0x69, 0x63, 0x65, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x0c, 0x0a, 0x08, 0x43, 0x72, + 0x65, 0x61, 0x74, 0x69, 0x6e, 0x67, 0x10, 0x00, 0x12, 0x09, 0x0a, 0x05, 0x52, 0x65, 0x61, 0x64, + 0x79, 0x10, 0x01, 0x12, 0x0e, 0x0a, 0x0a, 0x44, 0x65, 0x73, 0x74, 0x72, 0x6f, 0x79, 0x69, 0x6e, + 0x67, 0x10, 0x02, 0x12, 0x13, 0x0a, 0x0f, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 0x45, 0x78, + 
0x63, 0x6c, 0x75, 0x64, 0x65, 0x64, 0x10, 0x03, 0x12, 0x0b, 0x0a, 0x07, 0x55, 0x6e, 0x6b, 0x6e, + 0x6f, 0x77, 0x6e, 0x10, 0x04, 0x42, 0x3a, 0x5a, 0x38, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, + 0x63, 0x6f, 0x6d, 0x2f, 0x64, 0x61, 0x6f, 0x73, 0x2d, 0x73, 0x74, 0x61, 0x63, 0x6b, 0x2f, 0x64, + 0x61, 0x6f, 0x73, 0x2f, 0x73, 0x72, 0x63, 0x2f, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x2f, + 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x6d, 0x67, 0x6d, + 0x74, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( diff --git a/src/control/lib/daos/api/pool.go b/src/control/lib/daos/api/pool.go index 9eac8929555..4644ed243ba 100644 --- a/src/control/lib/daos/api/pool.go +++ b/src/control/lib/daos/api/pool.go @@ -123,10 +123,11 @@ func newPoolRebuildStatus(drs *C.struct_daos_rebuild_status) *daos.PoolRebuildSt } return &daos.PoolRebuildStatus{ - Status: int32(drs.rs_errno), - Objects: uint64(drs.rs_obj_nr), - Records: uint64(drs.rs_rec_nr), - State: compatRebuildState(), + Status: int32(drs.rs_errno), + Objects: uint64(drs.rs_obj_nr), + Records: uint64(drs.rs_rec_nr), + State: compatRebuildState(), + Degraded: (drs.rs_flags & C.DAOS_RSF_DEGRADED) != 0, } } diff --git a/src/control/lib/daos/pool.go b/src/control/lib/daos/pool.go index 88dc167058d..f6d7835d948 100644 --- a/src/control/lib/daos/pool.go +++ b/src/control/lib/daos/pool.go @@ -61,6 +61,7 @@ type ( Objects uint64 `json:"objects"` Records uint64 `json:"records"` TotalObjects uint64 `json:"total_objects"` + Degraded bool `json:"degraded"` } // PoolInfo contains information about the pool. diff --git a/src/include/daos_pool.h b/src/include/daos_pool.h index 0752278c458..35c018c93c6 100644 --- a/src/include/daos_pool.h +++ b/src/include/daos_pool.h @@ -1,6 +1,6 @@ /** * (C) Copyright 2020-2024 Intel Corporation. 
- * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -94,6 +94,12 @@ enum daos_rebuild_state_t { DRS_COMPLETED = 2, }; +/** For daos_rebuild_status.rs_flags */ +enum daos_rebuild_status_flag { + /** Data redundancy degraded (the pool has one or more DOWN targets) */ + DAOS_RSF_DEGRADED = (1 << 0), +}; + /** Pool rebuild status */ struct daos_rebuild_status { /** pool map version in rebuilding or last completed rebuild */ @@ -111,8 +117,10 @@ struct daos_rebuild_status { }; /** Maximum supported layout version */ uint16_t rs_max_supported_layout_ver; - /** padding of rebuild status */ - int16_t rs_padding16; + /** See daos_rebuild_status_flag. */ + uint8_t rs_flags; + /** Do not access this field by name. */ + uint8_t rs_reserved_; /** Failure on which rank */ int32_t rs_fail_rank; diff --git a/src/mgmt/pool.pb-c.c b/src/mgmt/pool.pb-c.c index 82c188b5609..301e074e0d8 100644 --- a/src/mgmt/pool.pb-c.c +++ b/src/mgmt/pool.pb-c.c @@ -3365,7 +3365,7 @@ const ProtobufCEnumDescriptor mgmt__pool_rebuild_status__state__descriptor = mgmt__pool_rebuild_status__state__value_ranges, NULL,NULL,NULL,NULL /* reserved[1234] */ }; -static const ProtobufCFieldDescriptor mgmt__pool_rebuild_status__field_descriptors[5] = +static const ProtobufCFieldDescriptor mgmt__pool_rebuild_status__field_descriptors[6] = { { "status", @@ -3427,8 +3427,21 @@ static const ProtobufCFieldDescriptor mgmt__pool_rebuild_status__field_descripto 0, /* flags */ 0,NULL,NULL /* reserved1,reserved2, etc */ }, + { + "degraded", + 6, + PROTOBUF_C_LABEL_NONE, + PROTOBUF_C_TYPE_BOOL, + 0, /* quantifier_offset */ + offsetof(Mgmt__PoolRebuildStatus, degraded), + NULL, + NULL, + 0, /* flags */ + 0,NULL,NULL /* reserved1,reserved2, etc */ + }, }; static const unsigned mgmt__pool_rebuild_status__field_indices_by_name[] = { + 5, /* field[5] = degraded */ 4, /* field[4] = derived_state 
*/ 2, /* field[2] = objects */ 3, /* field[3] = records */ @@ -3438,7 +3451,7 @@ static const unsigned mgmt__pool_rebuild_status__field_indices_by_name[] = { static const ProtobufCIntRange mgmt__pool_rebuild_status__number_ranges[1 + 1] = { { 1, 0 }, - { 0, 5 } + { 0, 6 } }; const ProtobufCMessageDescriptor mgmt__pool_rebuild_status__descriptor = { @@ -3448,7 +3461,7 @@ const ProtobufCMessageDescriptor mgmt__pool_rebuild_status__descriptor = "Mgmt__PoolRebuildStatus", "mgmt", sizeof(Mgmt__PoolRebuildStatus), - 5, + 6, mgmt__pool_rebuild_status__field_descriptors, mgmt__pool_rebuild_status__field_indices_by_name, 1, mgmt__pool_rebuild_status__number_ranges, diff --git a/src/mgmt/pool.pb-c.h b/src/mgmt/pool.pb-c.h index a41ebb658ce..a8596043bd9 100644 --- a/src/mgmt/pool.pb-c.h +++ b/src/mgmt/pool.pb-c.h @@ -775,10 +775,14 @@ struct _Mgmt__PoolRebuildStatus uint64_t objects; uint64_t records; Mgmt__PoolRebuildStatus__State derived_state; + /* + * data redundancy degraded + */ + protobuf_c_boolean degraded; }; #define MGMT__POOL_REBUILD_STATUS__INIT \ { PROTOBUF_C_MESSAGE_INIT (&mgmt__pool_rebuild_status__descriptor) \ - , 0, MGMT__POOL_REBUILD_STATUS__STATE__BUSY, 0, 0, MGMT__POOL_REBUILD_STATUS__STATE__BUSY } + , 0, MGMT__POOL_REBUILD_STATUS__STATE__BUSY, 0, 0, MGMT__POOL_REBUILD_STATUS__STATE__BUSY, 0 } /* diff --git a/src/mgmt/srv_drpc.c b/src/mgmt/srv_drpc.c index 2fedf826efd..6145dadc998 100644 --- a/src/mgmt/srv_drpc.c +++ b/src/mgmt/srv_drpc.c @@ -1764,6 +1764,8 @@ pool_rebuild_status_from_info(Mgmt__PoolRebuildStatus *rebuild, rebuild->state = MGMT__POOL_REBUILD_STATUS__STATE__DONE; else rebuild->state = MGMT__POOL_REBUILD_STATUS__STATE__BUSY; + + rebuild->degraded = !!(info->rs_flags & DAOS_RSF_DEGRADED); } static void diff --git a/src/pool/srv_pool.c b/src/pool/srv_pool.c index f3c7e62ddfc..2a9311e81f4 100644 --- a/src/pool/srv_pool.c +++ b/src/pool/srv_pool.c @@ -4156,6 +4156,26 @@ bulk_cb(const struct crt_bulk_cb_info *cb_info) return 0; } +static int 
+pool_query_set_rebuild_status_degraded(struct pool_svc *svc, struct daos_rebuild_status *rebuild_st) +{ + unsigned int down_tgts = 0; + int rc; + + ABT_rwlock_rdlock(svc->ps_pool->sp_lock); + rc = pool_map_find_down_tgts(svc->ps_pool->sp_map, NULL /* tgt_pp */, &down_tgts); + ABT_rwlock_unlock(svc->ps_pool->sp_lock); + if (rc != 0) + return rc; + + if (down_tgts > 0) + rebuild_st->rs_flags |= DAOS_RSF_DEGRADED; + else + rebuild_st->rs_flags &= ~DAOS_RSF_DEGRADED; + + return 0; +} + /* Currently we only maintain compatibility between 2 metadata layout versions */ #define NUM_POOL_VERSIONS 2 @@ -4387,6 +4407,12 @@ pool_connect_handler(crt_rpc_t *rpc, int handler_version) goto out_map_version; } + if (query_bits & DAOS_PO_QUERY_REBUILD_STATUS) { + rc = pool_query_set_rebuild_status_degraded(svc, &out->pco_rebuild_st); + if (rc != 0) + goto out_map_version; + } + transfer_map = true; if (skip_update) D_GOTO(out_map_version, rc = 0); @@ -5449,6 +5475,12 @@ pool_query_handler(crt_rpc_t *rpc, int handler_version) } } + if (query_bits & DAOS_PO_QUERY_REBUILD_STATUS) { + rc = pool_query_set_rebuild_status_degraded(svc, &out->pqo_rebuild_st); + if (rc != 0) + goto out_lock; + } + out_lock: ABT_rwlock_unlock(svc->ps_lock); rdb_tx_end(&tx); diff --git a/src/proto/mgmt/pool.proto b/src/proto/mgmt/pool.proto index 728fc112cbe..5fc7c8762c5 100644 --- a/src/proto/mgmt/pool.proto +++ b/src/proto/mgmt/pool.proto @@ -216,6 +216,7 @@ message PoolRebuildStatus { uint64 objects = 3; uint64 records = 4; State derived_state = 5; + bool degraded = 6; // data redundancy degraded } enum PoolServiceState { diff --git a/src/rebuild/README.md b/src/rebuild/README.md index 33f6c32d7ca..61407959f60 100644 --- a/src/rebuild/README.md +++ b/src/rebuild/README.md @@ -206,8 +206,10 @@ struct daos_rebuild_status { /** Maximum supported layout version */ uint16_t rs_max_supported_layout_ver; - /** padding of rebuild status */ - int16_t rs_padding16; + /** See daos_rebuild_status_flag. 
*/ + uint8_t rs_flags; + /** Do not access this field by name. */ + uint8_t rs_reserved_; /* Failure on which rank */ int32_t rs_fail_rank; diff --git a/src/tests/ftest/control/dmg_pool_query_test.py b/src/tests/ftest/control/dmg_pool_query_test.py index bbb4ba30d9d..f2d8b544f09 100644 --- a/src/tests/ftest/control/dmg_pool_query_test.py +++ b/src/tests/ftest/control/dmg_pool_query_test.py @@ -80,7 +80,8 @@ def test_pool_query_basic(self): "derived_state": self.params.get("state", path="/run/exp_vals/rebuild/*"), "objects": self.params.get("objects", path="/run/exp_vals/rebuild/*"), "records": self.params.get("records", path="/run/exp_vals/rebuild/*"), - "total_objects": self.params.get("total_objects", path="/run/exp_vals/rebuild/*") + "total_objects": self.params.get("total_objects", path="/run/exp_vals/rebuild/*"), + 'degraded': self.params.get("degraded", path="/run/exp_vals/rebuild/*") }, "tier_stats": [ { diff --git a/src/tests/ftest/control/dmg_pool_query_test.yaml b/src/tests/ftest/control/dmg_pool_query_test.yaml index 6217024b764..17d5df77bad 100644 --- a/src/tests/ftest/control/dmg_pool_query_test.yaml +++ b/src/tests/ftest/control/dmg_pool_query_test.yaml @@ -46,6 +46,7 @@ exp_vals: objects: 0 records: 0 total_objects: 0 + degraded: False pool_uuids: uuids: diff --git a/src/tests/ftest/pool/list_verbose.py b/src/tests/ftest/pool/list_verbose.py index 9f784d2d197..3928b86de81 100644 --- a/src/tests/ftest/pool/list_verbose.py +++ b/src/tests/ftest/pool/list_verbose.py @@ -25,7 +25,7 @@ class ListVerboseTest(IorTestBase): def create_expected(self, pool, scm_free, nvme_free, scm_imbalance, nvme_imbalance, targets_disabled=0, scm_size=None, nvme_size=None, state=None, rebuild_state=None, - ranks_disabled=None): + ranks_disabled=None, rebuild_degraded=False): # pylint: disable=too-many-arguments """Create expected dmg pool list output to compare against the actual. 
@@ -42,6 +42,8 @@ def create_expected(self, pool, scm_free, nvme_free, scm_imbalance, state (str, optional): Expected pool state. Defaults to None. rebuild_state (str, optional): Expected pool rebuild state. Defaults to None. ranks_disabled (list, optional): List of disabled ranks. Defaults to None. + rebuild_degraded (bool, optional): Whether rebuild status flag `degraded` is set. + Defaults to False. Returns: dict: Expected in the same format of actual. @@ -83,7 +85,8 @@ def create_expected(self, pool, scm_free, nvme_free, scm_imbalance, "derived_state": rebuild_state, "objects": 0, "records": 0, - "total_objects": 0 + "total_objects": 0, + 'degraded': rebuild_degraded }, "self_heal_policy": "", # NB: tests should not expect min/max/mean values @@ -186,7 +189,7 @@ def verify_scm_size(self, actual, created, rank_count): self.assertTrue(diff < threshold, msg) def verify_pool_lists(self, targets_disabled, scm_size, nvme_size, state, rebuild_state, - ranks_disabled): + ranks_disabled, rebuild_degraded): """Call dmg pool list and verify. self.pool should be a list. The elements of the inputs should @@ -199,6 +202,7 @@ def verify_pool_lists(self, targets_disabled, scm_size, nvme_size, state, rebuil state (list): List of pool state for pools. rebuild_state (list): List of pool rebuild state for pools. ranks_disabled (list): List of disabled ranks for pools. + rebuild_degraded (list): List of rebuild status flag `degraded` for pools. Returns: list: a list of dictionaries containing information for each pool from the dmg @@ -245,7 +249,8 @@ def verify_pool_lists(self, targets_disabled, scm_size, nvme_size, state, rebuil nvme_size=nvme_size[index], state=state[index], rebuild_state=rebuild_state[index], - ranks_disabled=ranks_disabled[index])) + ranks_disabled=ranks_disabled[index], + rebuild_degraded=rebuild_degraded[index])) # Sort pools by UUID. 
actual_pools.sort(key=lambda item: item.get("uuid")) @@ -312,9 +317,11 @@ def test_fields_basic(self): state = ["Ready"] rebuild_state = ["idle"] ranks_disabled = [[]] + rebuild_degraded = [False] self.verify_pool_lists( targets_disabled=targets_disabled, scm_size=scm_size, nvme_size=nvme_size, - state=state, rebuild_state=rebuild_state, ranks_disabled=ranks_disabled) + state=state, rebuild_state=rebuild_state, ranks_disabled=ranks_disabled, + rebuild_degraded=rebuild_degraded) # 3. Create second pool. self.log_step("Create second pool") @@ -329,9 +336,11 @@ def test_fields_basic(self): state.append("Ready") rebuild_state.append("idle") ranks_disabled.append([]) + rebuild_degraded.append(False) self.verify_pool_lists( targets_disabled=targets_disabled, scm_size=scm_size, nvme_size=nvme_size, - state=state, rebuild_state=rebuild_state, ranks_disabled=ranks_disabled) + state=state, rebuild_state=rebuild_state, ranks_disabled=ranks_disabled, + rebuild_degraded=rebuild_degraded) # 5. Exclude target 7 in rank 1 of pool 1. self.log_step("Exclude target 7 in rank 1 of pool 1") @@ -349,10 +358,12 @@ def test_fields_basic(self): nvme_size[0] = reduced_nvme_size state[0] = "TargetsExcluded" rebuild_state[0] = "busy" + rebuild_degraded[0] = True self.verify_pool_lists( targets_disabled=targets_disabled, scm_size=scm_size, nvme_size=nvme_size, - state=state, rebuild_state=rebuild_state, ranks_disabled=ranks_disabled) + state=state, rebuild_state=rebuild_state, ranks_disabled=ranks_disabled, + rebuild_degraded=rebuild_degraded) # 7-11. Destroy and verify until the pools are gone. 
while self.pool: @@ -365,10 +376,12 @@ def test_fields_basic(self): scm_size.pop() nvme_size.pop() ranks_disabled.pop() + rebuild_degraded.pop() self.verify_pool_lists( targets_disabled=targets_disabled, scm_size=scm_size, nvme_size=nvme_size, - state=state, rebuild_state=rebuild_state, ranks_disabled=ranks_disabled) + state=state, rebuild_state=rebuild_state, ranks_disabled=ranks_disabled, + rebuild_degraded=rebuild_degraded) def verify_used_imbalance(self, storage): """Verification steps for test_used_imbalance. @@ -396,9 +409,11 @@ def verify_used_imbalance(self, storage): state = ["Ready"] rebuild_state = ["idle"] ranks_disabled = [[]] + rebuild_degraded = [False] actual_pools_before = self.verify_pool_lists( targets_disabled=targets_disabled, scm_size=scm_size, nvme_size=nvme_size, - state=state, rebuild_state=rebuild_state, ranks_disabled=ranks_disabled) + state=state, rebuild_state=rebuild_state, ranks_disabled=ranks_disabled, + rebuild_degraded=rebuild_degraded) # 3. Store free. free_before, _ = self.get_free_imbalance(actual_pools_before[0], storage) @@ -416,7 +431,8 @@ def verify_used_imbalance(self, storage): # obtained from actual. actual_pools_after = self.verify_pool_lists( targets_disabled=targets_disabled, scm_size=scm_size, nvme_size=nvme_size, - state=state, rebuild_state=rebuild_state, ranks_disabled=ranks_disabled) + state=state, rebuild_state=rebuild_state, ranks_disabled=ranks_disabled, + rebuild_degraded=rebuild_degraded) # Obtain the new free and imbalance. free_after, imbalance_after = self.get_free_imbalance( diff --git a/src/tests/ftest/util/test_utils_pool.py b/src/tests/ftest/util/test_utils_pool.py index a863c38cfdd..8013791f506 100644 --- a/src/tests/ftest/util/test_utils_pool.py +++ b/src/tests/ftest/util/test_utils_pool.py @@ -1,6 +1,6 @@ """ (C) Copyright 2018-2024 Intel Corporation. 
- (C) Copyright 2025 Hewlett Packard Enterprise Development LP + (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -1049,7 +1049,7 @@ def check_free_space(self, expected_scm=None, expected_nvme=None, timeout=30, in return False def check_rebuild_status(self, rs_version=None, rs_seconds=None, - rs_errno=None, rs_state=None, rs_padding16=None, + rs_errno=None, rs_state=None, rs_flags=None, rs_fail_rank=None, rs_toberb_obj_nr=None, rs_obj_nr=None, rs_rec_nr=None, rs_size=None): # pylint: disable=unused-argument @@ -1066,7 +1066,7 @@ def check_rebuild_status(self, rs_version=None, rs_seconds=None, rs_seconds (int, optional): rebuild seconds. Defaults to None. rs_errno (int, optional): rebuild error number. Defaults to None. rs_state (int, optional): rebuild state flag. Defaults to None. - rs_padding16 (int, optional): padding. Defaults to None. + rs_flags (int, optional): rebuild status flags. Defaults to None. rs_fail_rank (int, optional): rebuild fail target. Defaults to None. rs_toberb_obj_nr (int, optional): number of objects to be rebuilt. Defaults to None. From 9b9971fb55220813259e57aadcfe91a70c4db601 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 2 Feb 2026 12:20:37 -0800 Subject: [PATCH 171/253] DAOS-18553 cq: Bump github/codeql-action to 4.32.0 (#17480) Updates `github/codeql-action` from 4.31.11 to 4.32.0 Signed-off-by: dependabot[bot] --- .github/workflows/ossf-scorecard.yml | 2 +- .github/workflows/trivy.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ossf-scorecard.yml b/.github/workflows/ossf-scorecard.yml index 6ce378da7da..4fb3da4ec81 100644 --- a/.github/workflows/ossf-scorecard.yml +++ b/.github/workflows/ossf-scorecard.yml @@ -71,6 +71,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard (optional). 
# Commenting out will disable upload of results to your repo's Code Scanning dashboard - name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@19b2f06db2b6f5108140aeb04014ef02b648f789 # v4.31.11 + uses: github/codeql-action/upload-sarif@b20883b0cd1f46c72ae0ba6d1090936928f9fa30 # v4.32.0 with: sarif_file: results.sarif diff --git a/.github/workflows/trivy.yml b/.github/workflows/trivy.yml index 4da0b87d1fb..e4c0103f69d 100644 --- a/.github/workflows/trivy.yml +++ b/.github/workflows/trivy.yml @@ -68,7 +68,7 @@ jobs: trivy-config: 'utils/trivy/trivy.yaml' - name: Upload Trivy scan results to GitHub Security tab - uses: github/codeql-action/upload-sarif@19b2f06db2b6f5108140aeb04014ef02b648f789 # v4.31.11 + uses: github/codeql-action/upload-sarif@b20883b0cd1f46c72ae0ba6d1090936928f9fa30 # v4.32.0 with: sarif_file: 'trivy-results.sarif' From c514c3380c56f31366675ac679482c96a8db87a4 Mon Sep 17 00:00:00 2001 From: Alexander Oganezov Date: Tue, 3 Feb 2026 09:21:01 -0800 Subject: [PATCH 172/253] DAOS-18527 cart: Handling of string env limits (#17466) - CRT_ENV_STR_MAX_SIZE introduced, set to 1024. All string envs are checked against this limit at the data_init() time. - Additional checks added for domain/interface not to exceed this limit, as those can be passed via crt_init_opt. Signed-off-by: Alexander A Oganezov --- src/cart/crt_init.c | 47 ++++++++++++++++++++++++-------- src/cart/crt_internal_types.h | 50 ++++++++++++++++++++++++++++++----- 2 files changed, 79 insertions(+), 18 deletions(-) diff --git a/src/cart/crt_init.c b/src/cart/crt_init.c index 21f9ea08891..e4556b8693d 100644 --- a/src/cart/crt_init.c +++ b/src/cart/crt_init.c @@ -1,7 +1,7 @@ /* * (C) Copyright 2016-2024 Intel Corporation. 
* (C) Copyright 2025 Google LLC - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -274,6 +274,9 @@ data_init(int server, crt_init_options_t *opt) crt_env_dump(); + if (!crt_env_list_valid()) + return -DER_INVAL; + /* Set context post init / post incr to tune number of pre-posted recvs */ crt_env_get(D_POST_INIT, &post_init); crt_gdata.cg_post_init = post_init; @@ -440,7 +443,7 @@ __split_arg(char *s_arg_to_split, const char *delim, char **first_arg, char **se return DER_SUCCESS; } - D_STRNDUP(arg_to_split, s_arg_to_split, 255); + D_STRNDUP(arg_to_split, s_arg_to_split, CRT_ENV_STR_MAX_SIZE); if (!arg_to_split) { *first_arg = NULL; *second_arg = NULL; @@ -793,25 +796,25 @@ crt_init_opt(crt_group_id_t grpid, uint32_t flags, crt_init_options_t *opt) * and processed in crt_na_config_init(). */ if (interface) { - D_STRNDUP(iface0, interface, 255); + D_STRNDUP(iface0, interface, CRT_ENV_STR_MAX_SIZE); if (!iface0) D_GOTO(unlock, rc = -DER_NOMEM); } if (domain) { - D_STRNDUP(domain0, domain, 255); + D_STRNDUP(domain0, domain, CRT_ENV_STR_MAX_SIZE); if (!domain0) D_GOTO(unlock, rc = -DER_NOMEM); } if (port) { - D_STRNDUP(port0, port, 255); + D_STRNDUP(port0, port, CRT_ENV_STR_MAX_SIZE); if (!port0) D_GOTO(unlock, rc = -DER_NOMEM); } if (auth_key) { - D_STRNDUP(auth_key0, auth_key, 255); + D_STRNDUP(auth_key0, auth_key, CRT_ENV_STR_MAX_SIZE); if (!auth_key0) D_GOTO(unlock, rc = -DER_NOMEM); } @@ -1190,24 +1193,34 @@ crt_na_config_init(bool primary, crt_provider_t provider, char *interface, char } if (interface) { - D_STRNDUP(na_cfg->noc_interface, interface, 64); + D_STRNDUP(na_cfg->noc_interface, interface, CRT_ENV_STR_MAX_SIZE); if (!na_cfg->noc_interface) D_GOTO(out, rc = -DER_NOMEM); } if (domain) { - D_STRNDUP(na_cfg->noc_domain, domain, 64); + D_STRNDUP(na_cfg->noc_domain, domain, CRT_ENV_STR_MAX_SIZE); if (!na_cfg->noc_domain) 
D_GOTO(out, rc = -DER_NOMEM); } if (auth_key) { - D_STRNDUP(na_cfg->noc_auth_key, auth_key, 255); + D_STRNDUP(na_cfg->noc_auth_key, auth_key, CRT_ENV_STR_MAX_SIZE); if (!na_cfg->noc_auth_key) D_GOTO(out, rc = -DER_NOMEM); } if (na_cfg->noc_interface) { + /* + * env checks limit strings to CRT_ENV_STR_MAX_SIZE, but an interface can + * be passed as an init argument + */ + if (strlen(na_cfg->noc_interface) + 1 >= CRT_ENV_STR_MAX_SIZE) { + D_ERROR("Interface value '%s' exceeds limit of %d characters\n", + na_cfg->noc_interface, CRT_ENV_STR_MAX_SIZE); + D_GOTO(out, rc = -DER_INVAL); + } + /* count number of ','-separated interfaces */ count = 1; save_ptr = na_cfg->noc_interface; @@ -1227,6 +1240,7 @@ crt_na_config_init(bool primary, crt_provider_t provider, char *interface, char idx = 0; token = strtok_r(na_cfg->noc_interface, ",", &save_ptr); while (token != NULL) { + /* TODO: If needed add filtering for duplicate interfaces here */ na_cfg->noc_iface_str[idx] = token; token = strtok_r(NULL, ",", &save_ptr); idx++; @@ -1238,6 +1252,16 @@ crt_na_config_init(bool primary, crt_provider_t provider, char *interface, char count = 0; if (na_cfg->noc_domain) { + /* + * env checks limit strings to CRT_ENV_STR_MAX_SIZE, but a domain can + * be passed as an init argument + */ + if (strlen(na_cfg->noc_domain) + 1 >= CRT_ENV_STR_MAX_SIZE) { + D_ERROR("Domain value '%s' exceeds limit of %d characters\n", + na_cfg->noc_domain, CRT_ENV_STR_MAX_SIZE); + D_GOTO(out, rc = -DER_INVAL); + } + /* count number of ','-separated domains */ count = 1; save_ptr = na_cfg->noc_domain; @@ -1267,8 +1291,9 @@ crt_na_config_init(bool primary, crt_provider_t provider, char *interface, char na_cfg->noc_domain_total = count; if (na_cfg->noc_domain_total > 0 && na_cfg->noc_domain_total != na_cfg->noc_iface_total) { - D_ERROR("Mismatched number of domains (%d) and interfaces (%d) specified\n", - na_cfg->noc_domain_total, na_cfg->noc_iface_total); + D_ERROR("Mismatched # of domains [%d]='%s' and interfaces 
[%d]='%s' specified\n", + na_cfg->noc_domain_total, na_cfg->noc_domain, na_cfg->noc_iface_total, + na_cfg->noc_interface); D_GOTO(out, rc = -DER_INVAL); } diff --git a/src/cart/crt_internal_types.h b/src/cart/crt_internal_types.h index 472d266fb06..9690213737b 100644 --- a/src/cart/crt_internal_types.h +++ b/src/cart/crt_internal_types.h @@ -1,7 +1,7 @@ /* * (C) Copyright 2016-2024 Intel Corporation. * (C) Copyright 2025 Google LLC - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -190,6 +190,8 @@ struct crt_event_cb_priv { #define CRT_CALLBACKS_NUM (4) /* start number of CBs */ #endif +#define CRT_ENV_STR_MAX_SIZE 1024 + /* * List of environment variables to read at CaRT library load time. * for integer envs use ENV() @@ -251,13 +253,13 @@ struct crt_event_cb_priv { #define ENV(x) \ unsigned int _##x; \ int _rc_##x; \ - int _no_print_##x; + bool _no_print_##x; /* char* env */ #define ENV_STR(x) \ char *_##x; \ int _rc_##x; \ - int _no_print_##x; + bool _no_print_##x; #define ENV_STR_NO_PRINT(x) ENV_STR(x) @@ -275,6 +277,7 @@ extern struct crt_envs crt_genvs; static inline void crt_env_fini(void); +/* init cart env structure */ static inline void crt_env_init(void) { @@ -285,19 +288,19 @@ crt_env_init(void) #define ENV(x) \ do { \ crt_genvs._rc_##x = d_getenv_uint(#x, &crt_genvs._##x); \ - crt_genvs._no_print_##x = 0; \ + crt_genvs._no_print_##x = false; \ } while (0); #define ENV_STR(x) \ do { \ crt_genvs._rc_##x = d_agetenv_str(&crt_genvs._##x, #x); \ - crt_genvs._no_print_##x = 0; \ + crt_genvs._no_print_##x = false; \ } while (0); #define ENV_STR_NO_PRINT(x) \ do { \ crt_genvs._rc_##x = d_agetenv_str(&crt_genvs._##x, #x); \ - crt_genvs._no_print_##x = 1; \ + crt_genvs._no_print_##x = true; \ } while (0); CRT_ENV_LIST; @@ -308,6 +311,7 @@ crt_env_init(void) crt_genvs.inited = true; } +/* fini cart envs */ static inline void 
crt_env_fini(void) { @@ -324,7 +328,7 @@ crt_env_fini(void) crt_genvs.inited = false; } -/* Returns value if env was present at load time */ +/* Returns value if env was present at load time and is part of CRT_ENV_LIST */ #define crt_env_get(name, val) \ do { \ D_ASSERT(crt_genvs.inited); \ @@ -332,6 +336,38 @@ crt_env_fini(void) *val = crt_genvs._##name; \ } while (0) +/* Check envs that contain strings to not exceed CRT_ENV_STR_MAX_SIZE */ +static inline bool +crt_env_list_valid(void) +{ +/* Ignore non-string envs in this check */ +#define ENV(x) + +/* if string env exceeds CRT_ENV_STR_MAX_SIZE - return false */ +#define ENV_STR(x) \ + if (crt_genvs._rc_##x == 0 && strlen(crt_genvs._##x) + 1 > CRT_ENV_STR_MAX_SIZE) { \ + D_ERROR("env '%s' (value='%s') exceeded max size %d\n", #x, crt_genvs._##x, \ + CRT_ENV_STR_MAX_SIZE); \ + return false; \ + } + +/* if string env exceeds CRT_ENV_STR_MAX_SIZE - return false */ +#define ENV_STR_NO_PRINT(x) \ + if (crt_genvs._rc_##x == 0 && strlen(crt_genvs._##x) + 1 > CRT_ENV_STR_MAX_SIZE) { \ + D_ERROR("env '%s' exceeded max size %d\n", #x, CRT_ENV_STR_MAX_SIZE); \ + return false; \ + } + + /* expand env list using the above ENV_* definitions */ + CRT_ENV_LIST; + return true; + +#undef ENV +#undef ENV_STR +#undef ENV_STR_NO_PRINT +} + +/* dump environment variables from the CRT_ENV_LIST */ static inline void crt_env_dump(void) { From fb8e92fd85318772954620b1654c56b474726e64 Mon Sep 17 00:00:00 2001 From: Phil Henderson Date: Tue, 3 Feb 2026 12:49:43 -0500 Subject: [PATCH 173/253] DAOS-18519 test: Update test pool size for new defaults (#17465) The default BS cluster size was increased from 32MB -> 128MB in MD on SSD mode. Updating the test pool sizes to work with this new default. 
Signed-off-by: Phil Henderson --- src/tests/ftest/container/boundary.yaml | 4 +-- src/tests/ftest/nvme/health.py | 32 ++++++++++++++++++++-- src/tests/ftest/telemetry/wal_metrics.yaml | 2 +- 3 files changed, 33 insertions(+), 5 deletions(-) diff --git a/src/tests/ftest/container/boundary.yaml b/src/tests/ftest/container/boundary.yaml index 2e3e7257aa9..8dfd797a69e 100644 --- a/src/tests/ftest/container/boundary.yaml +++ b/src/tests/ftest/container/boundary.yaml @@ -24,7 +24,7 @@ server_config: storage: auto pool: - scm_size: 200M + scm_size: 512M label: pool set_logmasks: False @@ -44,7 +44,7 @@ boundary_test: !mux num_containers: 30000 with_io: false test_2: - num_pools: 100 + num_pools: 45 num_containers: 200 with_io: false test_with_io: diff --git a/src/tests/ftest/nvme/health.py b/src/tests/ftest/nvme/health.py index d23cb8427a4..16460bf680b 100644 --- a/src/tests/ftest/nvme/health.py +++ b/src/tests/ftest/nvme/health.py @@ -1,5 +1,6 @@ ''' (C) Copyright 2020-2024 Intel Corporation. 
+ (C) Copyright 2026 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent ''' @@ -47,24 +48,51 @@ def test_monitor_for_large_pools(self): # Calculate the potential number of pools and use up to the max from config potential_num_pools = int((nvme_per_engine / (min_nvme_per_target * targets_per_engine))) actual_num_pools = min(max_num_pools, potential_num_pools) + self.log.debug("Calculating pool size and quantity") + self.log.debug( + " scm_per_engine: %s * %s = %s", + space_per_engine['scm'], total_pool_percentage, scm_per_engine) + self.log.debug( + " nvme_per_engine: %s * %s = %s", + space_per_engine['nvme'], total_pool_percentage, nvme_per_engine) + self.log.debug( + " potential_num_pools: %s / (%s * %s) = %s", + nvme_per_engine, min_nvme_per_target, targets_per_engine, potential_num_pools) # consider 1GiB RDB memory consume for MD-on-SSD rdb_size = 1073741824 if self.server_managers[0].manager.job.using_control_metadata: - min_scm_per_pool = 104857600 + min_scm_per_pool = 1073741824 potential_num_pools = int(scm_per_engine / (min_scm_per_pool + rdb_size)) actual_num_pools = min(potential_num_pools, actual_num_pools) + self.log.debug( + " potential_num_pools (md on ssd): %s / (%s + %s) = %s", + scm_per_engine, scm_per_engine, rdb_size, potential_num_pools) + + self.log.debug( + " actual_num_pools: min(%s, %s) = %s", + max_num_pools, potential_num_pools, actual_num_pools) # Split available space across the number of pools to be created scm_per_pool = int(scm_per_engine / actual_num_pools) + self.log.debug( + " scm_per_pool: %s / %s = %s", + scm_per_engine, actual_num_pools, scm_per_pool) if self.server_managers[0].manager.job.using_control_metadata: + self.log.debug( + " scm_per_pool (md on ssd): %s - %s = %s", + scm_per_pool, rdb_size, int(scm_per_pool - rdb_size)) scm_per_pool = int(scm_per_pool - rdb_size) + nvme_per_pool = int(nvme_per_engine / actual_num_pools) + self.log.debug( + " nvme_per_pool: %s / %s = %s", + 
nvme_per_engine, actual_num_pools, nvme_per_pool) # Create the pools pool_list = [] for pool_num in range(actual_num_pools): - self.log.info("-- Creating pool number = %s", pool_num) + self.log.info("-- Creating pool number %s of %s", pool_num + 1, actual_num_pools) try: pool_list.append(self.get_pool(scm_size=scm_per_pool, nvme_size=nvme_per_pool)) except TestFail as error: diff --git a/src/tests/ftest/telemetry/wal_metrics.yaml b/src/tests/ftest/telemetry/wal_metrics.yaml index 71ba8cbc17b..1f991444383 100644 --- a/src/tests/ftest/telemetry/wal_metrics.yaml +++ b/src/tests/ftest/telemetry/wal_metrics.yaml @@ -11,7 +11,7 @@ server_config: storage: auto pool: - size: 20G + size: 40G container: control_method: daos From ed7d7a4fc512e7ba935d25c3669f973510ab7176 Mon Sep 17 00:00:00 2001 From: Jerome Soumagne Date: Tue, 3 Feb 2026 12:13:34 -0600 Subject: [PATCH 174/253] DAOS-18017 build: update mercury to 2.4.1 (#16943) Remove libfabric dependency on DAOS rpm Allow for debug log by keeping debug enabled Add patch for version info Add patch for libfabric plugin counters Add patch for libfabric auth key Signed-off-by: Jerome Soumagne --- .github/workflows/bullseye-coverage.yml | 2 + .github/workflows/rpm-build-and-test.yml | 2 + Jenkinsfile | 6 +- .../post_provision_config_common.sh | 4 +- ci/unit/required_packages.sh | 8 +- deps/patches/mercury/0001_dep_versions.patch | 74 + deps/patches/mercury/0001_na_ucx.patch | 110 -- .../mercury/0002_na_ucx_ep_flush.patch | 64 - deps/patches/mercury/0002_ofi_counters.patch | 1196 +++++++++++++++++ .../mercury/0003_combined_plugin_path.patch | 260 ---- deps/patches/mercury/0003_ofi_auth_key.patch | 25 + docs/admin/hardware.md | 7 +- docs/admin/ucx.md | 27 +- site_scons/components/__init__.py | 8 +- src/cart/utils/memcheck-cart.supp | 44 +- utils/build.config | 4 +- utils/rpms/daos.changelog | 4 + utils/rpms/daos.sh | 10 +- utils/rpms/daos.spec | 10 +- utils/rpms/mercury.changelog | 274 ++++ utils/rpms/mercury.sh | 15 +- 
utils/rpms/package_info.sh | 14 +- utils/test_memcheck.supp | 20 +- 23 files changed, 1652 insertions(+), 536 deletions(-) create mode 100644 deps/patches/mercury/0001_dep_versions.patch delete mode 100644 deps/patches/mercury/0001_na_ucx.patch delete mode 100644 deps/patches/mercury/0002_na_ucx_ep_flush.patch create mode 100644 deps/patches/mercury/0002_ofi_counters.patch delete mode 100644 deps/patches/mercury/0003_combined_plugin_path.patch create mode 100644 deps/patches/mercury/0003_ofi_auth_key.patch create mode 100644 utils/rpms/mercury.changelog diff --git a/.github/workflows/bullseye-coverage.yml b/.github/workflows/bullseye-coverage.yml index a4139fca908..a7d9d97ab35 100644 --- a/.github/workflows/bullseye-coverage.yml +++ b/.github/workflows/bullseye-coverage.yml @@ -563,11 +563,13 @@ jobs: STAGE_TAGS+=",provider" if [[ '${{ matrix.stage }}' = *\ Verbs\ * ]]; then FTEST_ARG+=' --provider ofi+verbs' + INST_RPMS+=' mercury-libfabric' elif [[ '${{ matrix.stage }}' = *\ UCX\ * ]]; then FTEST_ARG+=' --provider ucx+dc_x' INST_RPMS+=' mercury-ucx' elif [[ '${{ matrix.stage }}' = *\ TCP\ * ]]; then FTEST_ARG+=' --provider ofi+tcp' + INST_RPMS+=' mercury-libfabric' else echo 'Unknown provider in ${{ matrix.stage }}' exit 1 diff --git a/.github/workflows/rpm-build-and-test.yml b/.github/workflows/rpm-build-and-test.yml index 2483fbbaa7b..4bbb942d788 100644 --- a/.github/workflows/rpm-build-and-test.yml +++ b/.github/workflows/rpm-build-and-test.yml @@ -572,11 +572,13 @@ jobs: STAGE_TAGS+=",provider" if [[ '${{ matrix.stage }}' = *\ Verbs\ * ]]; then FTEST_ARG+=' --provider ofi+verbs' + INST_RPMS+=' mercury-libfabric' elif [[ '${{ matrix.stage }}' = *\ UCX\ * ]]; then FTEST_ARG+=' --provider ucx+dc_x' INST_RPMS+=' mercury-ucx' elif [[ '${{ matrix.stage }}' = *\ TCP\ * ]]; then FTEST_ARG+=' --provider ofi+tcp' + INST_RPMS+=' mercury-libfabric' else echo 'Unknown provider in ${{ matrix.stage }}' exit 1 diff --git a/Jenkinsfile b/Jenkinsfile index 
8c52a25ae2a..f534692f483 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -1048,7 +1048,8 @@ pipeline { steps { job_step_update( testRpm(inst_repos: daosRepos(), - daos_pkg_version: daosPackagesVersion(next_version())) + daos_pkg_version: daosPackagesVersion(next_version()), + inst_rpms: 'mercury-libfabric') ) } post { @@ -1100,7 +1101,8 @@ pipeline { } */ job_step_update( testRpm(inst_repos: daosRepos(), - daos_pkg_version: daosPackagesVersion(next_version())) + daos_pkg_version: daosPackagesVersion(next_version()), + inst_rpms: 'mercury-libfabric') ) } post { diff --git a/ci/provisioning/post_provision_config_common.sh b/ci/provisioning/post_provision_config_common.sh index 257e6dfe2d9..3cac657efaf 100755 --- a/ci/provisioning/post_provision_config_common.sh +++ b/ci/provisioning/post_provision_config_common.sh @@ -1,7 +1,7 @@ #!/bin/bash # # Copyright 2021-2023 Intel Corporation. -# Copyright 2025 Hewlett Packard Enterprise Development LP +# Copyright 2025-2026 Hewlett Packard Enterprise Development LP # # SPDX-License-Identifier: BSD-2-Clause-Patent # @@ -32,7 +32,7 @@ fi # shellcheck disable=SC1091 . 
/etc/os-release # shellcheck disable=SC2034 -EXCLUDE_UPGRADE=mercury,daos,daos-\* +EXCLUDE_UPGRADE=mercury,mercury-\*,daos,daos-\* if rpm -qa | grep mlnx; then # packages not to allow upgrading if MLNX OFED is installed EXCLUDE_UPGRADE+=,openmpi,\*mlnx\*,\*ucx\* diff --git a/ci/unit/required_packages.sh b/ci/unit/required_packages.sh index bbd3313155a..5a57b0cb054 100755 --- a/ci/unit/required_packages.sh +++ b/ci/unit/required_packages.sh @@ -1,5 +1,10 @@ #!/bin/bash - +# +# (C) Copyright 2025 Google LLC +# Copyright 2025-2026 Hewlett Packard Enterprise Development LP +# +# SPDX-License-Identifier: BSD-2-Clause-Patent +# set -eu # No longer used but provided by pipeline-lib @@ -24,6 +29,7 @@ pkgs="$(utils/rpms/package_version.sh argobots lib) \ $(utils/rpms/package_version.sh libfabric debug) \ $(utils/rpms/package_version.sh mercury dev) \ $(utils/rpms/package_version.sh mercury debug) \ + $(utils/rpms/package_version.sh mercury lib mercury_libfabric) \ $(utils/rpms/package_version.sh pmdk lib pmemobj) \ $(utils/rpms/package_version.sh pmdk debug pmemobj) \ $(utils/rpms/package_version.sh pmdk debug pmem) \ diff --git a/deps/patches/mercury/0001_dep_versions.patch b/deps/patches/mercury/0001_dep_versions.patch new file mode 100644 index 00000000000..6b1d6a71a80 --- /dev/null +++ b/deps/patches/mercury/0001_dep_versions.patch @@ -0,0 +1,74 @@ +From 0a7756d4ef2f329fa7caa8e4052a099a91816f2f Mon Sep 17 00:00:00 2001 +From: Jerome Soumagne +Date: Mon, 24 Nov 2025 16:44:14 -0600 +Subject: [PATCH 1/3] NA OFI/UCX: print version infos + +NA OFI: fix log warning +--- + src/na/na_ofi.c | 12 +++++++----- + src/na/na_ucx.c | 10 ++++++++++ + 2 files changed, 17 insertions(+), 5 deletions(-) + +diff --git a/src/na/na_ofi.c b/src/na/na_ofi.c +index c7c3e0b3..682efe65 100644 +--- a/src/na/na_ofi.c ++++ b/src/na/na_ofi.c +@@ -8048,8 +8048,10 @@ na_ofi_check_protocol(const char *protocol_name) + uint32_t runtime_version = fi_version(); + na_return_t na_ret; + +- 
NA_LOG_SUBSYS_DEBUG(cls, "Querying info on libfabric v%d.%d", +- FI_MAJOR(runtime_version), FI_MINOR(runtime_version)); ++ NA_LOG_SUBSYS_INFO(cls, ++ "Querying info on libfabric (runtime v%d.%d, API v%d.%d)", ++ FI_MAJOR(runtime_version), FI_MINOR(runtime_version), ++ FI_MAJOR(FI_COMPILE_VERSION), FI_MINOR(FI_COMPILE_VERSION)); + NA_CHECK_SUBSYS_ERROR(cls, FI_VERSION_LT(runtime_version, NA_OFI_VERSION), + out, accept, false, + "runtime libfabric version (v%d.%d) is lower than required version " +@@ -9105,7 +9107,7 @@ na_ofi_mem_handle_create(na_class_t NA_UNUSED *na_class, void *buf, + + NA_LOG_SUBSYS_DEBUG(mem, + "Created mem handle %p (iov_base=%p, iov_len=%zu, iovcnt=1, " +- "flags=0x%lx, len=%zu)", ++ "flags=0x%" PRIx8 ", len=%" PRIu64 ")", + (void *) na_ofi_mem_handle, na_ofi_mem_handle->desc.iov.s[0].iov_base, + na_ofi_mem_handle->desc.iov.s[0].iov_len, + na_ofi_mem_handle->desc.info.flags, na_ofi_mem_handle->desc.info.len); +@@ -9444,8 +9446,8 @@ na_ofi_mem_handle_deserialize(na_class_t NA_UNUSED *na_class, + na_ofi_mem_handle->desc.info.iovcnt); + + NA_LOG_SUBSYS_DEBUG(mem, +- "Deserialized mem handle %p (iov_base=%p, iov_len=%zu, iovcnt=%zu, " +- "flags=0x%lx, len=%zu)", ++ "Deserialized mem handle %p (iov_base=%p, iov_len=%zu, iovcnt=%" PRIu64 ++ ", flags=0x%" PRIx8 ", len=%" PRIu64 ")", + (void *) na_ofi_mem_handle, na_ofi_mem_handle->desc.iov.s[0].iov_base, + na_ofi_mem_handle->desc.iov.s[0].iov_len, + na_ofi_mem_handle->desc.info.iovcnt, na_ofi_mem_handle->desc.info.flags, +diff --git a/src/na/na_ucx.c b/src/na/na_ucx.c +index 96501b27..7c0ac8d4 100644 +--- a/src/na/na_ucx.c ++++ b/src/na/na_ucx.c +@@ -3433,8 +3433,18 @@ na_ucx_check_protocol(const char *protocol_name) + .field_mask = UCP_PARAM_FIELD_FEATURES, .features = NA_UCX_FEATURES}; + ucp_context_h context = NULL; + ucs_status_t status; ++ unsigned int runtime_major_version, runtime_minor_version, ++ runtime_patch_version; + bool accept = false; + ++ ucp_get_version( ++ &runtime_major_version, 
&runtime_minor_version, &runtime_patch_version); ++ ++ NA_LOG_SUBSYS_INFO(cls, ++ "Querying info on UCX (runtime v%u.%u.%u, API v%u.%u)", ++ runtime_major_version, runtime_minor_version, runtime_patch_version, ++ UCP_API_MAJOR, UCP_API_MINOR); ++ + status = ucp_config_read(NULL, NULL, &config); + NA_CHECK_SUBSYS_ERROR_NORET(cls, status != UCS_OK, done, + "ucp_config_read() failed (%s)", ucs_status_string(status)); +-- +2.52.0 + diff --git a/deps/patches/mercury/0001_na_ucx.patch b/deps/patches/mercury/0001_na_ucx.patch deleted file mode 100644 index 57b39feef9d..00000000000 --- a/deps/patches/mercury/0001_na_ucx.patch +++ /dev/null @@ -1,110 +0,0 @@ -diff --git a/src/na/na_ucx.c b/src/na/na_ucx.c -index 84eb8b0..e4b6676 100644 ---- a/src/na/na_ucx.c -+++ b/src/na/na_ucx.c -@@ -614,7 +614,7 @@ na_ucx_addr_map_update(struct na_ucx_class *na_ucx_class, - */ - static na_return_t - na_ucx_addr_map_remove( -- struct na_ucx_map *na_ucx_map, ucs_sock_addr_t *addr_key); -+ struct na_ucx_map *na_ucx_map, struct na_ucx_addr *remove_addr); - - /** - * Hash connection ID. 
-@@ -1688,8 +1688,12 @@ na_ucp_listener_conn_cb(ucp_conn_request_h conn_request, void *arg) - .addr = (const struct sockaddr *) &conn_request_attrs.client_address, - .addrlen = sizeof(conn_request_attrs.client_address)}; - na_ucx_addr = na_ucx_addr_map_lookup(&na_ucx_class->addr_map, &addr_key); -- NA_CHECK_SUBSYS_ERROR_NORET(addr, na_ucx_addr != NULL, error, -- "An entry is already present for this address"); -+ -+ if (na_ucx_addr != NULL) { -+ NA_LOG_SUBSYS_WARNING(addr, -+ "An entry is already present for this address"); -+ na_ucx_addr_map_remove(&na_ucx_class->addr_map, na_ucx_addr); -+ } - - /* Insert new entry and create new address */ - na_ret = na_ucx_addr_map_insert(na_ucx_class, &na_ucx_class->addr_map, -@@ -1937,10 +1941,14 @@ na_ucp_ep_error_cb( - static void - na_ucp_ep_close(ucp_ep_h ep) - { -- ucs_status_ptr_t status_ptr = ucp_ep_close_nb(ep, UCP_EP_CLOSE_MODE_FORCE); -+ const ucp_request_param_t close_params = { -+ .op_attr_mask = UCP_OP_ATTR_FIELD_FLAGS, -+ .flags = UCP_EP_CLOSE_FLAG_FORCE}; -+ ucs_status_ptr_t status_ptr = ucp_ep_close_nbx(ep, &close_params); -+ - NA_CHECK_SUBSYS_ERROR_DONE(addr, - status_ptr != NULL && UCS_PTR_IS_ERR(status_ptr), -- "ucp_ep_close_nb() failed (%s)", -+ "ucp_ep_close_nbx() failed (%s)", - ucs_status_string(UCS_PTR_STATUS(status_ptr))); - } - -@@ -2722,7 +2730,7 @@ unlock: - - /*---------------------------------------------------------------------------*/ - static na_return_t --na_ucx_addr_map_remove(struct na_ucx_map *na_ucx_map, ucs_sock_addr_t *addr_key) -+na_ucx_addr_map_remove(struct na_ucx_map *na_ucx_map, struct na_ucx_addr *remove_addr) - { - struct na_ucx_addr *na_ucx_addr = NULL; - na_return_t ret = NA_SUCCESS; -@@ -2731,13 +2739,14 @@ na_ucx_addr_map_remove(struct na_ucx_map *na_ucx_map, ucs_sock_addr_t *addr_key) - hg_thread_rwlock_wrlock(&na_ucx_map->lock); - - na_ucx_addr = hg_hash_table_lookup( -- na_ucx_map->key_map, (hg_hash_table_key_t) addr_key); -- if (na_ucx_addr == HG_HASH_TABLE_NULL) -+ 
na_ucx_map->key_map, (hg_hash_table_key_t) &remove_addr->addr_key); -+ -+ if (na_ucx_addr == HG_HASH_TABLE_NULL || na_ucx_addr->ucp_ep != remove_addr->ucp_ep) - goto unlock; - - /* Remove addr key from primary map */ - rc = hg_hash_table_remove( -- na_ucx_map->key_map, (hg_hash_table_key_t) addr_key); -+ na_ucx_map->key_map, (hg_hash_table_key_t) &na_ucx_addr->addr_key); - NA_CHECK_SUBSYS_ERROR(addr, rc != 1, unlock, ret, NA_NOENTRY, - "hg_hash_table_remove() failed"); - -@@ -2841,7 +2850,7 @@ na_ucx_addr_release(struct na_ucx_addr *na_ucx_addr) - NA_UCX_PRINT_ADDR_KEY_INFO("Removing address", &na_ucx_addr->addr_key); - - na_ucx_addr_map_remove( -- &na_ucx_addr->na_ucx_class->addr_map, &na_ucx_addr->addr_key); -+ &na_ucx_addr->na_ucx_class->addr_map, na_ucx_addr); - } - - if (na_ucx_addr->ucp_ep != NULL) { -@@ -3023,6 +3032,18 @@ na_ucx_rma(struct na_ucx_class NA_UNUSED *na_ucx_class, na_context_t *context, - - /* There is no need to have a fully resolved address to start an RMA. - * This is only necessary for two-sided communication. */ -+ /* The above assumption is now in question, so the following will resolve -+ * the address if required. */ -+ -+ /* Check addr to ensure the EP for that addr is still valid */ -+ if (!(hg_atomic_get32(&na_ucx_addr->status) & NA_UCX_ADDR_RESOLVED)) { -+ ret = na_ucx_addr_map_update( -+ na_ucx_class, &na_ucx_class->addr_map, na_ucx_addr); -+ NA_CHECK_SUBSYS_NA_ERROR( -+ addr, error, ret, "Could not update NA UCX address"); -+ } -+ NA_CHECK_SUBSYS_ERROR(msg, na_ucx_addr->ucp_ep == NULL, error, ret, -+ NA_ADDRNOTAVAIL, "UCP endpoint is NULL for that address"); - - /* TODO UCX requires the remote key to be bound to the origin, do we need a - * new API? 
*/ -@@ -3061,6 +3082,9 @@ na_ucx_rma_key_resolve(ucp_ep_h ep, struct na_ucx_mem_handle *na_ucx_mem_handle, - - hg_thread_mutex_lock(&na_ucx_mem_handle->rkey_unpack_lock); - -+ NA_CHECK_SUBSYS_ERROR( -+ mem, ep == NULL, error, ret, NA_INVALID_ARG, "Invalid endpoint (%p)", ep); -+ - switch (hg_atomic_get32(&na_ucx_mem_handle->type)) { - case NA_UCX_MEM_HANDLE_REMOTE_PACKED: { - ucs_status_t status = ucp_ep_rkey_unpack(ep, diff --git a/deps/patches/mercury/0002_na_ucx_ep_flush.patch b/deps/patches/mercury/0002_na_ucx_ep_flush.patch deleted file mode 100644 index f7b38d304aa..00000000000 --- a/deps/patches/mercury/0002_na_ucx_ep_flush.patch +++ /dev/null @@ -1,64 +0,0 @@ -diff --git a/src/na/na_ucx.c b/src/na/na_ucx.c -index 6e9c3b0..2f157da 100644 ---- a/src/na/na_ucx.c -+++ b/src/na/na_ucx.c -@@ -441,6 +441,12 @@ na_ucp_ep_create(ucp_worker_h worker, ucp_ep_params_t *ep_params, - static void - na_ucp_ep_error_cb(void *arg, ucp_ep_h ep, ucs_status_t status); - -+/** -+ * Flush endpoint. -+ */ -+static ucs_status_ptr_t -+na_ucp_ep_flush(ucp_ep_h ep); -+ - /** - * Close endpoint. - */ -@@ -1940,6 +1946,21 @@ na_ucp_ep_error_cb( - na_ucx_addr_ref_decr(na_ucx_addr); - } - -+/*---------------------------------------------------------------------------*/ -+static ucs_status_ptr_t -+na_ucp_ep_flush(ucp_ep_h ep) -+{ -+ const ucp_request_param_t flush_params = { -+ .op_attr_mask = 0}; -+ ucs_status_ptr_t status_ptr = ucp_ep_flush_nbx(ep, &flush_params); -+ -+ NA_CHECK_SUBSYS_ERROR_DONE(addr, -+ status_ptr != NULL && UCS_PTR_IS_ERR(status_ptr), -+ "ucp_ep_flush_nb() failed (%s)", -+ ucs_status_string(UCS_PTR_STATUS(status_ptr))); -+ return status_ptr; -+} -+ - /*---------------------------------------------------------------------------*/ - static void - na_ucp_ep_close(ucp_ep_h ep) -@@ -2859,8 +2880,23 @@ na_ucx_addr_release(struct na_ucx_addr *na_ucx_addr) - if (na_ucx_addr->ucp_ep != NULL) { - /* NB. 
for deserialized addresses that are not "connected" addresses, do - * not close the EP */ -- if (na_ucx_addr->worker_addr == NULL) -+ if (na_ucx_addr->worker_addr == NULL) { -+ if (!na_ucx_addr->na_ucx_class->ucp_listener) { -+ ucs_status_ptr_t status_ptr = na_ucp_ep_flush(na_ucx_addr->ucp_ep); -+ -+ if (UCS_PTR_IS_PTR(status_ptr)) { -+ ucs_status_t status; -+ -+ do { -+ ucp_worker_progress(na_ucx_addr->na_ucx_class->ucp_worker); -+ status = ucp_request_check_status(status_ptr); -+ } while (status == UCS_INPROGRESS); -+ ucp_request_free(status_ptr); -+ } -+ } -+ - na_ucp_ep_close(na_ucx_addr->ucp_ep); -+ } - na_ucx_addr->ucp_ep = NULL; - } - diff --git a/deps/patches/mercury/0002_ofi_counters.patch b/deps/patches/mercury/0002_ofi_counters.patch new file mode 100644 index 00000000000..7cbdc543bd8 --- /dev/null +++ b/deps/patches/mercury/0002_ofi_counters.patch @@ -0,0 +1,1196 @@ +From 187759c6cd99205c52fc5b77fdb6c2d3536f9021 Mon Sep 17 00:00:00 2001 +From: Jerome Soumagne +Date: Thu, 18 Dec 2025 17:47:03 -0600 +Subject: [PATCH 2/3] NA OFI: add counters for monitoring tx/rx/rma/cq counts + +Monitor mr and addr counts + +NA: add NA_Diag_dump_counters() routine to dump counters +if HG_LOG_LEVEL>=min_debug is set + +HG Core: clean up counters + +HG util: add ability to remove counters + +NA OFI: finalize counters after fabric is closed + +HG util: bump minor version +--- + CMakeLists.txt | 2 +- + src/mercury.c | 1 + + src/mercury_core.c | 40 ++--- + src/na/CMakeLists.txt | 5 + + src/na/na.c | 9 + + src/na/na.h | 6 + + src/na/na_config.h.in | 1 + + src/na/na_ofi.c | 360 +++++++++++++++++++++++++++++++++------- + src/util/mercury_dlog.c | 37 ++++- + src/util/mercury_dlog.h | 18 ++ + src/util/mercury_log.c | 14 +- + src/util/mercury_log.h | 8 + + src/util/version.txt | 2 +- + 13 files changed, 413 insertions(+), 90 deletions(-) + +diff --git a/CMakeLists.txt b/CMakeLists.txt +index e4e79711..e71944f1 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -192,7 +192,7 @@ 
endif() + # Enable diagnostics counters separately from debug. + #------------------------------------------------------------------------------ + option(MERCURY_ENABLE_COUNTERS "Enable diagnostics counters." OFF) +-if(MERCURY_ENABLE_COUNTERS) ++if(MERCURY_ENABLE_COUNTERS AND NOT WIN32) + set(HG_HAS_DIAG 1) + else() + set(HG_HAS_DIAG 0) +diff --git a/src/mercury.c b/src/mercury.c +index 2c062384..6f0376ee 100644 +--- a/src/mercury.c ++++ b/src/mercury.c +@@ -1178,6 +1178,7 @@ HG_Diag_dump_counters(void) + #ifndef _WIN32 + hg_log_dump_counters(&HG_LOG_OUTLET(hg_diag)); + #endif ++ NA_Diag_dump_counters(); + } + + /*---------------------------------------------------------------------------*/ +diff --git a/src/mercury_core.c b/src/mercury_core.c +index 97cc4fb2..7abd8a47 100644 +--- a/src/mercury_core.c ++++ b/src/mercury_core.c +@@ -185,7 +185,7 @@ struct hg_core_private_class { + struct hg_core_map rpc_map; /* RPC Map */ + struct hg_core_more_data_cb more_data_cb; /* More data callbacks */ + na_tag_t request_max_tag; /* Max value for tag */ +-#if defined(HG_HAS_DIAG) && !defined(_WIN32) ++#ifdef HG_HAS_DIAG + struct hg_core_counters counters; /* Diag counters */ + #endif + hg_atomic_int32_t n_contexts; /* Total number of contexts */ +@@ -369,7 +369,7 @@ struct hg_core_private_handle { + uint8_t cookie; /* Cookie */ + bool multi_recv_copy; /* Copy on multi-recv */ + bool reuse; /* Re-use handle once ref_count is 0 */ +-#if defined(HG_HAS_DIAG) && !defined(_WIN32) ++#ifdef HG_HAS_DIAG + bool active; + #endif + }; +@@ -405,7 +405,7 @@ hg_core_op_type_to_string(enum hg_core_op_type op_type); + /** + * Init counters. + */ +-#if defined(HG_HAS_DIAG) && !defined(_WIN32) ++#ifdef HG_HAS_DIAG + static void + hg_core_counters_init(struct hg_core_counters *hg_core_counters); + #endif +@@ -447,7 +447,7 @@ hg_core_finalize(struct hg_core_private_class *hg_core_class); + /** + * Get counters. 
+ */ +-#if defined(HG_HAS_DIAG) && !defined(_WIN32) ++#ifdef HG_HAS_DIAG + static void + hg_core_class_get_counters(const struct hg_core_counters *counters, + struct hg_diag_counters *diag_counters); +@@ -1091,7 +1091,7 @@ hg_core_op_type_to_string(enum hg_core_op_type op_type) + #endif + + /*---------------------------------------------------------------------------*/ +-#if defined(HG_HAS_DIAG) && !defined(_WIN32) ++#ifdef HG_HAS_DIAG + static void + hg_core_counters_init(struct hg_core_counters *hg_core_counters) + { +@@ -1325,7 +1325,7 @@ hg_core_init(const char *na_info_string, bool na_listen, unsigned int version, + hg_core_class->init_info.listen = na_listen; + + /* Stats / counters */ +-#if defined(HG_HAS_DIAG) && !defined(_WIN32) ++#ifdef HG_HAS_DIAG + hg_core_counters_init(&hg_core_class->counters); + #endif + +@@ -1521,7 +1521,7 @@ error: + } + + /*---------------------------------------------------------------------------*/ +-#if defined(HG_HAS_DIAG) && !defined(_WIN32) ++#ifdef HG_HAS_DIAG + static void + hg_core_class_get_counters(const struct hg_core_counters *counters, + struct hg_diag_counters *diag_counters) +@@ -3445,7 +3445,7 @@ hg_core_destroy(struct hg_core_private_handle *hg_core_handle) + return HG_SUCCESS; /* Cannot free yet */ + } + +-#if defined(HG_HAS_DIAG) && !defined(_WIN32) ++#ifdef HG_HAS_DIAG + if (hg_core_handle->active) { + hg_atomic_decr64(HG_CORE_HANDLE_CLASS(hg_core_handle) + ->counters.rpc_req_recv_active_count); +@@ -4048,7 +4048,7 @@ hg_core_forward(struct hg_core_private_handle *hg_core_handle, + hg_core_handle->request_callback = callback; + hg_core_handle->request_arg = arg; + +-#if defined(HG_HAS_DIAG) && !defined(_WIN32) ++#ifdef HG_HAS_DIAG + /* Increment counter */ + hg_atomic_incr64( + HG_CORE_HANDLE_CLASS(hg_core_handle)->counters.rpc_req_sent_count); +@@ -4263,7 +4263,7 @@ hg_core_respond(struct hg_core_private_handle *hg_core_handle, + hg_core_handle->response_callback = callback; + hg_core_handle->response_arg = 
arg; + +-#if defined(HG_HAS_DIAG) && !defined(_WIN32) ++#ifdef HG_HAS_DIAG + /* Increment counter */ + hg_atomic_incr64( + HG_CORE_HANDLE_CLASS(hg_core_handle)->counters.rpc_resp_sent_count); +@@ -4499,7 +4499,7 @@ hg_core_recv_input_cb(const struct na_cb_info *callback_info) + hg_thread_spin_lock(&hg_core_handle_pool->pending_list.lock); + LIST_REMOVE(hg_core_handle, pending); + hg_thread_spin_unlock(&hg_core_handle_pool->pending_list.lock); +-#if defined(HG_HAS_DIAG) && !defined(_WIN32) ++#ifdef HG_HAS_DIAG + /* Increment counter */ + hg_atomic_incr64(HG_CORE_HANDLE_CLASS(hg_core_handle) + ->counters.rpc_req_recv_active_count); +@@ -4608,7 +4608,7 @@ hg_core_multi_recv_input_cb(const struct na_cb_info *callback_info) + ret = hg_core_handle_pool_get(context->handle_pool, &hg_core_handle); + HG_CHECK_SUBSYS_HG_ERROR( + rpc, error, ret, "Could not get handle from pool"); +-#if defined(HG_HAS_DIAG) && !defined(_WIN32) ++#ifdef HG_HAS_DIAG + /* Increment counter */ + hg_atomic_incr64(HG_CORE_HANDLE_CLASS(hg_core_handle) + ->counters.rpc_req_recv_active_count); +@@ -4665,7 +4665,7 @@ hg_core_multi_recv_input_cb(const struct na_cb_info *callback_info) + "Copying multi-recv payload of size %zu for handle (%p)", + hg_core_handle->core_handle.in_buf_used, + (void *) hg_core_handle); +-#if defined(HG_HAS_DIAG) && !defined(_WIN32) ++#ifdef HG_HAS_DIAG + /* Increment counter */ + hg_atomic_incr64(HG_CORE_CONTEXT_CLASS(context) + ->counters.rpc_multi_recv_copy_count); +@@ -4763,7 +4763,7 @@ hg_core_process_input(struct hg_core_private_handle *hg_core_handle) + uint32_t flags = (uint32_t) hg_atomic_get32(&hg_core_handle->flags); + hg_return_t ret; + +-#if defined(HG_HAS_DIAG) && !defined(_WIN32) ++#ifdef HG_HAS_DIAG + /* Increment counter */ + hg_atomic_incr64(hg_core_class->counters.rpc_req_recv_count); + #endif +@@ -4812,7 +4812,7 @@ hg_core_process_input(struct hg_core_private_handle *hg_core_handle) + "Handle (%p) expected_count incr to %" PRId32, + (void *) hg_core_handle, 
expected_count); + +-#if defined(HG_HAS_DIAG) && !defined(_WIN32) ++#ifdef HG_HAS_DIAG + /* Increment counter */ + hg_atomic_incr64(hg_core_class->counters.rpc_req_extra_count); + #endif +@@ -4936,7 +4936,7 @@ hg_core_process_output(struct hg_core_private_handle *hg_core_handle) + uint32_t flags = (uint32_t) hg_atomic_get32(&hg_core_handle->flags); + hg_return_t ret; + +-#if defined(HG_HAS_DIAG) && !defined(_WIN32) ++#ifdef HG_HAS_DIAG + /* Increment counter */ + hg_atomic_incr64(hg_core_class->counters.rpc_resp_recv_count); + #endif +@@ -4980,7 +4980,7 @@ hg_core_process_output(struct hg_core_private_handle *hg_core_handle) + "Handle (%p) expected_count incr to %" PRId32, + (void *) hg_core_handle, expected_count); + +-#if defined(HG_HAS_DIAG) && !defined(_WIN32) ++#ifdef HG_HAS_DIAG + /* Increment counter */ + hg_atomic_incr64(hg_core_class->counters.rpc_resp_extra_count); + #endif +@@ -5319,7 +5319,7 @@ hg_core_completion_add(struct hg_core_context *core_context, + struct hg_core_completion_queue *backfill_queue = &context->backfill_queue; + int rc; + +-#if defined(HG_HAS_DIAG) && !defined(_WIN32) ++#ifdef HG_HAS_DIAG + /* Increment counter */ + if (hg_completion_entry->op_type == HG_BULK) + hg_atomic_incr64(HG_CORE_CONTEXT_CLASS(context)->counters.bulk_count); +@@ -6212,7 +6212,7 @@ hg_return_t + HG_Core_class_get_counters(const hg_core_class_t *hg_core_class, + struct hg_diag_counters *diag_counters) + { +-#if defined(HG_HAS_DIAG) && !defined(_WIN32) ++#ifdef HG_HAS_DIAG + const struct hg_core_private_class *private_class = + (const struct hg_core_private_class *) hg_core_class; + #endif +@@ -6222,7 +6222,7 @@ HG_Core_class_get_counters(const hg_core_class_t *hg_core_class, + HG_INVALID_ARG, "NULL HG core class"); + HG_CHECK_SUBSYS_ERROR(cls, diag_counters == NULL, error, ret, + HG_INVALID_ARG, "NULL pointer to diag_counters"); +-#if defined(HG_HAS_DIAG) && !defined(_WIN32) ++#ifdef HG_HAS_DIAG + hg_core_class_get_counters(&private_class->counters, 
diag_counters); + #else + HG_LOG_SUBSYS_ERROR(cls, "Counters not supported in current build, please " +diff --git a/src/na/CMakeLists.txt b/src/na/CMakeLists.txt +index 115e70bb..d99ed05d 100644 +--- a/src/na/CMakeLists.txt ++++ b/src/na/CMakeLists.txt +@@ -59,6 +59,11 @@ set(NA_BUILD_INCLUDE_DEPENDENCIES + #------------------------------------------------------------------------------ + # Internal dependencies + #------------------------------------------------------------------------------ ++# Diagnostics counters ++if(HG_HAS_DIAG) ++ set(NA_HAS_DIAG 1) ++endif() ++ + # Multi progress + if(NOT HG_ALLOW_MULTI_PROGRESS) + option(NA_ALLOW_MULTI_PROGRESS "Allow concurrent progress on single context." ON) +diff --git a/src/na/na.c b/src/na/na.c +index d54db626..61abf0a3 100644 +--- a/src/na/na.c ++++ b/src/na/na.c +@@ -1010,6 +1010,15 @@ NA_Set_log_level(const char *level) + hg_log_set_subsys_level(NA_SUBSYS_NAME_STRING, hg_log_name_to_level(level)); + } + ++/*---------------------------------------------------------------------------*/ ++void ++NA_Diag_dump_counters(void) ++{ ++#ifndef _WIN32 ++ hg_log_dump_counters(&HG_LOG_OUTLET(NA_SUBSYS_NAME)); ++#endif ++} ++ + /*---------------------------------------------------------------------------*/ + na_context_t * + NA_Context_create(na_class_t *na_class) +diff --git a/src/na/na.h b/src/na/na.h +index ff65ceb2..3d335fe7 100644 +--- a/src/na/na.h ++++ b/src/na/na.h +@@ -153,6 +153,12 @@ NA_Has_opt_feature( + NA_PUBLIC void + NA_Set_log_level(const char *level); + ++/** ++ * Dump diagnostic counters into the existing log stream. ++ */ ++NA_PUBLIC void ++NA_Diag_dump_counters(void); ++ + /** + * Return the name of the NA class. 
+ * +diff --git a/src/na/na_config.h.in b/src/na/na_config.h.in +index 8468de8c..1419bee6 100644 +--- a/src/na/na_config.h.in ++++ b/src/na/na_config.h.in +@@ -85,6 +85,7 @@ + + /* Build Options */ + #cmakedefine NA_HAS_DEBUG ++#cmakedefine NA_HAS_DIAG + #cmakedefine NA_HAS_MULTI_PROGRESS + + /* HWLOC */ +diff --git a/src/na/na_ofi.c b/src/na/na_ofi.c +index 682efe65..d385d279 100644 +--- a/src/na/na_ofi.c ++++ b/src/na/na_ofi.c +@@ -262,6 +262,9 @@ static unsigned long const na_ofi_prov_flags[] = {NA_OFI_PROV_TYPES}; + /* Prov info array init count */ + #define NA_OFI_PROV_INFO_COUNT (32) + ++/* Max counter name length */ ++#define NA_OFI_MAX_COUNTER_NAME (64) ++ + /* Address / URI max len */ + #define NA_OFI_MAX_URI_LEN (128) + +@@ -787,7 +790,6 @@ struct na_ofi_domain { + hg_atomic_int64_t requested_key; /* Requested key if not FI_MR_PROV_KEY */ + int64_t max_key; /* Max key if not FI_MR_PROV_KEY */ + uint64_t max_tag; /* Max tag from CQ data size */ +- hg_atomic_int32_t mr_reg_count; /* Number of MR registered */ + bool no_wait; /* Wait disabled on domain */ + bool av_auth_key; /* Use FI_AV_AUTH_KEY */ + bool av_user_id; /* Use FI_AV_USER_ID */ +@@ -830,15 +832,26 @@ struct na_ofi_verify_info { + enum na_ofi_prov_type prov_type; /* Provider type */ + }; + +-/* OFI class */ +-struct na_ofi_class { +- struct na_ofi_addr_pool addr_pool; /* Addr pool */ +- struct fi_info *fi_info; /* OFI info */ +- struct na_ofi_fabric *fabric; /* Fabric pointer */ +- struct na_ofi_domain *domain; /* Domain pointer */ +- struct na_ofi_endpoint *endpoint; /* Endpoint pointer */ +- struct hg_mem_pool *send_pool; /* Msg send buf pool */ +- struct hg_mem_pool *recv_pool; /* Msg recv buf pool */ ++#ifdef NA_HAS_DIAG ++/* OFI counters */ ++struct na_ofi_counters { ++ char tx_count_string[NA_OFI_MAX_COUNTER_NAME]; /* TX count string */ ++ char rx_count_string[NA_OFI_MAX_COUNTER_NAME]; /* RX count string */ ++ char rma_count_string[NA_OFI_MAX_COUNTER_NAME]; /* RMA count string */ ++ char 
mr_count_string[NA_OFI_MAX_COUNTER_NAME]; /* MR count string */ ++ char addr_count_string[NA_OFI_MAX_COUNTER_NAME]; /* Addr count string */ ++ char cq_count_string[NA_OFI_MAX_COUNTER_NAME]; /* CQ count string */ ++ hg_atomic_int32_t *tx_count; /* Number of active sends */ ++ hg_atomic_int32_t *rx_count; /* Number of active receives */ ++ hg_atomic_int32_t *rma_count; /* Number of active RMAs */ ++ hg_atomic_int32_t *mr_count; /* Number of active MRs */ ++ hg_atomic_int32_t *addr_count; /* Number of addresses inserted */ ++ hg_atomic_int32_t *cq_count; /* Number of CQ events */ ++}; ++#endif ++ ++/* OFI ops */ ++struct na_ofi_ops { + na_return_t (*msg_send_unexpected)( + struct fid_ep *, const struct na_ofi_msg_info *, void *); + na_return_t (*msg_recv_unexpected)( +@@ -853,14 +866,29 @@ struct na_ofi_class { + const char *msg_recv_unexpected_string; /* Error log string */ + const char *msg_send_expected_string; /* Error log string */ + const char *msg_recv_expected_string; /* Error log string */ +- unsigned long opt_features; /* Optional feature flags */ +- hg_atomic_int32_t n_contexts; /* Number of context */ +- unsigned int op_retry_timeout; /* Retry timeout */ +- unsigned int op_retry_period; /* Time elapsed until next retry */ +- uint8_t context_max; /* Max number of contexts */ +- bool no_wait; /* Ignore wait object */ +- bool use_sep; /* Use scalable endpoints */ +- bool finalizing; /* Class being destroyed */ ++}; ++ ++/* OFI class */ ++struct na_ofi_class { ++ struct na_ofi_addr_pool addr_pool; /* Addr pool */ ++ struct fi_info *fi_info; /* OFI info */ ++ struct na_ofi_fabric *fabric; /* Fabric pointer */ ++ struct na_ofi_domain *domain; /* Domain pointer */ ++ struct na_ofi_endpoint *endpoint; /* Endpoint pointer */ ++ struct hg_mem_pool *send_pool; /* Msg send buf pool */ ++ struct hg_mem_pool *recv_pool; /* Msg recv buf pool */ ++ struct na_ofi_ops ops; /* OFI operations */ ++#ifdef NA_HAS_DIAG ++ struct na_ofi_counters counters; /* OFI counters */ 
++#endif ++ unsigned long opt_features; /* Optional feature flags */ ++ hg_atomic_int32_t n_contexts; /* Number of context */ ++ unsigned int op_retry_timeout; /* Retry timeout */ ++ unsigned int op_retry_period; /* Time elapsed until next retry */ ++ uint8_t context_max; /* Max number of contexts */ ++ bool no_wait; /* Ignore wait object */ ++ bool use_sep; /* Use scalable endpoints */ ++ bool finalizing; /* Class being destroyed */ + }; + + /********************/ +@@ -1145,6 +1173,20 @@ na_ofi_class_alloc(void); + static na_return_t + na_ofi_class_free(struct na_ofi_class *na_ofi_class); + ++#ifdef NA_HAS_DIAG ++/** ++ * Init counters. ++ */ ++static void ++na_ofi_counters_init(struct na_ofi_counters *counters, int class_id); ++ ++/** ++ * Finalize counters. ++ */ ++static void ++na_ofi_counters_finalize(struct na_ofi_counters *counters); ++#endif ++ + /** + * Configure class parameters from environment variables. + */ +@@ -1762,6 +1804,14 @@ static void + na_ofi_op_retry_abort_addr( + struct na_ofi_context *na_ofi_context, fi_addr_t fi_addr, na_return_t ret); + ++/** ++ * Process counters. ++ */ ++#ifdef NA_HAS_DIAG ++static void ++na_ofi_cq_process_counters(struct na_ofi_op_id *na_ofi_op_id); ++#endif ++ + /** + * Complete operation ID. 
+ */ +@@ -3307,6 +3357,12 @@ na_ofi_addr_map_insert(struct na_ofi_class *na_ofi_class, + na_ofi_errno_to_na(-rc), + "fi_av_remove(%" PRIu64 ") failed, rc: %d (%s)", + na_ofi_addr->fi_addr, rc, fi_strerror(-rc)); ++ ++#ifdef NA_HAS_DIAG ++ /* Counters */ ++ hg_atomic_decr32(na_ofi_class->counters.addr_count); ++#endif ++ + addr_map_exist = true; + } + } else { +@@ -3353,6 +3409,11 @@ na_ofi_addr_map_insert(struct na_ofi_class *na_ofi_class, + addr_str, &addr_str_len), + rc); + ++#ifdef NA_HAS_DIAG ++ /* Counters */ ++ hg_atomic_incr32(na_ofi_class->counters.addr_count); ++#endif ++ + #if FI_VERSION_GE(FI_COMPILE_VERSION, FI_VERSION(1, 20)) + if (na_ofi_class->domain->av_auth_key) { + size_t addrlen = sizeof(na_ofi_addr->addr_key.addr); +@@ -3458,6 +3519,11 @@ na_ofi_addr_map_remove( + "fi_av_remove(%" PRIu64 ") failed, rc: %d (%s)", na_ofi_addr->fi_addr, + rc, fi_strerror(-rc)); + ++#ifdef NA_HAS_DIAG ++ /* Counters */ ++ hg_atomic_decr32(na_ofi_addr->class->counters.addr_count); ++#endif ++ + NA_LOG_SUBSYS_DEBUG( + addr, "Removed addr for FI addr %" PRIu64, na_ofi_addr->fi_addr); + +@@ -4229,6 +4295,9 @@ static struct na_ofi_class * + na_ofi_class_alloc(void) + { + struct na_ofi_class *na_ofi_class = NULL; ++#ifdef NA_HAS_DIAG ++ static int class_id = 0; ++#endif + int rc; + + /* Create private data */ +@@ -4237,6 +4306,10 @@ na_ofi_class_alloc(void) + "Could not allocate NA private data class"); + hg_atomic_init32(&na_ofi_class->n_contexts, 0); + ++#ifdef NA_HAS_DIAG ++ na_ofi_counters_init(&na_ofi_class->counters, class_id++); ++#endif ++ + /* Initialize addr pool */ + rc = hg_thread_spin_init(&na_ofi_class->addr_pool.lock); + NA_CHECK_SUBSYS_ERROR_NORET( +@@ -4301,6 +4374,11 @@ na_ofi_class_free(struct na_ofi_class *na_ofi_class) + na_ofi_class->fabric = NULL; + } + ++#ifdef NA_HAS_DIAG ++ /* Remove counters */ ++ na_ofi_counters_finalize(&na_ofi_class->counters); ++#endif ++ + /* Free info */ + if (na_ofi_class->fi_info) + 
na_ofi_freeinfo(na_ofi_class->fi_info); +@@ -4313,6 +4391,50 @@ out: + return ret; + } + ++/*---------------------------------------------------------------------------*/ ++#ifdef NA_HAS_DIAG ++static void ++na_ofi_counters_init(struct na_ofi_counters *counters, int class_id) ++{ ++ snprintf(counters->tx_count_string, sizeof(counters->tx_count_string), ++ "[%d] na_ofi_tx_count ", class_id); ++ snprintf(counters->rx_count_string, sizeof(counters->rx_count_string), ++ "[%d] na_ofi_rx_count ", class_id); ++ snprintf(counters->rma_count_string, sizeof(counters->rma_count_string), ++ "[%d] na_ofi_rma_count ", class_id); ++ snprintf(counters->mr_count_string, sizeof(counters->mr_count_string), ++ "[%d] na_ofi_mr_count ", class_id); ++ snprintf(counters->addr_count_string, sizeof(counters->addr_count_string), ++ "[%d] na_ofi_addr_count", class_id); ++ snprintf(counters->cq_count_string, sizeof(counters->cq_count_string), ++ "[%d] na_ofi_cq_count ", class_id); ++ HG_LOG_ADD_COUNTER32(na, &counters->tx_count, counters->tx_count_string, ++ "Number of active sends"); ++ HG_LOG_ADD_COUNTER32(na, &counters->rx_count, counters->rx_count_string, ++ "Number of active recvs"); ++ HG_LOG_ADD_COUNTER32(na, &counters->rma_count, counters->rma_count_string, ++ "Number of active RMAs"); ++ HG_LOG_ADD_COUNTER32(na, &counters->mr_count, counters->mr_count_string, ++ "Number of active MRs"); ++ HG_LOG_ADD_COUNTER32(na, &counters->addr_count, counters->addr_count_string, ++ "Number of addresses inserted"); ++ HG_LOG_ADD_COUNTER32(na, &counters->cq_count, counters->cq_count_string, ++ "Number of events still in CQ"); ++} ++ ++/*---------------------------------------------------------------------------*/ ++static void ++na_ofi_counters_finalize(struct na_ofi_counters *counters) ++{ ++ HG_LOG_DEL_COUNTER32(na, counters->tx_count); ++ HG_LOG_DEL_COUNTER32(na, counters->rx_count); ++ HG_LOG_DEL_COUNTER32(na, counters->rma_count); ++ HG_LOG_DEL_COUNTER32(na, counters->mr_count); ++ 
HG_LOG_DEL_COUNTER32(na, counters->addr_count); ++ HG_LOG_DEL_COUNTER32(na, counters->cq_count); ++} ++#endif ++ + /*---------------------------------------------------------------------------*/ + static na_return_t + na_ofi_class_env_config(struct na_ofi_class *na_ofi_class) +@@ -4323,26 +4445,26 @@ na_ofi_class_env_config(struct na_ofi_class *na_ofi_class) + /* Set unexpected msg callbacks */ + env = getenv("NA_OFI_UNEXPECTED_TAG_MSG"); + if (env == NULL || env[0] == '0' || tolower(env[0]) == 'n') { +- na_ofi_class->msg_send_unexpected = na_ofi_msg_send; +- na_ofi_class->msg_send_unexpected_string = "fi_senddata"; +- na_ofi_class->msg_recv_unexpected = na_ofi_msg_recv; +- na_ofi_class->msg_recv_unexpected_string = "fi_recv"; ++ na_ofi_class->ops.msg_send_unexpected = na_ofi_msg_send; ++ na_ofi_class->ops.msg_send_unexpected_string = "fi_senddata"; ++ na_ofi_class->ops.msg_recv_unexpected = na_ofi_msg_recv; ++ na_ofi_class->ops.msg_recv_unexpected_string = "fi_recv"; + } else { + NA_LOG_SUBSYS_DEBUG(cls, + "NA_OFI_UNEXPECTED_TAG_MSG set to %s, forcing unexpected messages " + "to use tagged recvs", + env); +- na_ofi_class->msg_send_unexpected = na_ofi_tag_send; +- na_ofi_class->msg_send_unexpected_string = "fi_tsend"; +- na_ofi_class->msg_recv_unexpected = na_ofi_tag_recv; +- na_ofi_class->msg_recv_unexpected_string = "fi_trecv"; ++ na_ofi_class->ops.msg_send_unexpected = na_ofi_tag_send; ++ na_ofi_class->ops.msg_send_unexpected_string = "fi_tsend"; ++ na_ofi_class->ops.msg_recv_unexpected = na_ofi_tag_recv; ++ na_ofi_class->ops.msg_recv_unexpected_string = "fi_trecv"; + } + + /* Set expected msg callbacks */ +- na_ofi_class->msg_send_expected = na_ofi_tag_send; +- na_ofi_class->msg_send_expected_string = "fi_tsend"; +- na_ofi_class->msg_recv_expected = na_ofi_tag_recv; +- na_ofi_class->msg_recv_expected_string = "fi_trecv"; ++ na_ofi_class->ops.msg_send_expected = na_ofi_tag_send; ++ na_ofi_class->ops.msg_send_expected_string = "fi_tsend"; ++ 
na_ofi_class->ops.msg_recv_expected = na_ofi_tag_recv; ++ na_ofi_class->ops.msg_recv_expected_string = "fi_trecv"; + + /* Default retry timeouts in ms */ + if ((env = getenv("NA_OFI_OP_RETRY_TIMEOUT")) != NULL) { +@@ -5073,7 +5195,6 @@ na_ofi_domain_open(const struct na_ofi_fabric *na_ofi_fabric, + hg_atomic_init64(&na_ofi_domain->requested_key, 0); + /* No need to take a refcount on fabric */ + na_ofi_domain->fabric = na_ofi_fabric; +- hg_atomic_init32(&na_ofi_domain->mr_reg_count, 0); + + /* Dup name */ + na_ofi_domain->name = strdup(domain_attr->name); +@@ -6056,6 +6177,11 @@ na_ofi_mem_buf_register(const void *buf, size_t len, unsigned long flags, + + /* Register memory if FI_MR_LOCAL is set and provider uses it */ + if (na_ofi_class->fi_info->domain_attr->mr_mode & FI_MR_LOCAL) { ++#ifdef NA_HAS_DIAG ++ int32_t mr_cnt = hg_atomic_get32(na_ofi_class->counters.mr_count); ++#else ++ int32_t mr_cnt = -1; ++#endif + struct fid_mr *mr_hdl = NULL; + uint64_t access = 0; + int rc; +@@ -6072,10 +6198,11 @@ na_ofi_mem_buf_register(const void *buf, size_t len, unsigned long flags, + NA_CHECK_SUBSYS_ERROR(mem, rc != 0, out, ret, HG_UTIL_FAIL, + "fi_mr_reg(buf=%p, len=%zu, flags=%lu) failed, rc: %d (%s), " + "mr_reg_count: %d", +- buf, len, flags, rc, fi_strerror(-rc), +- hg_atomic_get32(&na_ofi_class->domain->mr_reg_count)); ++ buf, len, flags, rc, fi_strerror(-rc), mr_cnt); + +- hg_atomic_incr32(&na_ofi_class->domain->mr_reg_count); ++#ifdef NA_HAS_DIAG ++ hg_atomic_incr32(na_ofi_class->counters.mr_count); ++#endif + *handle_p = (void *) mr_hdl; + } else + *handle_p = NULL; +@@ -6093,11 +6220,17 @@ na_ofi_mem_buf_deregister(void *handle, void *arg) + /* Release MR handle is there was any */ + if (handle) { + struct fid_mr *mr_hdl = (struct fid_mr *) handle; ++#ifdef NA_HAS_DIAG + struct na_ofi_class *na_ofi_class = (struct na_ofi_class *) arg; ++#else ++ (void) arg; ++#endif + int rc = fi_close(&mr_hdl->fid); + NA_CHECK_SUBSYS_ERROR(mem, rc != 0, out, ret, HG_UTIL_FAIL, 
+ "fi_close() mr_hdl failed, rc: %d (%s)", rc, fi_strerror(-rc)); +- hg_atomic_decr32(&na_ofi_class->domain->mr_reg_count); ++#ifdef NA_HAS_DIAG ++ hg_atomic_decr32(na_ofi_class->counters.mr_count); ++#endif + } + + out: +@@ -6159,6 +6292,11 @@ na_ofi_msg_send_common(struct na_ofi_class *na_ofi_class, + if ((int) na_ofi_class->fi_info->addr_format == FI_ADDR_OPX) + na_ofi_op_id->fi_ctx[0].internal[0] = &na_ofi_addr->addr_key.addr.opx; + ++#ifdef NA_HAS_DIAG ++ /* Counters */ ++ hg_atomic_incr32(na_ofi_class->counters.tx_count); ++#endif ++ + ret = msg_op( + na_ofi_context->fi_tx, &na_ofi_op_id->info.msg, &na_ofi_op_id->fi_ctx); + if (ret != NA_SUCCESS) { +@@ -6166,8 +6304,12 @@ na_ofi_msg_send_common(struct na_ofi_class *na_ofi_class, + na_ofi_op_id->retry_op.msg = msg_op; + na_ofi_op_retry( + na_ofi_context, na_ofi_class->op_retry_timeout, na_ofi_op_id); +- } else ++ } else { ++#ifdef NA_HAS_DIAG ++ hg_atomic_decr32(na_ofi_class->counters.tx_count); ++#endif + NA_GOTO_SUBSYS_ERROR_NORET(msg, release, "Could not post msg send"); ++ } + } + + return NA_SUCCESS; +@@ -6224,6 +6366,11 @@ na_ofi_msg_recv_common(struct na_ofi_class *na_ofi_class, + .tag = tag, + .tag_mask = tag_mask}; + ++#ifdef NA_HAS_DIAG ++ /* Counters */ ++ hg_atomic_incr32(na_ofi_class->counters.rx_count); ++#endif ++ + ret = msg_op( + na_ofi_context->fi_rx, &na_ofi_op_id->info.msg, &na_ofi_op_id->fi_ctx); + if (ret != NA_SUCCESS) { +@@ -6231,8 +6378,12 @@ na_ofi_msg_recv_common(struct na_ofi_class *na_ofi_class, + na_ofi_op_id->retry_op.msg = msg_op; + na_ofi_op_retry( + na_ofi_context, na_ofi_class->op_retry_timeout, na_ofi_op_id); +- } else ++ } else { ++#ifdef NA_HAS_DIAG ++ hg_atomic_decr32(na_ofi_class->counters.rx_count); ++#endif + NA_GOTO_SUBSYS_ERROR_NORET(msg, release, "Could not post msg recv"); ++ } + } + + return NA_SUCCESS; +@@ -6620,6 +6771,11 @@ na_ofi_rma_common(struct na_ofi_class *na_ofi_class, na_context_t *context, + NA_OFI_SEP_RX_CTX_BITS) + : na_ofi_addr->fi_addr; + ++#ifdef 
NA_HAS_DIAG ++ /* Counters */ ++ hg_atomic_incr32(na_ofi_class->counters.rma_count); ++#endif ++ + /* Post the OFI RMA operation */ + ret = + na_ofi_rma_post(na_ofi_context->fi_tx, rma_info, &na_ofi_op_id->fi_ctx); +@@ -6628,8 +6784,12 @@ na_ofi_rma_common(struct na_ofi_class *na_ofi_class, na_context_t *context, + na_ofi_op_id->retry_op.rma = na_ofi_rma_post; + na_ofi_op_retry( + na_ofi_context, na_ofi_class->op_retry_timeout, na_ofi_op_id); +- } else ++ } else { ++#ifdef NA_HAS_DIAG ++ hg_atomic_decr32(na_ofi_class->counters.rma_count); ++#endif + NA_GOTO_SUBSYS_ERROR_NORET(rma, release, "Could not post RMA op"); ++ } + } + + return NA_SUCCESS; +@@ -6991,6 +7151,10 @@ na_ofi_cq_process_canceled(const struct na_ofi_class *na_ofi_class, + cq_err->err, fi_strerror(cq_err->err), (void *) na_ofi_op_id, + na_cb_type_to_string(na_ofi_op_id->type)); + ++#ifdef NA_HAS_DIAG ++ na_ofi_cq_process_counters(na_ofi_op_id); ++#endif ++ + /* When tearing down connections, it is possible that operations will be + canceled by libfabric itself. 
+ NA_CHECK_SUBSYS_WARNING(op, +@@ -7014,6 +7178,10 @@ na_ofi_cq_process_canceled(const struct na_ofi_class *na_ofi_class, + &na_ofi_op_id->completion_data->callback_info.info + .multi_recv_unexpected, + complete); ++#ifdef NA_HAS_DIAG ++ if (complete) ++ hg_atomic_decr32(na_ofi_class->counters.rx_count); ++#endif + } else + complete = true; + +@@ -7151,6 +7319,10 @@ na_ofi_cq_process_error( + NA_OFI_OP_CANCELED) + return NA_SUCCESS; /* already handled */ + ++#ifdef NA_HAS_DIAG ++ na_ofi_cq_process_counters(na_ofi_op_id); ++#endif ++ + /* Abort other retries if peer is unreachable */ + if (na_ret == NA_HOSTUNREACH && na_ofi_op_id->addr) + na_ofi_op_retry_abort_addr(na_ofi_op_id->na_ofi_context, +@@ -7172,6 +7344,10 @@ na_ofi_cq_process_error( + &na_ofi_op_id->completion_data->callback_info.info + .multi_recv_unexpected, + complete); ++#ifdef NA_HAS_DIAG ++ if (complete) ++ hg_atomic_decr32(na_ofi_class->counters.rx_count); ++#endif + } else + complete = true; + +@@ -7346,6 +7522,10 @@ na_ofi_cq_process_event(struct na_ofi_class *na_ofi_class, + cq_event->op_context, cq_event->flags, cq_event->len, cq_event->buf, + cq_event->data, cq_event->tag); + ++#ifdef NA_HAS_DIAG ++ na_ofi_cq_process_counters(na_ofi_op_id); ++#endif ++ + switch (na_ofi_op_id->type) { + case NA_CB_RECV_UNEXPECTED: + /* Default to cq_event->tag for backward compatibility */ +@@ -7361,6 +7541,10 @@ na_ofi_cq_process_event(struct na_ofi_class *na_ofi_class, + break; + case NA_CB_MULTI_RECV_UNEXPECTED: + complete = cq_event->flags & FI_MULTI_RECV; ++#ifdef HG_HAS_DIAG ++ if (complete) ++ hg_atomic_decr32(na_ofi_class->counters.rx_count); ++#endif + + ret = na_ofi_cq_process_multi_recv_unexpected(na_ofi_class, + &na_ofi_op_id->info.msg, +@@ -7628,6 +7812,10 @@ na_ofi_cq_process_retries( + NA_LOG_SUBSYS_ERROR(op, "retry operation of %p (%s) failed", + (void *) na_ofi_op_id, na_cb_type_to_string(cb_type)); + ++#ifdef NA_HAS_DIAG ++ na_ofi_cq_process_counters(na_ofi_op_id); ++#endif ++ + /* Force 
internal completion in error mode */ + hg_atomic_or32(&na_ofi_op_id->status, NA_OFI_OP_ERRORED); + na_ofi_op_id->complete(na_ofi_op_id, true, ret); +@@ -7690,6 +7878,32 @@ na_ofi_op_retry_abort_addr( + hg_thread_spin_unlock(&op_queue->lock); + } + ++/*---------------------------------------------------------------------------*/ ++#ifdef NA_HAS_DIAG ++static void ++na_ofi_cq_process_counters(struct na_ofi_op_id *na_ofi_op_id) ++{ ++ switch (na_ofi_op_id->type) { ++ case NA_CB_RECV_UNEXPECTED: ++ case NA_CB_RECV_EXPECTED: ++ hg_atomic_decr32(na_ofi_op_id->na_ofi_class->counters.rx_count); ++ break; ++ case NA_CB_SEND_UNEXPECTED: ++ case NA_CB_SEND_EXPECTED: ++ hg_atomic_decr32(na_ofi_op_id->na_ofi_class->counters.tx_count); ++ break; ++ case NA_CB_PUT: ++ case NA_CB_GET: ++ hg_atomic_decr32(na_ofi_op_id->na_ofi_class->counters.rma_count); ++ break; ++ case NA_CB_MULTI_RECV_UNEXPECTED: ++ /* TODO currently treated outside of switch */ ++ default: ++ break; ++ } ++} ++#endif ++ + /*---------------------------------------------------------------------------*/ + static NA_INLINE void + na_ofi_op_complete_single(struct na_ofi_op_id *na_ofi_op_id, +@@ -7711,6 +7925,9 @@ na_ofi_op_complete_single(struct na_ofi_op_id *na_ofi_op_id, + completion_data->plugin_callback = na_ofi_op_release_single; + + NA_LOG_SUBSYS_DEBUG(op, "Adding completion data to queue"); ++#ifdef NA_HAS_DIAG ++ hg_atomic_incr32(na_ofi_op_id->na_ofi_class->counters.cq_count); ++#endif + + /* Add OP to NA completion queue */ + na_cb_completion_add( +@@ -7728,6 +7945,9 @@ na_ofi_op_release_single(void *arg) + (!(hg_atomic_get32(&na_ofi_op_id->status) & NA_OFI_OP_COMPLETED)), + "Releasing resources from an uncompleted operation"); + ++#ifdef NA_HAS_DIAG ++ hg_atomic_decr32(na_ofi_op_id->na_ofi_class->counters.cq_count); ++#endif + if (na_ofi_op_id->addr) { + na_ofi_addr_ref_decr(na_ofi_op_id->addr); + na_ofi_op_id->addr = NULL; +@@ -7775,6 +7995,10 @@ na_ofi_op_complete_multi( + op, 
na_ofi_op_id->completion_data == NULL, error, "Queue is full"); + + NA_LOG_SUBSYS_DEBUG(op, "Adding completion data to queue"); ++#ifdef NA_HAS_DIAG ++ hg_atomic_incr32(na_ofi_op_id->na_ofi_class->counters.cq_count); ++#endif ++ + /* Add OP to NA completion queue */ + na_cb_completion_add( + na_ofi_op_id->na_ofi_context->context, completion_data); +@@ -7789,6 +8013,9 @@ na_ofi_op_release_multi(void *arg) + { + struct na_ofi_op_id *na_ofi_op_id = (struct na_ofi_op_id *) arg; + ++#ifdef NA_HAS_DIAG ++ hg_atomic_decr32(na_ofi_op_id->na_ofi_class->counters.cq_count); ++#endif + na_ofi_completion_multi_pop(&na_ofi_op_id->completion_data_storage.multi); + } + +@@ -8230,15 +8457,15 @@ na_ofi_initialize( + + /* Set/check optional features */ + if ((na_ofi_prov_extra_caps[prov_type] & FI_MULTI_RECV) && +- (na_ofi_class->msg_recv_unexpected == na_ofi_msg_recv)) { ++ (na_ofi_class->ops.msg_recv_unexpected == na_ofi_msg_recv)) { + NA_CHECK_SUBSYS_ERROR(cls, + !(na_ofi_class->fi_info->caps & FI_MULTI_RECV), error, ret, + NA_PROTONOSUPPORT, "FI_MULTI_RECV is not supported by provider"); + na_ofi_class->opt_features |= NA_OPT_MULTI_RECV; + } +- na_ofi_class->cq_poll = (na_ofi_class->fi_info->caps & FI_SOURCE_ERR) +- ? na_ofi_cq_poll_fi_source +- : na_ofi_cq_poll_no_source; ++ na_ofi_class->ops.cq_poll = (na_ofi_class->fi_info->caps & FI_SOURCE_ERR) ++ ? 
na_ofi_cq_poll_fi_source ++ : na_ofi_cq_poll_no_source; + + /* Open fabric */ + ret = na_ofi_fabric_open( +@@ -8959,8 +9186,8 @@ na_ofi_msg_send_unexpected(na_class_t *na_class, na_context_t *context, + { + return na_ofi_msg_send_common(NA_OFI_CLASS(na_class), + NA_OFI_CONTEXT(context), NA_CB_SEND_UNEXPECTED, callback, arg, +- NA_OFI_CLASS(na_class)->msg_send_unexpected, +- NA_OFI_CLASS(na_class)->msg_send_unexpected_string, buf, buf_size, ++ NA_OFI_CLASS(na_class)->ops.msg_send_unexpected, ++ NA_OFI_CLASS(na_class)->ops.msg_send_unexpected_string, buf, buf_size, + NA_OFI_CLASS(na_class)->endpoint->unexpected_msg_size_max, + (struct na_ofi_msg_buf_handle *) plugin_data, + (struct na_ofi_addr *) dest_addr, dest_id, +@@ -8975,8 +9202,8 @@ na_ofi_msg_recv_unexpected(na_class_t *na_class, na_context_t *context, + { + return na_ofi_msg_recv_common(NA_OFI_CLASS(na_class), + NA_OFI_CONTEXT(context), NA_CB_RECV_UNEXPECTED, callback, arg, +- NA_OFI_CLASS(na_class)->msg_recv_unexpected, +- NA_OFI_CLASS(na_class)->msg_recv_unexpected_string, buf, buf_size, ++ NA_OFI_CLASS(na_class)->ops.msg_recv_unexpected, ++ NA_OFI_CLASS(na_class)->ops.msg_recv_unexpected_string, buf, buf_size, + NA_OFI_CLASS(na_class)->endpoint->unexpected_msg_size_max, + (struct na_ofi_msg_buf_handle *) plugin_data, NULL, 0, + NA_OFI_UNEXPECTED_TAG, NA_OFI_TAG_MASK, (struct na_ofi_op_id *) op_id); +@@ -9027,9 +9254,17 @@ na_ofi_msg_multi_recv_unexpected(na_class_t *na_class, na_context_t *context, + .tag = 0 /* unused */, + .tag_mask = 0 /* unused */}; + ++#ifdef NA_HAS_DIAG ++ /* Counters */ ++ hg_atomic_incr32(na_ofi_class->counters.rx_count); ++#endif ++ + ret = na_ofi_msg_multi_recv( + na_ofi_context->fi_rx, &na_ofi_op_id->info.msg, &na_ofi_op_id->fi_ctx); + if (ret != NA_SUCCESS) { ++#ifdef NA_HAS_DIAG ++ hg_atomic_decr32(na_ofi_class->counters.rx_count); ++#endif + if (ret == NA_AGAIN) { + na_ofi_op_id->retry_op.msg = na_ofi_msg_multi_recv; + na_ofi_op_retry( +@@ -9061,8 +9296,8 @@ 
na_ofi_msg_send_expected(na_class_t *na_class, na_context_t *context, + { + return na_ofi_msg_send_common(NA_OFI_CLASS(na_class), + NA_OFI_CONTEXT(context), NA_CB_SEND_EXPECTED, callback, arg, +- NA_OFI_CLASS(na_class)->msg_send_expected, +- NA_OFI_CLASS(na_class)->msg_send_expected_string, buf, buf_size, ++ NA_OFI_CLASS(na_class)->ops.msg_send_expected, ++ NA_OFI_CLASS(na_class)->ops.msg_send_expected_string, buf, buf_size, + NA_OFI_CLASS(na_class)->endpoint->expected_msg_size_max, + (struct na_ofi_msg_buf_handle *) plugin_data, + (struct na_ofi_addr *) dest_addr, dest_id, (uint64_t) tag, +@@ -9077,8 +9312,8 @@ na_ofi_msg_recv_expected(na_class_t *na_class, na_context_t *context, + { + return na_ofi_msg_recv_common(NA_OFI_CLASS(na_class), + NA_OFI_CONTEXT(context), NA_CB_RECV_EXPECTED, callback, arg, +- NA_OFI_CLASS(na_class)->msg_recv_expected, +- NA_OFI_CLASS(na_class)->msg_recv_expected_string, buf, buf_size, ++ NA_OFI_CLASS(na_class)->ops.msg_recv_expected, ++ NA_OFI_CLASS(na_class)->ops.msg_recv_expected_string, buf, buf_size, + NA_OFI_CLASS(na_class)->endpoint->expected_msg_size_max, + (struct na_ofi_msg_buf_handle *) plugin_data, + (struct na_ofi_addr *) source_addr, source_id, (uint64_t) tag, 0, +@@ -9214,7 +9449,11 @@ na_ofi_mem_register(na_class_t *na_class, na_mem_handle_t *mem_handle, + (struct na_ofi_mem_handle *) mem_handle; + struct na_ofi_domain *domain = NA_OFI_CLASS(na_class)->domain; + const struct fi_info *fi_info = NA_OFI_CLASS(na_class)->fi_info; +- int32_t mr_cnt = hg_atomic_get32(&domain->mr_reg_count); ++#ifdef NA_HAS_DIAG ++ int32_t mr_cnt = hg_atomic_get32(NA_OFI_CLASS(na_class)->counters.mr_count); ++#else ++ int32_t mr_cnt = -1; ++#endif + struct fi_mr_attr fi_mr_attr = { + .mr_iov = NA_OFI_IOV( + na_ofi_mem_handle->desc.iov, na_ofi_mem_handle->desc.info.iovcnt), +@@ -9283,7 +9522,9 @@ na_ofi_mem_register(na_class_t *na_class, na_mem_handle_t *mem_handle, + fi_mr_attr.mr_iov[0].iov_base, fi_mr_attr.mr_iov[0].iov_len, + 
fi_mr_attr.iov_count, fi_mr_attr.access, fi_mr_attr.iface, + fi_mr_attr.requested_key, rc, fi_strerror(-rc), mr_cnt); +- mr_cnt = hg_atomic_incr32(&domain->mr_reg_count); ++#ifdef NA_HAS_DIAG ++ mr_cnt = hg_atomic_incr32(NA_OFI_CLASS(na_class)->counters.mr_count); ++#endif + + /* Attach MR to endpoint when provider requests it */ + if (fi_info->domain_attr->mr_mode & FI_MR_ENDPOINT) { +@@ -9327,7 +9568,9 @@ na_ofi_mem_register(na_class_t *na_class, na_mem_handle_t *mem_handle, + error: + if (na_ofi_mem_handle->fi_mr) { + (void) fi_close(&na_ofi_mem_handle->fi_mr->fid); +- hg_atomic_decr32(&domain->mr_reg_count); ++#ifdef NA_HAS_DIAG ++ hg_atomic_decr32(NA_OFI_CLASS(na_class)->counters.mr_count); ++#endif + } + return ret; + } +@@ -9336,7 +9579,6 @@ error: + static na_return_t + na_ofi_mem_deregister(na_class_t *na_class, na_mem_handle_t *mem_handle) + { +- struct na_ofi_domain *domain = NA_OFI_CLASS(na_class)->domain; + struct na_ofi_mem_handle *na_ofi_mem_handle = + (struct na_ofi_mem_handle *) mem_handle; + na_return_t ret; +@@ -9344,14 +9586,18 @@ na_ofi_mem_deregister(na_class_t *na_class, na_mem_handle_t *mem_handle) + + /* close MR handle */ + if (na_ofi_mem_handle->fi_mr != NULL) { +- int32_t NA_DEBUG_LOG_USED mr_cnt; ++ int32_t NA_DEBUG_LOG_USED mr_cnt = -1; + const struct iovec NA_DEBUG_LOG_USED *mr_iov = NA_OFI_IOV( + na_ofi_mem_handle->desc.iov, na_ofi_mem_handle->desc.info.iovcnt); + + rc = fi_close(&na_ofi_mem_handle->fi_mr->fid); + NA_CHECK_SUBSYS_ERROR(mem, rc != 0, error, ret, na_ofi_errno_to_na(-rc), + "fi_close() mr_hdl failed, rc: %d (%s)", rc, fi_strerror(-rc)); +- mr_cnt = hg_atomic_decr32(&domain->mr_reg_count); ++#ifdef NA_HAS_DIAG ++ mr_cnt = hg_atomic_decr32(NA_OFI_CLASS(na_class)->counters.mr_count); ++#else ++ (void) na_class; ++#endif + + NA_LOG_SUBSYS_DEBUG(mem, + "Deregistered memory region: mr_iov[0].iov_base=%p, " +@@ -9575,7 +9821,7 @@ na_ofi_poll(na_class_t *na_class, na_context_t *context, unsigned int *count_p) + return 
NA_SUCCESS; + + /* Read from CQ and process events */ +- ret = na_ofi_class->cq_poll(na_ofi_class, na_ofi_context, &count); ++ ret = na_ofi_class->ops.cq_poll(na_ofi_class, na_ofi_context, &count); + NA_CHECK_SUBSYS_NA_ERROR(poll, error, ret, "Could not poll context CQ"); + + /* Attempt to process retries */ +diff --git a/src/util/mercury_dlog.c b/src/util/mercury_dlog.c +index 042a0157..fbdc114f 100644 +--- a/src/util/mercury_dlog.c ++++ b/src/util/mercury_dlog.c +@@ -138,6 +138,40 @@ hg_dlog_mkcount64(struct hg_dlog *d, hg_atomic_int64_t **cptr, const char *name, + hg_thread_mutex_unlock(&d->dlock); + } + ++/*---------------------------------------------------------------------------*/ ++void ++hg_dlog_rmcount32(struct hg_dlog *d, hg_atomic_int32_t *cptr) ++{ ++ struct hg_dlog_dcount32 *dcnt; ++ ++ hg_thread_mutex_lock(&d->dlock); ++ TAILQ_FOREACH (dcnt, &d->cnts32, l) { ++ if (&dcnt->c == cptr) { ++ TAILQ_REMOVE(&d->cnts32, dcnt, l); ++ free(dcnt); ++ break; ++ } ++ } ++ hg_thread_mutex_unlock(&d->dlock); ++} ++ ++/*---------------------------------------------------------------------------*/ ++void ++hg_dlog_rmcount64(struct hg_dlog *d, hg_atomic_int64_t *cptr) ++{ ++ struct hg_dlog_dcount64 *dcnt; ++ ++ hg_thread_mutex_lock(&d->dlock); ++ TAILQ_FOREACH (dcnt, &d->cnts64, l) { ++ if (&dcnt->c == cptr) { ++ TAILQ_REMOVE(&d->cnts64, dcnt, l); ++ free(dcnt); ++ break; ++ } ++ } ++ hg_thread_mutex_unlock(&d->dlock); ++} ++ + /*---------------------------------------------------------------------------*/ + unsigned int + hg_dlog_addlog(struct hg_dlog *d, const char *file, unsigned int line, +@@ -210,8 +244,9 @@ hg_dlog_dump(struct hg_dlog *d, int (*log_func)(FILE *, const char *, ...), + "### (%s) debug log summary\n" + "### ----------------------\n", + (d->dlog_magic + strlen(HG_DLOG_STDMAGIC))); +- if (!TAILQ_EMPTY(&d->cnts32) && !TAILQ_EMPTY(&d->cnts64)) { ++ if (!TAILQ_EMPTY(&d->cnts32) || !TAILQ_EMPTY(&d->cnts64)) { + log_func(stream, "# Counters\n"); ++ + 
TAILQ_FOREACH (dc32, &d->cnts32, l) { + log_func(stream, "# %s: %" PRId32 " [%s]\n", dc32->name, + hg_atomic_get32(&dc32->c), dc32->descr); +diff --git a/src/util/mercury_dlog.h b/src/util/mercury_dlog.h +index 88944b05..43c30a4f 100644 +--- a/src/util/mercury_dlog.h ++++ b/src/util/mercury_dlog.h +@@ -155,6 +155,15 @@ HG_UTIL_PUBLIC void + hg_dlog_mkcount32(struct hg_dlog *d, hg_atomic_int32_t **cptr, const char *name, + const char *descr); + ++/** ++ * remove a 32-bit counter from a dlog. ++ * ++ * \param d [IN] dlog to remove the counter from ++ * \param cptr [IN] pointer to counter to remove ++ */ ++HG_UTIL_PUBLIC void ++hg_dlog_rmcount32(struct hg_dlog *d, hg_atomic_int32_t *cptr); ++ + /** + * make a named atomic64 counter in a dlog and return a pointer to + * it. we use the dlock to ensure a counter under a given name only +@@ -178,6 +187,15 @@ HG_UTIL_PUBLIC void + hg_dlog_mkcount64(struct hg_dlog *d, hg_atomic_int64_t **cptr, const char *name, + const char *descr); + ++/** ++ * remove a 64-bit counter from a dlog. ++ * ++ * \param d [IN] dlog to remove the counter from ++ * \param cptr [IN] pointer to counter to remove ++ */ ++HG_UTIL_PUBLIC void ++hg_dlog_rmcount64(struct hg_dlog *d, hg_atomic_int64_t *cptr); ++ + /** + * attempt to add a log record to a dlog. the id and msg should point + * to static strings that are valid throughout the life of the program +diff --git a/src/util/mercury_log.c b/src/util/mercury_log.c +index 483922bf..2fc13548 100644 +--- a/src/util/mercury_log.c ++++ b/src/util/mercury_log.c +@@ -500,16 +500,9 @@ hg_log_outlet_deregister(struct hg_log_outlet *hg_log_outlet) + + if (hg_log_outlet->debug_log && + !(hg_log_outlet->parent && +- hg_log_outlet->parent->debug_log == hg_log_outlet->debug_log)) { +- if (hg_log_outlet->level >= HG_LOG_LEVEL_MIN_DEBUG) { +- FILE *stream = hg_log_streams_g[hg_log_outlet->level] +- ? 
hg_log_streams_g[hg_log_outlet->level] +- : *hg_log_std_streams_g[hg_log_outlet->level]; +- hg_dlog_dump_counters( +- hg_log_outlet->debug_log, hg_log_func_g, stream, 0); +- } ++ hg_log_outlet->parent->debug_log == hg_log_outlet->debug_log)) + hg_dlog_free(hg_log_outlet->debug_log); +- } ++ + STAILQ_REMOVE(&hg_log_outlets_g, hg_log_outlet, hg_log_outlet, entry); + hg_log_outlet->registered = false; + } +@@ -593,7 +586,8 @@ hg_log_vwrite(struct hg_log_outlet *hg_log_outlet, enum hg_log_level log_level, + no_return ? "" : "\n"); + #endif + +- if (log_level == HG_LOG_LEVEL_ERROR && hg_log_outlet->debug_log && ++ if ((log_level == HG_LOG_LEVEL_ERROR || log_level == HG_LOG_LEVEL_FATAL) && ++ hg_log_outlet->debug_log && + hg_log_outlet->level >= HG_LOG_LEVEL_MIN_DEBUG) { + hg_dlog_dump(hg_log_outlet->debug_log, hg_log_func_g, stream, 0); + hg_dlog_resetlog(hg_log_outlet->debug_log); +diff --git a/src/util/mercury_log.h b/src/util/mercury_log.h +index ceba0c7a..1ed01429 100644 +--- a/src/util/mercury_log.h ++++ b/src/util/mercury_log.h +@@ -308,11 +308,19 @@ + hg_dlog_mkcount32(HG_LOG_OUTLET(name).debug_log, counter_ptr, \ + counter_name, counter_desc) + ++/* HG_LOG_DEL_COUNTER32: delete 32-bit debug log counter */ ++#define HG_LOG_DEL_COUNTER32(name, counter_ptr) \ ++ hg_dlog_rmcount32(HG_LOG_OUTLET(name).debug_log, counter_ptr) ++ + /* HG_LOG_ADD_COUNTER64: add 64-bit debug log counter */ + #define HG_LOG_ADD_COUNTER64(name, counter_ptr, counter_name, counter_desc) \ + hg_dlog_mkcount64(HG_LOG_OUTLET(name).debug_log, counter_ptr, \ + counter_name, counter_desc) + ++/* HG_LOG_DEL_COUNTER64: delete 64-bit debug log counter */ ++#define HG_LOG_DEL_COUNTER64(name, counter_ptr) \ ++ hg_dlog_rmcount64(HG_LOG_OUTLET(name).debug_log, counter_ptr) ++ + /*************************************/ + /* Public Type and Struct Definition */ + /*************************************/ +diff --git a/src/util/version.txt b/src/util/version.txt +index fcdb2e10..ee74734a 100644 +--- 
a/src/util/version.txt ++++ b/src/util/version.txt +@@ -1 +1 @@ +-4.0.0 ++4.1.0 +-- +2.52.0 + diff --git a/deps/patches/mercury/0003_combined_plugin_path.patch b/deps/patches/mercury/0003_combined_plugin_path.patch deleted file mode 100644 index 476598dbc84..00000000000 --- a/deps/patches/mercury/0003_combined_plugin_path.patch +++ /dev/null @@ -1,260 +0,0 @@ -diff --git a/src/na/CMakeLists.txt b/src/na/CMakeLists.txt -index 5c4ec2f..625c07c 100644 ---- a/src/na/CMakeLists.txt -+++ b/src/na/CMakeLists.txt -@@ -82,9 +82,10 @@ if(NA_USE_DYNAMIC_PLUGINS) - if(NOT BUILD_SHARED_LIBS) - message(FATAL_ERROR "Using dynamic plugins requires BUILD_SHARED_LIBS to be ON.") - endif() -+ cmake_path(SET NA_PLUGIN_RELATIVE_PATH ${NA_INSTALL_PLUGIN_DIR}) -+ cmake_path(RELATIVE_PATH NA_PLUGIN_RELATIVE_PATH BASE_DIRECTORY ${NA_INSTALL_LIB_DIR}) -+ message(STATUS "NA plugin install directory: ${NA_INSTALL_PLUGIN_DIR} (relative path to libraries: ${NA_PLUGIN_RELATIVE_PATH})") - set(NA_HAS_DYNAMIC_PLUGINS 1) -- set(NA_DEFAULT_PLUGIN_PATH ${NA_INSTALL_PLUGIN_DIR} CACHE PATH "Default path used to load plugins.") -- mark_as_advanced(NA_DEFAULT_PLUGIN_PATH) - endif() - - # BMI -diff --git a/src/na/na.c b/src/na/na.c -index 781f4c8..b60d305 100644 ---- a/src/na/na.c -+++ b/src/na/na.c -@@ -20,6 +20,9 @@ - # include - # else - # include -+# include -+# include -+# include - # endif - #endif - -@@ -138,6 +141,10 @@ na_plugin_check_protocol(const struct na_class_ops *const class_ops[], - const struct na_class_ops **ops_p); - - #ifdef NA_HAS_DYNAMIC_PLUGINS -+/* Resolve plugin search path */ -+static na_return_t -+na_plugin_resolve_path(const char *offset, char *path, size_t path_size); -+ - /* Scan a given path and return a list of plugins */ - static na_return_t - na_plugin_scan_path(const char *path, struct na_plugin_entry **entries_p); -@@ -265,15 +272,26 @@ static void - na_initialize(void) - { - const char *plugin_path = getenv("NA_PLUGIN_PATH"); -+ char resolved_path[NA_PLUGIN_PATH_MAX]; 
- na_return_t ret; - -- if (plugin_path == NULL) -- plugin_path = NA_DEFAULT_PLUGIN_PATH; -+ if (plugin_path == NULL) { -+ ret = na_plugin_resolve_path( -+ NA_PLUGIN_RELATIVE_PATH, resolved_path, sizeof(resolved_path)); -+ NA_CHECK_SUBSYS_NA_ERROR(cls, done, ret, -+ "Could not resolve plugin path using offset (%s)", -+ NA_PLUGIN_RELATIVE_PATH); -+ plugin_path = resolved_path; -+ } - - ret = na_plugin_scan_path(plugin_path, &na_plugin_dynamic_g); - NA_CHECK_SUBSYS_WARNING(fatal, ret != NA_SUCCESS, -- "No plugin found in path (%s), consider setting NA_PLUGIN_PATH.", -+ "No usable plugin found in path (%s), consider setting NA_PLUGIN_PATH " -+ "if path indicated is not valid.", - plugin_path); -+ -+done: -+ return; - } - - /*---------------------------------------------------------------------------*/ -@@ -472,6 +490,44 @@ error: - /*---------------------------------------------------------------------------*/ - #ifdef NA_HAS_DYNAMIC_PLUGINS - # ifdef _WIN32 -+# define PATH_MAX MAX_PATH -+# define realpath(N, R) _fullpath((R), (N), PATH_MAX) -+# endif -+static na_return_t -+na_plugin_resolve_path(const char *offset, char *path, size_t path_size) -+{ -+ static int placeholder; -+ char libpath[PATH_MAX]; -+ char *slash; -+ na_return_t ret; -+ int rc; -+ -+ rc = hg_dl_get_path(&placeholder, path, path_size); -+ NA_CHECK_SUBSYS_ERROR( -+ cls, rc != 0, error, ret, NA_NOENTRY, "hg_dl_get_path() failed"); -+ -+ NA_CHECK_SUBSYS_ERROR(cls, realpath(path, libpath) == NULL, error, ret, -+ NA_NOENTRY, "realpath() failed, %s", strerror(errno)); -+ -+ slash = strrchr(libpath, '/'); -+ NA_CHECK_SUBSYS_ERROR(cls, slash == NULL, error, ret, NA_INVALID_ARG, -+ "Could not find last '/' in %s", libpath); -+ *slash = '\0'; -+ -+ rc = snprintf(path, path_size, "%s/%s", libpath, offset); -+ NA_CHECK_SUBSYS_ERROR(cls, rc < 0 || rc > (int) path_size, error, ret, -+ NA_OVERFLOW, -+ "snprintf() failed or name truncated, rc: %d (expected %zu)", rc, -+ path_size); -+ -+ return NA_SUCCESS; -+ 
-+error: -+ return ret; -+} -+ -+/*---------------------------------------------------------------------------*/ -+# ifdef _WIN32 - static na_return_t - na_plugin_scan_path(const char *path, struct na_plugin_entry **entries_p) - { -@@ -494,7 +550,7 @@ na_plugin_scan_path(const char *path, struct na_plugin_entry **entries_p) - struct dirent **plugin_list; - struct na_plugin_entry *entries = NULL; - na_return_t ret; -- int n, n_entries = 0; -+ int n, opened_plugins = 0; - - n = scandir(path, &plugin_list, na_plugin_filter, alphasort); - NA_CHECK_SUBSYS_ERROR( -@@ -504,16 +560,20 @@ na_plugin_scan_path(const char *path, struct na_plugin_entry **entries_p) - (struct na_plugin_entry *) calloc((size_t) n + 1, sizeof(*entries)); - NA_CHECK_SUBSYS_ERROR(cls, entries == NULL, error, ret, NA_NOMEM, - "Could not allocate %d plugin entries", n); -- n_entries = n; - - while (n--) { - ret = na_plugin_open(path, plugin_list[n]->d_name, &entries[n]); - free(plugin_list[n]); -- NA_CHECK_SUBSYS_NA_ERROR(cls, error, ret, "Could not open plugin (%s)", -- plugin_list[n]->d_name); -+ if (ret == NA_SUCCESS) -+ opened_plugins++; -+ else -+ NA_CHECK_SUBSYS_NA_ERROR( -+ cls, error, ret, "Could not open plugin (%s)", plugin_list[n]->d_name); - } - - free(plugin_list); -+ NA_CHECK_SUBSYS_ERROR(cls, opened_plugins == 0, error, ret, NA_NOENTRY, -+ "No usable plugin found in path (%s)", path); - - *entries_p = entries; - -@@ -521,19 +581,11 @@ na_plugin_scan_path(const char *path, struct na_plugin_entry **entries_p) - - error: - if (n > 0) { -- if (entries != NULL) { -- int i; -- -- /* close entry */ -- for (i = n + 1; i < n_entries; i++) -- na_plugin_close(&entries[i]); -- free(entries); -- } -- - while (n--) - free(plugin_list[n]); - free(plugin_list); - } -+ free(entries); - - return ret; - } -diff --git a/src/na/na_config.h.in b/src/na/na_config.h.in -index 30d0e08..50dd443 100644 ---- a/src/na/na_config.h.in -+++ b/src/na/na_config.h.in -@@ -80,7 +80,7 @@ - # define NA_PLUGIN - # define 
NA_PLUGIN_VISIBILITY NA_PRIVATE - #endif --#cmakedefine NA_DEFAULT_PLUGIN_PATH "@NA_DEFAULT_PLUGIN_PATH@" -+#cmakedefine NA_PLUGIN_RELATIVE_PATH "@NA_PLUGIN_RELATIVE_PATH@" - - /* Build Options */ - #cmakedefine NA_HAS_DEBUG -diff --git a/src/util/CMakeLists.txt b/src/util/CMakeLists.txt -index 44fe6b9..32f51d1 100644 ---- a/src/util/CMakeLists.txt -+++ b/src/util/CMakeLists.txt -@@ -193,6 +193,7 @@ configure_file( - #------------------------------------------------------------------------------ - set(MERCURY_UTIL_SRCS - ${CMAKE_CURRENT_SOURCE_DIR}/mercury_atomic_queue.c -+ ${CMAKE_CURRENT_SOURCE_DIR}/mercury_dl.c - ${CMAKE_CURRENT_SOURCE_DIR}/mercury_dlog.c - ${CMAKE_CURRENT_SOURCE_DIR}/mercury_event.c - ${CMAKE_CURRENT_SOURCE_DIR}/mercury_hash_table.c -diff --git a/src/util/mercury_dl.h b/src/util/mercury_dl.h -index b86932a..0cda094 100644 ---- a/src/util/mercury_dl.h -+++ b/src/util/mercury_dl.h -@@ -62,6 +62,18 @@ hg_dl_close(HG_DL_HANDLE handle); - static HG_UTIL_INLINE void * - hg_dl_sym(HG_DL_HANDLE handle, const char *name); - -+/** -+ * Retrieve library path. -+ * -+ * \param addr [IN] address of the symbol -+ * \param path [OUT] buffer to store the path -+ * \param path_size [IN] size of the buffer -+ * -+ * \return Non-negative on success or negative on failure -+ */ -+HG_UTIL_PUBLIC int -+hg_dl_get_path(const void *addr, char *path, size_t path_size); -+ - /*---------------------------------------------------------------------------*/ - static HG_UTIL_INLINE const char * - hg_dl_error(void) -diff --git a/src/util/mercury_dl.c b/src/util/mercury_dl.c -new file mode 100644 -index 0000000..6ed4666 ---- /dev/null -+++ b/src/util/mercury_dl.c -@@ -0,0 +1,35 @@ -+/** -+ * Copyright (c) 2013-2022 UChicago Argonne, LLC and The HDF Group. -+ * Copyright (c) 2022-2023 Intel Corporation. 
-+ * -+ * SPDX-License-Identifier: BSD-3-Clause -+ */ -+ -+#if !defined(_WIN32) && !defined(_GNU_SOURCE) -+# define _GNU_SOURCE -+#endif -+#include "mercury_dl.h" -+ -+#include -+ -+/*---------------------------------------------------------------------------*/ -+int -+hg_dl_get_path(const void *addr, char *path, size_t path_size) -+{ -+#ifdef _WIN32 -+ HMODULE module; -+ if (GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | -+ GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, -+ (LPCSTR) addr, &module)) -+ return GetModuleFileNameA(module, path, (DWORD) path_size); -+#else -+ Dl_info info; -+ if (dladdr(addr, &info) && info.dli_fname) { -+ strncpy(path, info.dli_fname, path_size); -+ path[path_size - 1] = '\0'; -+ return HG_UTIL_SUCCESS; -+ } -+#endif -+ -+ return HG_UTIL_FAIL; -+} diff --git a/deps/patches/mercury/0003_ofi_auth_key.patch b/deps/patches/mercury/0003_ofi_auth_key.patch new file mode 100644 index 00000000000..480f6518dd9 --- /dev/null +++ b/deps/patches/mercury/0003_ofi_auth_key.patch @@ -0,0 +1,25 @@ +From cd678a20fb21b6e5a5b8f05619427dc79aa1246b Mon Sep 17 00:00:00 2001 +From: Jerome Soumagne +Date: Mon, 26 Jan 2026 12:27:27 -0600 +Subject: [PATCH 3/3] NA OFI: ensure domain auth key remains valid + +--- + src/na/na_ofi.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/src/na/na_ofi.c b/src/na/na_ofi.c +index d385d279..68a443e8 100644 +--- a/src/na/na_ofi.c ++++ b/src/na/na_ofi.c +@@ -5220,7 +5220,7 @@ na_ofi_domain_open(const struct na_ofi_fabric *na_ofi_fabric, + NA_NOMEM, "Could not allocate auth_key"); + memcpy(na_ofi_domain->auth_key, &base_auth_key, sizeof(base_auth_key)); + +- domain_attr->auth_key = (void *) &base_auth_key; ++ domain_attr->auth_key = (void *) na_ofi_domain->auth_key; + domain_attr->auth_key_size = auth_key_size; + } + +-- +2.52.0 + diff --git a/docs/admin/hardware.md b/docs/admin/hardware.md index 3f4658cf40b..c35b00af025 100644 --- a/docs/admin/hardware.md +++ b/docs/admin/hardware.md @@ -35,9 
+35,10 @@ validated on a regular basis. An RDMA-capable fabric is preferred for best performance. The DAOS data plane relies on [OFI libfabric](https://ofiwg.github.io/libfabric/) and supports OFI providers for Ethernet/tcp and InfiniBand/verbs. -Starting with a Technology Preview in DAOS 2.2, [UCX](https://www.openucx.org/) -is also supported as an alternative network stack for DAOS. -Refer to [UCX Fabric Support (DAOS 2.2 Technology Preview)](./ucx.md) +[UCX](https://www.openucx.org/) +is also supported as an alternative network stack for DAOS on InfiniBand/verbs +platforms. +Refer to [UCX Fabric Support](./ucx.md) for details on setting up DAOS with UCX support. DAOS supports multiple network interfaces on the servers diff --git a/docs/admin/ucx.md b/docs/admin/ucx.md index b6529c7f1ee..c366fb1cf06 100644 --- a/docs/admin/ucx.md +++ b/docs/admin/ucx.md @@ -16,33 +16,26 @@ the following steps are needed: for information about supported MLNX\_OFED releases. * The `mercury-ucx` RPM package needs to be **manually** selected for - installation: - - - The base `mercury` RPM package ships with the libfabric plugin. - This RPM will be installed by default and is a dependency of the - `mercury-ucx` RPM. - - - The additional `mercury-ucx` RPM is also provided. This RPM contains - the UCX plugin that is required for enabling UCX support. - This RPM **must** be used in - InfiniBand environments when the intention is to use - UCX. - Attempts to install this RPM in non-Infiniband environments - will fail, because it has a dependency on UCX packages. + installation. The base `mercury` RPM package ships with no plugins. + The `mercury-ucx` RPM contains the UCX plugin that is required for + enabling UCX support. + This RPM **must** be used in InfiniBand environments when the intention + is to use UCX. Attempts to install this RPM in non-Infiniband environments + will fail, because it has a dependency on UCX packages. 
* At DAOS **installation** time, to enable UCX support the `mercury-ucx` RPM package must be explicitly listed. - For example, using the `yum`/`dnf` package manager on EL8: + For example, using the `dnf`/`yum` package manager on EL8: ```bash # on DAOS_ADMIN nodes: - yum install mercury-ucx daos-admin + dnf install mercury-ucx daos-admin # on DAOS_SERVER nodes: - yum install mercury-ucx daos-server + dnf install mercury-ucx daos-server # on DAOS_CLIENT nodes: - yum install mercury-ucx daos-client + dnf install mercury-ucx daos-client ``` After UCX support has been enabled by installing the `mercury-ucx` diff --git a/site_scons/components/__init__.py b/site_scons/components/__init__.py index da2518a9928..f6ab99d6245 100644 --- a/site_scons/components/__init__.py +++ b/site_scons/components/__init__.py @@ -1,6 +1,6 @@ # Copyright 2016-2024 Intel Corporation # Copyright 2025 Google LLC -# Copyright 2025 Hewlett Packard Enterprise Development LP +# Copyright 2025-2026 Hewlett Packard Enterprise Development LP # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -202,17 +202,13 @@ def define_mercury(reqs): '-DMERCURY_USE_SYSTEM_BOOST:BOOL=ON', '-DMERCURY_USE_CHECKSUMS:BOOL=OFF', '-DMERCURY_ENABLE_COUNTERS:BOOL=ON', + '-DMERCURY_ENABLE_DEBUG:BOOL=ON', '-DNA_USE_DYNAMIC_PLUGINS:BOOL=ON', '-DNA_USE_SM:BOOL=ON', '-DNA_USE_OFI:BOOL=ON', '-DNA_USE_UCX:BOOL=ON', '../mercury'] - if reqs.target_type == 'debug': - mercury_build.append('-DMERCURY_ENABLE_DEBUG:BOOL=ON') - else: - mercury_build.append('-DMERCURY_ENABLE_DEBUG:BOOL=OFF') - reqs.define('mercury', retriever=GitRepoRetriever(True), commands=[mercury_build, diff --git a/src/cart/utils/memcheck-cart.supp b/src/cart/utils/memcheck-cart.supp index 9676c1966f2..a2f73cfcdad 100644 --- a/src/cart/utils/memcheck-cart.supp +++ b/src/cart/utils/memcheck-cart.supp @@ -512,6 +512,14 @@ fun:hg_dlog_mkcount32 ... 
} +{ + + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + fun:hg_dlog_mkcount64 + ... +} { FI leak 9 Memcheck:Leak @@ -527,18 +535,6 @@ fun:HG_Init_opt fun:crt_hg_class_init } -{ - Tcp provider - Memcheck:Param - sendmsg(msg.msg_iov[1]) - ... - fun:sendmsg - fun:ofi_sockapi_sendv_socket - fun:ofi_bsock_sendv - ... - fun:fi_senddata - ... -} { Tcp provider with ofi rxm Memcheck:Param @@ -546,20 +542,14 @@ ... fun:ofi_bsock_sendv ... - fun:fi_tsend - ... } { Tcp provider with ofi rxm 2 Memcheck:Param sendmsg(msg.msg_iov[2]) ... - fun:sendmsg - fun:ofi_sockapi_sendv_socket fun:ofi_bsock_sendv ... - fun:fi_tsend - ... } { Go syscall. @@ -777,24 +767,6 @@ fun:__tsan_go_atomic64_fetch_add fun:racecall } -{ - DAOS-17006 - mercury leak - Memcheck:Leak - match-leak-kinds: reachable - fun:malloc - fun:hg_dlog_mkcount64 - fun:hg_core_counters_init - fun:hg_core_init - fun:HG_Core_init_opt2 - fun:HG_Init_opt2 - fun:crt_hg_class_init - fun:crt_hg_ctx_init - fun:crt_context_provider_create - fun:daos_eq_lib_init - fun:daos_init - fun:_cgo_b590e4e2531a_Cfunc_daos_init - fun:runtime.asmcgocall.abi0 -} { getpwnam_r() leak Memcheck:Leak diff --git a/utils/build.config b/utils/build.config index c36fbd2a043..42b49fcd125 100644 --- a/utils/build.config +++ b/utils/build.config @@ -9,7 +9,7 @@ isal=v2.31.1 isal_crypto=v2.25.0 spdk=v24.09 ofi=v1.22.0 -mercury=v2.4.0 +mercury=v2.4.1 protobufc=v1.3.3 ucx=v1.14.1 @@ -27,5 +27,5 @@ ucx=https://github.com/openucx/ucx.git [patch_versions] spdk=0001_3428322b812fe31cc3e1d0308a7f5bd4b06b9886.diff,0002_spdk_rwf_nowait.patch,0003_external_isal.patch -mercury=0001_na_ucx.patch,0002_na_ucx_ep_flush.patch,0003_combined_plugin_path.patch +mercury=0001_dep_versions.patch,0002_ofi_counters.patch,0003_ofi_auth_key.patch argobots=0001_411e5b344642ebc82190fd8b125db512e5b449d1.diff,0002_bb0c908abfac4bfe37852eee621930634183c6aa.diff diff --git a/utils/rpms/daos.changelog b/utils/rpms/daos.changelog index 40e3d18fde6..3a0a53f5251 100644 --- 
a/utils/rpms/daos.changelog +++ b/utils/rpms/daos.changelog @@ -1,4 +1,8 @@ %changelog +* Fri Jan 16 2026 Jerome Soumagne 2.7.103-2 +- Drop libfabric-devel build requirement +- Drop libfabric requirement that is already provided by mercury-libfabric + * Fri Dec 19 2025 Dalton Bohning 2.7.103-1 - Bump version to 2.7.103 diff --git a/utils/rpms/daos.sh b/utils/rpms/daos.sh index 8a28b2a187d..1171a079746 100755 --- a/utils/rpms/daos.sh +++ b/utils/rpms/daos.sh @@ -1,6 +1,10 @@ #!/bin/bash -# (C) Copyright 2025 Google LLC -# WORK IN PROGRESS +# +# (C) Copyright 2025 Google LLC +# Copyright 2025-2026 Hewlett Packard Enterprise Development LP +# +# SPDX-License-Identifier: BSD-2-Clause-Patent +# set -eEuo pipefail root="$(realpath "$(dirname "${BASH_SOURCE[0]}")")" . "${root}/fpm_common.sh" @@ -64,7 +68,7 @@ install_list+=("${tmp}${sysconfdir}/daos/certs=${sysconfdir}/daos") EXTRA_OPTS+=("--rpm-attr" "0755,root,root:${sysconfdir}/daos/certs") -DEPENDS=( "mercury >= ${mercury_full}" "${libfabric_lib} >= ${libfabric_full}" ) +DEPENDS=( "mercury >= ${mercury_version}" ) DEPENDS+=( "${isal_crypto_lib} >= ${isal_crypto_version}" ) build_package "daos" diff --git a/utils/rpms/daos.spec b/utils/rpms/daos.spec index a8f5da38d1f..2a1827b0176 100644 --- a/utils/rpms/daos.spec +++ b/utils/rpms/daos.spec @@ -12,7 +12,6 @@ %global daos_build_args client test %endif %global mercury_version 2.4 -%global libfabric_version 1.15.1-1 %global argobots_version 1.2 %global __python %{__python3} %global daos_log_dir "/var/log/daos" @@ -25,7 +24,7 @@ Name: daos Version: 2.7.103 -Release: 1%{?relval}%{?dist} +Release: 2%{?relval}%{?dist} Summary: DAOS Storage Engine License: BSD-2-Clause-Patent @@ -40,7 +39,6 @@ BuildRequires: python3-scons >= 2.4 %else BuildRequires: scons >= 2.4 %endif -BuildRequires: libfabric-devel >= %{libfabric_version} BuildRequires: mercury-devel >= %{mercury_version} BuildRequires: gcc-c++ %if (0%{?rhel} >= 8) @@ -167,12 +165,10 @@ Requires: ndctl %if (0%{?suse_version} 
>= 1500) Requires: ipmctl >= 03.00.00.0423 Requires: libpmemobj1 >= 2.1.0-1.suse1500 -Requires: libfabric1 >= %{libfabric_version} %else Requires: ipmctl >= 03.00.00.0468 Requires: libpmemobj >= 2.1.0-1%{?dist} %endif -Requires: libfabric >= %{libfabric_version} Requires: mercury >= %{mercury_version} Requires(post): /sbin/ldconfig Requires(postun): /sbin/ldconfig @@ -195,10 +191,6 @@ This package contains DAOS administrative tools (e.g. dmg). Summary: The DAOS client Requires: %{name}%{?_isa} = %{version}-%{release} Requires: mercury >= %{mercury_version} -Requires: libfabric >= %{libfabric_version} -%if (0%{?suse_version} >= 1500) -Requires: libfabric1 >= %{libfabric_version} -%endif Requires: /usr/bin/fusermount3 %{?systemd_requires} diff --git a/utils/rpms/mercury.changelog b/utils/rpms/mercury.changelog new file mode 100644 index 00000000000..c00f682eb61 --- /dev/null +++ b/utils/rpms/mercury.changelog @@ -0,0 +1,274 @@ +%changelog +* Mon Jan 26 2026 Jerome Soumagne - 2.4.1-1 +- Update to 2.4.1 +- Separate libfabric plugin from main build to align with ucx plugin +- Add patches for runtime version checks and libfabric plugin counters +- Add patch for libfabric auth key + +* Wed Jun 25 2025 Joseph Moore - 2.4.0-5 +- Update release number to differentiate from test RPMs for prior issue. + +* Tue Mar 11 2025 Joseph Moore - 2.4.0-4 +- Change to addr_release for handling of "already present" warning. + +* Wed Jan 15 2025 Joseph Moore - 2.4.0-3 +- Add patch to na_ucx.c to flush end point prior to close. + +* Tue Jan 07 2025 Joseph Moore - 2.4.0-2 +- Enable debug RPMs for Leap sub-packages. + +* Mon Nov 04 2024 Jerome Soumagne - 2.4.0-1 +- Update to 2.4.0 +- Update required libfabric version (>= 1.20) + +* Mon Oct 07 2024 Joseph Moore - 2.4.0~rc5-5 +- Update patch to na_ucx.c to set thread-safe on clients. + +* Thu Sep 26 2024 Joseph Moore - 2.4.0~rc5-4 +- Update patch to na_ucx.c to add fix for connection accept. + +* Wed Sep 04 2024 Brian J. 
Murrell - 2.4.0~rc5-3 +- Add --without ucx build switch + +* Thu Aug 29 2024 Joseph Moore - 2.4.0~rc5-2 +- Add patch to na_ucx.c to check ep in key_resolve. + +* Mon Aug 26 2024 Jerome Soumagne - 2.4.0~rc5-1 +- Update to 2.4.0rc5 + +* Fri Aug 02 2024 Jerome Soumagne - 2.4.0~rc4-1 +- Update to 2.4.0rc4 +- Remove previous patches now included in 2.4 +- Require libfabric >= 1.15 + +* Tue Mar 19 2024 Jerome Soumagne - 2.3.1-3 +- Add patch to fix ucx hg_info +- Add patch to remove ofi cxi MR warnings +- Add patch to fix potential segfault on log free + +* Wed Nov 22 2023 Jerome Soumagne - 2.3.1-2 +- Rebuild for EL 8.8 and Leap 15.5 + +* Fri Oct 27 2023 Jerome Soumagne - 2.3.1-1 +- Update to 2.3.1 +- Add json-c dependency for hg_info JSON output support +- Drop support for CentOS7 + +* Tue Sep 26 2023 Joseph Moore - 2.3.1~rc1-2 +- Add patch to na_ucx.c to force retry of out-of-memory error. + +* Tue Aug 29 2023 Jerome Soumagne - 2.3.1~rc1-1 +- Update to 2.3.1rc1 + +* Thu Jun 22 2023 Brian J. Murrell - 2.3.0-2 +- Rebuild for EL9 + +* Wed Jun 7 2023 Jerome Soumagne - 2.3.0-1 +- Update to 2.3.0 +- Add hg_info tool +- Fix pie flags on CentOS7 +- Remove na_ucx_src_port.patch and old patches + +* Tue Apr 25 2023 Jerome Soumagne - 2.3.0~rc5-1 +- Update to 2.3.0rc5 +- Remove na_ucx.c patch and add temporary na_ucx_src_port.patch +- Update build to make use of NA dynamic plugins +- Fix source URL and package perf tests + +* Thu Dec 22 2022 Joseph Moore - 2.2.0-6 +- Regenerate packages for LEAP15.4 + +* Thu Nov 17 2022 Joseph Moore - 2.2.0-5 +- Update na_ucx.c patch to support reconnection following a disconnect. + +* Wed Oct 05 2022 Joseph Moore - 2.2.0-4 +- Update na_ucx.c patch to include UCX status to NA error mapping. + +* Tue Sep 20 2022 Joseph Moore - 2.2.0-3 +- Fix defect in connect function. + +* Fri Sep 09 2022 Joseph Moore - 2.2.0-2 +- Add na_ucx.c patch to change ep creation for single IB device. 
+ +* Fri Aug 5 2022 Jerome Soumagne - 2.2.0-1 +- Update to 2.2.0 + +* Mon Aug 1 2022 Jerome Soumagne - 2.2.0~rc6-2 +- Rebuild after libfabric rpm dropped CXI compat patch +- Drop CXI compat patch + +* Mon Jun 27 2022 Jerome Soumagne - 2.2.0~rc6-1 +- Update to 2.2.0rc6 +- Skip install rpath, enable debug log. +- Remove openpa dependency. + +* Fri Apr 22 2022 Joseph Moore - 2.1.0~rc4-9 +- Change ucx unified mode to off (updated UCX patch file). + +* Fri Apr 1 2022 Brian J. Murrell - 2.1.0~rc4-8 +- Build with ucx subpackage on supported platforms +- Removed invalid build options: + * MERCURY_ENABLE_VERBOSE_ERROR + * MERCURY_USE_SELF_FORWARD + +* Thu Mar 31 2022 Joseph Moore - 2.1.0~rc4-7 +- Apply daos-9679 address parsing change and active message revision to na_ucx.c. + +* Fri Mar 11 2022 Alexander Oganezov - 2.1.0~rc4-6 +- Apply cxi provider patch + +* Tue Feb 22 2022 Alexander Oganezov - 2.1.0~rc4-5 +- Apply doas-9561 workaround + +* Thu Feb 17 2022 Brian J. Murrell - 2.1.0~rc4-4 +- Fix issues with %%post* ldconfig + - No lines are allowed after %%post -p + - These are not needed on EL8 as it's glibc does the work + +* Thu Dec 23 2021 Alexander Oganezov - 2.1.0~rc4-3 +- Remove daos-9173 workaround +- Apply cpu usage fix to mercury + +* Tue Dec 7 2021 Alexander Oganezov - 2.1.0~rc4-2 +- Apply DAOS-9173 workaround patch to na_ofi.c + +* Tue Nov 30 2021 Alexander Oganezov - 2.1.0~rc4-1 +- Update to version v2.1.0rc4 + +* Tue Oct 12 2021 Alexander Oganezov - 2.1.0~rc2-1 +- Update to version v2.1.0rc2 + +* Fri May 14 2021 Alexander Oganezov - 2.0.1-1 +- Update to version v2.0.1 + +* Mon May 10 2021 Brian J. 
Murryyell - 2.0.1~rc1-2 +- Enable debuginfo package building for SUSE + +* Wed Jan 20 2021 Alexander Oganezov - 2.0.1~rc1-1 +- Update to version v2.0.1rc1 + +* Wed Nov 18 2020 Alexander Oganezov - 2.0.0-1 +- Update to release v2.0.0 + +* Wed Oct 28 2020 Alexander Oganezov - 2.0.0~rc3-1 +- Update to release v2.0.0rc3 + +* Mon Oct 12 2020 Alexander Oganezov - 2.0.0~rc2-1 +- Update to release v2.0.0rc2 + +* Tue Aug 18 2020 Brian J. Murryyell - 2.0.0~rc1-2 +- Use release tarball and not individual submodule tarballs + +* Mon Jul 6 2020 Alexander A Oganezov - 2.0.0~rc1-1 +- Update to release v2.0.0rc1 + +* Mon Jun 22 2020 Brian J. Murryyell - 2.0.0~a1-2 +- Fix License: +- Add %%license + +* Thu May 07 2020 Brian J. Murrell - 2.0.0~a1-1 +- Fix pre-release tag in Version: +- Add Requires: libfabric-devel to devel package + +* Thu Apr 9 2020 Alexander A Oganezov - 2.0.0a1-0.8 +- Update to 4871023058887444d47ead4d089c99db979f3d93 + +* Tue Mar 17 2020 Alexander A Oganezov - 2.0.0a1-0.7 +- Update to 41caa143a07ed179a3149cac4af0dc7aa3f946fd + +* Thu Mar 12 2020 Alexander A Oganezov - 2.0.0a1-0.6 +- Update to 299b06d47e6c1d59a45985dcbbebe3caca0189d0 + +* Tue Mar 10 2020 Alexander A Oganezov - 2.0.0a1-0.5 +- Updated to ad5a3b3dbf171a97e1ca5f1683299db1c69b03ea + +* Thu Mar 05 2020 Vikram Chhabra - 2.0.0a1-0.4 +- Updated to latest master with HG_Forward fix. + +* Tue Feb 11 2020 Yulu Jia - 2.0.0a1-0.3 +- Remove nameserver patch + +* Sun Feb 09 2020 Yulu Jia - 2.0.0a1-0.2 +- Update patch to enable ip:port URI format for psm2 + +* Tue Feb 04 2020 Brian J. 
Murrell - 2.0.0a1-0.1 +- Update to 2.0.0a1 + +* Tue Jan 28 2020 Yulu Jia - 1.0.1-22 +- Update to c2c2628 +- Apply patch to enable ip:port URI format for psm2 + +* Mon Dec 2 2019 Alexander Oganezov - 1.0.1-21 +- Removed sl_patch on top of 7b529b +- Updated to 9889a0 + +* Thu Oct 31 2019 Alexander Oganezov - 1.0.1-20 +- sl_patch on top of 7b529b + +* Wed Oct 23 2019 Alexander Oganezov - 1.0.1-19 +- Update to 7b529b + +* Tue Oct 22 2019 Alexander Oganezov - 1.0.1-18 +- Reverting from 6a8b693 due to mercury segfaults + +* Mon Oct 21 2019 Alexander Oganezov - 1.0.1-17 +- Update to 6a8b693 + +* Wed Oct 16 2019 Alexander Oganezov - 1.0.1-16 +- Fixed spec to apply patch for 616fee properly + +* Tue Oct 15 2019 Alexander Oganezov - 1.0.1-15 +- Update to 616fee to get latest changes + +* Wed Oct 02 2019 Brian J. Murrell - 1.0.1-14 +- Update to cc0807 to include the HG_Cancel() fix. +- Update to f0b9f9 to get latest changes + +* Wed Oct 02 2019 Brian J. Murrell - 1.0.1-13 +- Once again revert previous update + +* Wed Oct 02 2019 Brian J. Murrell - 1.0.1-12 +- Update to cc0807 to include the HG_Cancel() fix. +- Update to f0b9f9 to get latest changes + +* Wed Sep 25 2019 Brian J. Murrell - 1.0.1-11 +- Back out previous update + - not all consumers are ready for it yet so they need to + pin their BR + +* Fri Sep 20 2019 Brian J. Murrell - 1.0.1-10 +- Update to cc0807 to include the HG_Cancel() fix. +- Update to f0b9f9 to get latest changes + +* Thu Aug 08 2019 Brian J. Murrell - 1.0.1-9 +- Revert previous update + +* Fri Aug 02 2019 Yulu Jia - 1.0.1-8 +- Update to cc0807 to include the HG_Cancel() fix. +- Roll the version number back to 1.0.1 + +* Fri Aug 02 2019 Brian J. Murrell - 1.0.1-7 +- Revert back to the 1.0.1-4 release as the upgrade included + in -5 (and the subsequent fix in -6) was premature + +* Thu Aug 01 2019 Brian J. 
Murrell - 1.0.1-6 +- Roll the version number back to 1.0.1 + +* Fri Jul 26 2019 Yulu Jia - 1.0.1-5 +- Update to cc0807 to include the HG_Cancel() fix. + +* Thu May 02 2019 Brian J. Murrell - 1.0.1-4 +- Devel package needs to require the lib package + +* Fri Mar 15 2019 Brian J. Murrell - 1.0.1-2 +- Add patch to revert back to Dec 06, 2018 c68870f + +* Mon Mar 11 2019 Brian J. Murrell - 1.0.1-1 +- Update to 1.0.1 +- Add patch for "HG Core: fix missing static inline in mercury_core.h" + +* Wed Oct 24 2018 Brian J. Murrell - 0.9.0-1.git.0f8f25b +- Update mercury to git sha1 0f8f25bb3d57f117979de65cc3c05cf192cf4b31 + +* Mon Aug 20 2018 Brian J. Murrell - 0.9.0-1.git.f7f6955 +- Initial package diff --git a/utils/rpms/mercury.sh b/utils/rpms/mercury.sh index f14fe057043..5f30dd89372 100755 --- a/utils/rpms/mercury.sh +++ b/utils/rpms/mercury.sh @@ -1,5 +1,10 @@ #!/bin/bash -# (C) Copyright 2025 Google LLC +# +# (C) Copyright 2025 Google LLC +# Copyright 2025-2026 Hewlett Packard Enterprise Development LP +# +# SPDX-License-Identifier: BSD-2-Clause-Patent +# set -eEuo pipefail root="$(realpath "$(dirname "${BASH_SOURCE[0]}")")" . "${root}/fpm_common.sh" @@ -24,6 +29,7 @@ Access (RMA). Its interface is generic and allows any function call to be serialized. Since code generation is done using the C preprocessor, no external tool is required." 
URL="http://mercury-hpc.github.io" +RPM_CHANGELOG="mercury.changelog" files=() TARGET_PATH="${bindir}" @@ -36,14 +42,17 @@ list_files files "${SL_MERCURY_PREFIX}/lib64/lib*.so.*" clean_bin "${files[@]}" append_install_list "${files[@]}" +ARCH="${isa}" +build_package "mercury" + TARGET_PATH="${libdir}/mercury" list_files files "${SL_MERCURY_PREFIX}/lib64/mercury/libna_plugin_ofi.so" clean_bin "${files[@]}" append_install_list "${files[@]}" ARCH="${isa}" -DEPENDS=("${libfabric_lib} >= ${libfabric_version}") -build_package "mercury" +DEPENDS=("${libfabric_lib} >= ${libfabric_min_version}") +build_package "mercury-libfabric" DEPENDS=() TARGET_PATH="${libdir}/mercury" diff --git a/utils/rpms/package_info.sh b/utils/rpms/package_info.sh index cc3be377607..ac6356c3c72 100644 --- a/utils/rpms/package_info.sh +++ b/utils/rpms/package_info.sh @@ -1,5 +1,10 @@ #!/bin/bash -# (C) Copyright 2025 Google LLC +# +# (C) Copyright 2025 Google LLC +# Copyright 2025-2026 Hewlett Packard Enterprise Development LP +# +# SPDX-License-Identifier: BSD-2-Clause-Patent +# root="$(realpath "$(dirname "$(dirname "$(dirname "${BASH_SOURCE[0]}")")")")" set_lib_name() { comp="$1"; shift @@ -35,11 +40,12 @@ daos_release="$(grep "^Release: " "${root}/utils/rpms/daos.spec" | \ sed 's/^Release: *//' | sed 's/%.*//')${DAOS_RELVAL:-}${distro_name}" export daos_release +export libfabric_min_version="1.20" export libfabric_version="1.22.0" export libfabric_release="5${distro_name}" export libfabric_full="${libfabric_version}-${libfabric_release}" -export mercury_version="2.4.0" -export mercury_release="8${distro_name}" +export mercury_version="2.4.1" +export mercury_release="1${distro_name}" export mercury_full="${mercury_version}-${mercury_release}" export argobots_version="1.2" export argobots_release="4${distro_name}" @@ -94,6 +100,8 @@ set_lib_name mercury dev mercury mercury mercury export mercury_dev set_lib_name mercury lib mercury mercury mercury export mercury_lib +set_lib_name mercury_libfabric 
lib mercury-libfabric mercury-libfabric mercury-libfabric +export mercury_libfabric_lib set_lib_name pmemobj lib libpmemobj libpmemobj1 libpmemobj1 set_lib_name pmemobj dev libpmemobj libpmemobj1 libpmemobj1 diff --git a/utils/test_memcheck.supp b/utils/test_memcheck.supp index 4f5e2bca077..ef69713271a 100644 --- a/utils/test_memcheck.supp +++ b/utils/test_memcheck.supp @@ -271,15 +271,11 @@ ... } { - Tcp provider - Memcheck:Param - sendmsg(msg.msg_iov[1]) - ... - fun:sendmsg - fun:ofi_sockapi_sendv_socket - fun:ofi_bsock_sendv - ... - fun:fi_senddata + + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + fun:hg_dlog_mkcount64 ... } { @@ -289,20 +285,14 @@ ... fun:ofi_bsock_sendv ... - fun:fi_tsend - ... } { Tcp provider with ofi rxm 2 Memcheck:Param sendmsg(msg.msg_iov[2]) ... - fun:sendmsg - fun:ofi_sockapi_sendv_socket fun:ofi_bsock_sendv ... - fun:fi_tsend - ... } { par_init mpi or dlopen leak From 6ff2f9b57c5cd6bc30e82a2e0b25cce1215bf93f Mon Sep 17 00:00:00 2001 From: Phil Henderson Date: Tue, 3 Feb 2026 14:10:38 -0500 Subject: [PATCH 175/253] DAOS-17922 test: Verify DTX is involved with MD on SSD Phase 2 pool (#17385) Add test to verify DTX is involved with MD on SSD Phase 2 pool. 
Signed-off-by: Phil Henderson --- src/tests/ftest/pool/verify_dtx.py | 152 +++++++++++++++++++++++++++ src/tests/ftest/pool/verify_dtx.yaml | 64 +++++++++++ 2 files changed, 216 insertions(+) create mode 100644 src/tests/ftest/pool/verify_dtx.py create mode 100644 src/tests/ftest/pool/verify_dtx.yaml diff --git a/src/tests/ftest/pool/verify_dtx.py b/src/tests/ftest/pool/verify_dtx.py new file mode 100644 index 00000000000..869d0c42a27 --- /dev/null +++ b/src/tests/ftest/pool/verify_dtx.py @@ -0,0 +1,152 @@ +""" + (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP + + SPDX-License-Identifier: BSD-2-Clause-Patent +""" +import json +import math + +from job_manager_utils import get_job_manager +from mdtest_utils import MDTEST_NAMESPACE, run_mdtest +from telemetry_test_base import TestWithTelemetry + + +class VerifyDTXMetrics(TestWithTelemetry): + """ + Ensures DTX is involved with MD on SSD phase 2 pool. + + :avocado: recursive + """ + + def test_verify_dtx_metrics(self): + """Ensure DTX is involved with MD on SSD phase 2 pool. + + 1. Create a pool with a mem ratio of 100% (for pmem or phase 1) or 25% (for phase 2) + 2. Collect a baseline for the DTX metrics + 3. Run mdtest -a DFS to write data with different object classes + 4. Collect new DTX metrics + 5. 
Verify DTX metrics + + :avocado: tags=all,full_regression + :avocado: tags=hw,large + :avocado: tags=pool + :avocado: tags=VerifyDTXMetrics,test_verify_dtx_metrics + """ + # pylint: disable=too-many-branches + write_bytes = self.params.get('write_bytes', MDTEST_NAMESPACE, None) + processes = self.params.get('processes', MDTEST_NAMESPACE, None) + ppn = self.params.get('ppn', MDTEST_NAMESPACE, None) + object_classes = self.params.get('object_classes', '/run/*') + + dtx_metrics = list(self.telemetry.ENGINE_POOL_VOS_CACHE_METRICS[:1]) + dtx_metrics += list(self.telemetry.ENGINE_IO_DTX_COMMITTED_METRICS) + + self.log_step('Creating a pool (dmg pool create)') + pool = self.get_pool(connect=False) + try: + _result = json.loads(pool.dmg.result.stdout) + tier_bytes_scm = int(_result['response']['tier_bytes'][0]) + mem_file_bytes = int(_result['response']['mem_file_bytes']) + total_engines = len(_result['response']['tgt_ranks']) + except Exception as error: # pylint: disable=broad-except + self.fail(f'Error extracting data for dmg pool create output: {error}') + + # Calculate the mdtest files_per_process based upon the scm size and other mdtest params + _write_procs = processes + _mdtest_cmds = len(object_classes) + if ppn is not None: + _write_procs = ppn * len(self.host_info.clients.hosts) + files_per_process = math.floor(mem_file_bytes / (write_bytes * _write_procs * _mdtest_cmds)) + if tier_bytes_scm > mem_file_bytes: + # Write more (225%) files to exceed mem_file_bytes and cause eviction + num_of_files_dirs = math.ceil(files_per_process * 2.25) + else: + # Write less (75%) files to avoid out of space errors + num_of_files_dirs = math.floor(files_per_process * 0.75) + + self.log.debug("-" * 60) + self.log.debug("Pool %s create data:", pool) + self.log.debug(" tier_bytes_scm (per engine/total): %s / %s", + tier_bytes_scm, tier_bytes_scm * total_engines) + self.log.debug(" mem_file_bytes (per engine/total): %s / %s", + mem_file_bytes, mem_file_bytes * total_engines) + 
self.log.debug(" mem_ratio.value: %s", pool.mem_ratio.value) + self.log.debug(" total_engines: %s", total_engines) + self.log.debug("Mdtest write parameters:") + self.log.debug(" write_bytes per mdtest: %s", write_bytes) + if ppn is not None: + self.log.debug(" processes (ppn * nodes): %s * %s = %s", + ppn, len(self.host_info.clients.hosts), _write_procs) + else: + self.log.debug(" processes: %s", processes) + self.log.debug(" files_per_process per mtest: %s", files_per_process) + self.log.debug(" number of mdtest commands: %s", _mdtest_cmds) + self.log.debug(" num_of_files_dirs per mdtest: %s", num_of_files_dirs) + self.log.debug(" total expected to write: %s", + _mdtest_cmds * _write_procs * write_bytes * num_of_files_dirs) + self.log.debug("-" * 60) + + self.log_step('Collect DTX metrics after creating a pool (dmg telemetry metrics query)') + expected_ranges = self.telemetry.collect_data(dtx_metrics) + for metric in expected_ranges: + for label in expected_ranges[metric]: + expected_ranges[metric][label] = [0, 0] # 0 only + if pool.mem_ratio.value is not None: + suffixes = [ + '_dtx_committed_max', + '_dtx_committed_mean', + '_dtx_committed_samples', + '_dtx_committed_stddev', + '_dtx_committed_sum', + '_dtx_committed_sumsquares' + ] + if any(map(metric.endswith, suffixes)): + expected_ranges[metric][label] = [0] # 0 or greater (phase 2) + self.log.debug('%s expected_ranges: %s', pool, expected_ranges) + + self.log_step('Verify DTX metrics after pool creation') + if not self.telemetry.verify_data(expected_ranges): + self.fail('DTX metrics verification failed after pool creation') + + manager = get_job_manager(self, subprocess=False, timeout=None) + processes = self.params.get('processes', MDTEST_NAMESPACE, None) + ppn = self.params.get('ppn', MDTEST_NAMESPACE, None) + for oclass in object_classes: + self.log_step(f'Write data into a containers with the {oclass} object classes (mdtest)') + container = self.get_container(pool, oclass=oclass, dir_oclass=oclass) + 
run_mdtest( + self, self.hostlist_clients, self.workdir, None, container, processes, ppn, manager, + mdtest_params={'dfs_oclass': oclass, 'dfs_dir_oclass': oclass, + 'num_of_files_dirs': num_of_files_dirs}) + + self.log_step('Collect DTX metrics after writing data (dmg telemetry metrics query)') + expected_ranges = self.telemetry.collect_data(dtx_metrics) + for metric in expected_ranges: + for label in expected_ranges[metric]: + if metric.endswith('_dtx_committed'): + expected_ranges[metric][label] = [0] # 0 or greater + elif metric.endswith('_dtx_committed_max'): + expected_ranges[metric][label] = [100] # 100 or greater + elif metric.endswith('_dtx_committed_mean'): + expected_ranges[metric][label] = [50] # 50 or greater + elif metric.endswith('_dtx_committed_min'): + expected_ranges[metric][label] = [0] # 0 or greater + elif metric.endswith('_dtx_committed_sum'): + expected_ranges[metric][label] = [1000] # 1000 or greater + elif metric.endswith('_dtx_committed_sumsquares'): + expected_ranges[metric][label] = [100000] # 100,000 or greater + elif metric.endswith('_vos_cache_page_evict'): + if pool.mem_ratio.value is None: + expected_ranges[metric][label] = [0, 0] # 0 only (phase 1) + else: + expected_ranges[metric][label] = [1] # 1 or greater (phase 2) + else: + # e.g. 
*_dtx_committed_samples, *_dtx_committed_stddev + expected_ranges[metric][label] = [1] # 1 or greater + self.log.debug('%s expected_ranges: %s', pool, expected_ranges) + + self.log_step('Verify DTX metrics after writing data') + if not self.telemetry.verify_data(expected_ranges): + self.fail('DTX metrics verification failed after writing data') + + self.log_step('Test passed') diff --git a/src/tests/ftest/pool/verify_dtx.yaml b/src/tests/ftest/pool/verify_dtx.yaml new file mode 100644 index 00000000000..e0947402b6b --- /dev/null +++ b/src/tests/ftest/pool/verify_dtx.yaml @@ -0,0 +1,64 @@ +launch: + !filter-only : /run/pool/default # yamllint disable-line rule:colons + +hosts: + test_servers: 5 + test_clients: 3 + +timeout: 600 + +server_config: + name: daos_server + engines_per_host: 2 + engines: + 0: + pinned_numa_node: 0 + nr_xs_helpers: 0 + log_file: daos_server0.log + storage: auto + 1: + pinned_numa_node: 1 + nr_xs_helpers: 0 + log_file: daos_server1.log + storage: auto + +pool: !mux + default: + size: 575G + md_on_ssd_p2: + size: 575G + mem_ratio: 25 + +container: + type: POSIX + +mdtest: + dfs_destroy: False + manager: "MPICH" + ppn: 32 + test_dir: "/" + api: DFS + flags: "-C -F -G 27 -N 1 -Y -u -L" + branching_factor: 1 + write_bytes: 3072 + read_bytes: 3072 + +object_classes: + - "EC_2P1G1" + - "EC_2P1G8" + - "EC_2P1GX" + - "EC_2P2G1" + - "EC_2P2G8" + - "EC_2P2GX" + - "EC_4P1G1" + - "EC_4P1G8" + - "EC_4P1GX" + - "EC_4P2G1" + - "EC_4P2G8" + - "EC_4P2GX" + - "EC_4P3G1" + - "EC_4P3G8" + - "EC_4P3GX" + - "EC_8P2G1" + - "EC_8P2G8" + - "EC_8P2GX" From ba811f0d9e5906f45e7ead80bad0c421cae80780 Mon Sep 17 00:00:00 2001 From: Dalton Bohning Date: Wed, 4 Feb 2026 08:09:48 -0800 Subject: [PATCH 176/253] DAOS-623 cq: remove cat_recovery from githook default branches (#17487) feature/cat_recovery has not had new commits in 2 years, so remove it from the default branch listing for githooks since it slows down the listing the older it is. 
Signed-off-by: Dalton Bohning --- utils/githooks/branches.default | 1 - 1 file changed, 1 deletion(-) diff --git a/utils/githooks/branches.default b/utils/githooks/branches.default index 2e21a25fbd3..4a74c43cef0 100755 --- a/utils/githooks/branches.default +++ b/utils/githooks/branches.default @@ -1,5 +1,4 @@ #!/bin/bash set -eEuo pipefail -echo feature/cat_recovery echo feature/multiprovider echo feature/firewall From 0469e8c44ce2dd2a2b38fddb5b4edbfdbec687d9 Mon Sep 17 00:00:00 2001 From: Phil Henderson Date: Wed, 4 Feb 2026 16:47:45 -0500 Subject: [PATCH 177/253] DAOS-18017 test: Fix for Functional VM Stages (#17505) Install mercury-libfabric package in Functional VM stages. Signed-off-by: Phil Henderson --- Jenkinsfile | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index f534692f483..26ce0473a71 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -882,7 +882,7 @@ pipeline { job_step_update( functionalTest( inst_repos: daosRepos(), - inst_rpms: functionalPackages(1, next_version(), 'tests-internal'), + inst_rpms: functionalPackages(1, next_version(), 'tests-internal') + ' mercury-libfabric', test_function: 'runTestFunctionalV2')) } post { @@ -904,8 +904,8 @@ pipeline { job_step_update( functionalTest( inst_repos: daosRepos(), - inst_rpms: functionalPackages(1, next_version(), 'tests-internal'), - test_function: 'runTestFunctionalV2')) + inst_rpms: functionalPackages(1, next_version(), 'tests-internal') + ' mercury-libfabric', + test_function: 'runTestFunctionalV2')) } post { always { @@ -926,8 +926,8 @@ pipeline { job_step_update( functionalTest( inst_repos: daosRepos(), - inst_rpms: functionalPackages(1, next_version(), 'tests-internal'), - test_function: 'runTestFunctionalV2')) + inst_rpms: functionalPackages(1, next_version(), 'tests-internal') + ' mercury-libfabric', + test_function: 'runTestFunctionalV2')) } post { always { @@ -948,7 +948,7 @@ pipeline { job_step_update( functionalTest( inst_repos: 
daosRepos(), - inst_rpms: functionalPackages(1, next_version(), 'tests-internal'), + inst_rpms: functionalPackages(1, next_version(), 'tests-internal') + ' mercury-libfabric', test_function: 'runTestFunctionalV2', image_version: 'leap15.6')) } @@ -971,7 +971,7 @@ pipeline { job_step_update( functionalTest( inst_repos: daosRepos(), - inst_rpms: functionalPackages(1, next_version(), 'tests-internal'), + inst_rpms: functionalPackages(1, next_version(), 'tests-internal') + ' mercury-libfabric', test_function: 'runTestFunctionalV2')) } post { From 1ef39ae0fde22eca39917b9845eea4dc89a8e4a6 Mon Sep 17 00:00:00 2001 From: Liu Xuezhao Date: Thu, 5 Feb 2026 14:32:02 +0800 Subject: [PATCH 178/253] DAOS-18487 rebuild: don't change dom fseq for MAP_REVERT_REBUILD (#17469) Should not change domain's do_comp.co_fseq when reverting the pool map for rebuild error handling, as that may cause do_comp.co_fseq to be higher than the retried rebuild version and always abort+retry. Signed-off-by: Xuezhao Liu --- src/common/pool_map.c | 16 ++++++++-------- src/include/daos/pool_map.h | 4 ++-- src/pool/srv_pool_map.c | 20 +++++++++++--------- src/rebuild/srv.c | 9 +++++---- 4 files changed, 26 insertions(+), 23 deletions(-) diff --git a/src/common/pool_map.c b/src/common/pool_map.c index 13976ab23ca..3711ac98310 100644 --- a/src/common/pool_map.c +++ b/src/common/pool_map.c @@ -1,6 +1,6 @@ /** * (C) Copyright 2016-2024 Intel Corporation. 
- * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -1877,7 +1877,7 @@ child_status_check(struct pool_domain *domain, uint32_t status) /* Domain status update state machine */ static int update_dom_status(struct pool_domain *domain, uint32_t id, uint32_t status, uint32_t version, - bool *updated) + bool *updated, bool for_revert) { int i; @@ -1893,7 +1893,7 @@ update_dom_status(struct pool_domain *domain, uint32_t id, uint32_t status, uint struct pool_domain *child = &domain->do_children[i]; int found; - found = update_dom_status(child, id, status, version, updated); + found = update_dom_status(child, id, status, version, updated, for_revert); if (!found) continue; @@ -1947,14 +1947,14 @@ update_dom_status(struct pool_domain *domain, uint32_t id, uint32_t status, uint /* Only change to DOWNOUT/DOWN if all of children are DOWNOUT/DOWN */ if (child_status_check(child, PO_COMP_ST_DOWN | PO_COMP_ST_DOWNOUT) && (child->do_comp.co_status != status)) { - D_DEBUG(DB_MD, "rank %u id %u status %u --> %u\n", + D_DEBUG(DB_MD, "rank %u id %u status %u --> %u, for_revert %d", child->do_comp.co_rank, child->do_comp.co_id, - child->do_comp.co_status, status); + child->do_comp.co_status, status, for_revert); if (child->do_comp.co_status == PO_COMP_ST_DOWN) child->do_comp.co_flags = PO_COMPF_DOWN2OUT; child->do_comp.co_status = status; - if (status == PO_COMP_ST_DOWN) + if (status == PO_COMP_ST_DOWN && !for_revert) child->do_comp.co_fseq = version; *updated = true; } @@ -1975,12 +1975,12 @@ update_dom_status(struct pool_domain *domain, uint32_t id, uint32_t status, uint int update_dom_status_by_tgt_id(struct pool_map *map, uint32_t tgt_id, uint32_t status, - uint32_t version, bool *updated) + uint32_t version, bool *updated, bool for_revert) { int rc; D_ASSERT(map->po_tree != NULL); - rc = update_dom_status(map->po_tree, tgt_id, status, version, 
updated); + rc = update_dom_status(map->po_tree, tgt_id, status, version, updated, for_revert); if (rc < 0) return rc; return 0; diff --git a/src/include/daos/pool_map.h b/src/include/daos/pool_map.h index 8d82791d235..c3c094e3ea3 100644 --- a/src/include/daos/pool_map.h +++ b/src/include/daos/pool_map.h @@ -1,6 +1,6 @@ /** * (C) Copyright 2016-2024 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -309,7 +309,7 @@ int pool_map_find_failed_tgts_by_rank(struct pool_map *map, unsigned int *tgt_cnt, d_rank_t rank); int update_dom_status_by_tgt_id(struct pool_map *map, uint32_t tgt_id, uint32_t status, - uint32_t version, bool *updated); + uint32_t version, bool *updated, bool for_revert); bool pool_map_node_status_match(struct pool_domain *dom, unsigned int status); diff --git a/src/pool/srv_pool_map.c b/src/pool/srv_pool_map.c index 32f0710102c..7361c146e8c 100644 --- a/src/pool/srv_pool_map.c +++ b/src/pool/srv_pool_map.c @@ -1,7 +1,7 @@ /** * (C) Copyright 2021-2024 Intel Corporation. 
* (C) Copyright 2025 Google LLC - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent * @@ -318,12 +318,12 @@ update_one_dom(struct pool_map *map, struct pool_domain *dom, struct pool_target if (dom->do_comp.co_status == PO_COMP_ST_DOWNOUT || dom->do_comp.co_status == PO_COMP_ST_DOWN) update_dom_status_by_tgt_id(map, tgt->ta_comp.co_id, PO_COMP_ST_UP, - *version, &updated); + *version, &updated, false); break; case MAP_EXTEND: if (dom->do_comp.co_status == PO_COMP_ST_NEW) update_dom_status_by_tgt_id(map, tgt->ta_comp.co_id, PO_COMP_ST_UP, - *version, &updated); + *version, &updated, false); break; case MAP_EXCLUDE: /* Only change the dom status if it is from SWIM eviction */ @@ -331,27 +331,29 @@ update_one_dom(struct pool_map *map, struct pool_domain *dom, struct pool_target !(dom->do_comp.co_status & (PO_COMP_ST_DOWN | PO_COMP_ST_DOWNOUT)) && pool_map_node_status_match(dom, PO_COMP_ST_DOWN | PO_COMP_ST_DOWNOUT)) update_dom_status_by_tgt_id(map, tgt->ta_comp.co_id, PO_COMP_ST_DOWN, - *version, &updated); + *version, &updated, false); break; case MAP_FINISH_REBUILD: if (dom->do_comp.co_status == PO_COMP_ST_UP) update_dom_status_by_tgt_id(map, tgt->ta_comp.co_id, PO_COMP_ST_UPIN, - *version, &updated); + *version, &updated, false); else if (dom->do_comp.co_status == PO_COMP_ST_DOWN && exclude_rank) update_dom_status_by_tgt_id(map, tgt->ta_comp.co_id, PO_COMP_ST_DOWNOUT, - *version, &updated); + *version, &updated, false); break; case MAP_REVERT_REBUILD: if (dom->do_comp.co_status == PO_COMP_ST_UP) { if (dom->do_comp.co_fseq == 1) update_dom_status_by_tgt_id(map, tgt->ta_comp.co_id, PO_COMP_ST_NEW, - *version, &updated); + *version, &updated, true); else if (dom->do_comp.co_flags == PO_COMPF_DOWN2UP) update_dom_status_by_tgt_id(map, tgt->ta_comp.co_id, - PO_COMP_ST_DOWN, *version, &updated); + PO_COMP_ST_DOWN, *version, &updated, + true); 
else update_dom_status_by_tgt_id(map, tgt->ta_comp.co_id, - PO_COMP_ST_DOWNOUT, *version, &updated); + PO_COMP_ST_DOWNOUT, *version, &updated, + true); } break; default: diff --git a/src/rebuild/srv.c b/src/rebuild/srv.c index 24598ab68cf..9bfee11bd7b 100644 --- a/src/rebuild/srv.c +++ b/src/rebuild/srv.c @@ -289,8 +289,8 @@ rebuild_leader_set_status(struct rebuild_global_pool_tracker *rgt, } if (status->dtx_resync_version != resync_ver) - D_INFO(DF_RB " rank %d, update dtx_resync_version from %d to %d", DP_RB_RGT(rgt), - rank, status->dtx_resync_version, resync_ver); + D_DEBUG(DB_REBUILD, DF_RB " rank %d, update dtx_resync_version from %d to %d", + DP_RB_RGT(rgt), rank, status->dtx_resync_version, resync_ver); status->dtx_resync_version = resync_ver; if (flags & SCAN_DONE) status->scan_done = 1; @@ -2906,8 +2906,9 @@ rebuild_tgt_status_check_ult(void *arg) rpt->rt_reported_rec_cnt = status.rec_count; rpt->rt_reported_size = status.size; if (iv.riv_dtx_resyc_version > reported_dtx_resyc_ver) { - D_INFO(DF_RB "reported riv_dtx_resyc_version %d", - DP_RB_RPT(rpt), iv.riv_dtx_resyc_version); + D_DEBUG(DB_REBUILD, + DF_RB "reported riv_dtx_resyc_version %d", + DP_RB_RPT(rpt), iv.riv_dtx_resyc_version); reported_dtx_resyc_ver = iv.riv_dtx_resyc_version; } } else { From 291b503bfe02f99a897b4e2d37d85373f287c1c3 Mon Sep 17 00:00:00 2001 From: Johann Lombardi Date: Thu, 5 Feb 2026 09:46:33 +0100 Subject: [PATCH 179/253] DAOS-17946 pool,cont: change default properties (#17032) Change several default pool and container properties to better align with what is used in production. This includes: - crc32 checksum and server-side checksum validation enabled on containers by default - space reservation (space_rb) for rebuild is now set to 5% by default - RF3 is the default on containers and pools - the default EC cell size has been bumped from 64k to 128k Fail at pool creation time if an explicit incompatible rd_fac value is provided by the user. 
Otherwise, adjust rd_fac to min(rf3, #domains) on the fly. Signed-off-by: Johann Lombardi Co-authored-by: Phil Henderson --- docs/admin/pool_operations.md | 10 +-- docs/user/container.md | 19 ++--- src/common/tests_dmg_helpers.c | 30 +++++++- src/container/srv_layout.c | 6 +- src/include/daos/object.h | 2 +- src/include/daos_prop.h | 6 +- src/pool/srv_layout.c | 2 +- src/pool/srv_pool.c | 74 +++++++++++-------- src/tests/ftest/aggregation/space_rb.py | 7 +- src/tests/ftest/aggregation/space_rb.yaml | 2 +- .../ftest/deployment/basic_checkout.yaml | 2 +- src/tests/ftest/deployment/ior_per_rank.yaml | 2 +- src/tests/ftest/erasurecode/cell_size.py | 4 +- src/tests/ftest/erasurecode/space_usage.yaml | 2 +- src/tests/ftest/nvme/pool_capacity.py | 9 +-- src/tests/ftest/nvme/pool_capacity.yaml | 18 ++++- src/tests/ftest/osa/offline_drain.yaml | 2 +- src/tests/ftest/osa/offline_extend.yaml | 2 +- .../ftest/osa/offline_reintegration.yaml | 2 +- src/tests/ftest/osa/online_drain.yaml | 2 +- src/tests/ftest/osa/online_extend.yaml | 2 +- src/tests/ftest/osa/online_reintegration.yaml | 2 +- src/tests/ftest/performance/ior_easy.yaml | 2 +- src/tests/ftest/performance/ior_hard.yaml | 2 +- src/tests/ftest/performance/mdtest_easy.yaml | 2 +- src/tests/ftest/performance/mdtest_hard.yaml | 2 +- src/tests/ftest/pool/pda.yaml | 2 +- .../ftest/rebuild/cascading_failures.yaml | 2 +- .../ftest/rebuild/container_create_race.yaml | 2 +- src/tests/ftest/scrubber/basic.py | 33 +++++---- src/tests/ftest/scrubber/basic.yaml | 4 +- .../scrubber/check_csum_metrics_mdtest.yaml | 2 +- src/tests/ftest/scrubber/csum_fault.yaml | 2 +- src/tests/ftest/scrubber/frequency.yaml | 2 +- src/tests/ftest/scrubber/rebuild.yaml | 2 +- src/tests/ftest/scrubber/snapshot.yaml | 2 +- .../ftest/scrubber/target_auto_eviction.yaml | 2 +- src/tests/ftest/server/replay.py | 4 +- src/tests/ftest/telemetry/wal_metrics.py | 7 +- src/tests/ftest/util/test_utils_container.py | 4 +- src/tests/ftest/util/test_utils_pool.py | 8 
+- src/tests/suite/daos_test_common.c | 64 ++++++++++++++++ utils/node_local_test.py | 7 +- 43 files changed, 244 insertions(+), 120 deletions(-) diff --git a/docs/admin/pool_operations.md b/docs/admin/pool_operations.md index dc5c3bc12e4..1879f060502 100644 --- a/docs/admin/pool_operations.md +++ b/docs/admin/pool_operations.md @@ -783,7 +783,7 @@ $ dmg pool get-prop pool1 Checksum scrubbing frequency (scrub_freq) not set Checksum scrubbing threshold (scrub_thresh) not set Self-healing policy (self_heal) exclude - Rebuild space ratio (space_rb) 0% + Rebuild space ratio (space_rb) 5% Pool service replica list (svc_list) [0] Pool service redundancy factor (svc_rf) not set Upgrade Status (upgrade_status) not started @@ -812,7 +812,7 @@ $ dmg pool get-prop pool1 Checksum scrubbing frequency (scrub_freq) 604800 Checksum scrubbing threshold (scrub_thresh) 0 Self-healing policy (self_heal) exclude - Rebuild space ratio (space_rb) 0% + Rebuild space ratio (space_rb) 5% Pool service replica list (svc_list) [0] Pool service redundancy factor (svc_rf) 2 Upgrade Status (upgrade_status) in progress @@ -876,7 +876,7 @@ $ dmg pool get-prop tank Checksum scrubbing frequency (scrub_freq) 604800 Checksum scrubbing threshold (scrub_thresh) 0 Self-healing policy (self_heal) exclude,rebuild - Rebuild space ratio (space_rb) 0% + Rebuild space ratio (space_rb) 5% Pool service replica list (svc_list) [0] Pool service redundancy factor (svc_rf) 2 Upgrade Status (upgrade_status) not started @@ -918,7 +918,7 @@ $ dmg pool get-prop tank2 Checksum scrubbing frequency (scrub_freq) 604800 Checksum scrubbing threshold (scrub_thresh) 0 Self-healing policy (self_heal) exclude,rebuild - Rebuild space ratio (space_rb) 0% + Rebuild space ratio (space_rb) 5% Pool service replica list (svc_list) [0] Pool service redundancy factor (svc_rf) 2 Upgrade Status (upgrade_status) not started @@ -968,7 +968,7 @@ Two options are supported: "exclude" (default strategy) and "rebuild". 
This property defines the percentage of total space reserved on each storage engine for self-healing purpose. The reserved space cannot be consumed by -applications. Valid values are 0% to 100%, the default is 0%. +applications. Valid values are 0% to 100%, the default is 5%. When setting this property, specifying the percentage symbol is optional: `space_rb:2%` and `space_rb:2` both specify two percent of storage capacity. diff --git a/docs/user/container.md b/docs/user/container.md index a6c851506e4..b1538b58537 100644 --- a/docs/user/container.md +++ b/docs/user/container.md @@ -435,17 +435,13 @@ during container create. - cksum (`DAOS_PROP_CO_CSUM`): the type of checksum algorithm to use. Supported values are adler32, crc[16|32|64] or sha[1|256|512]. By default, - checksum is disabled for new containers. + checksum is enabled for new containers using crc32. - cksum\_size (`DAOS_PROP_CO_CSUM_CHUNK_SIZE`): defines the chunk size used for creating checksums of array types. (default is 32K). -- srv\_cksum (`DAOS_PROP_CO_CSUM_SERVER_VERIFY`): Because of the probable decrease to - IOPS, in most cases, it is not desired to verify checksums on an object - update on the server side. It is sufficient for the client to verify on - a fetch because any data corruption, whether on the object update, - storage, or fetch, will be caught. However, there is an advantage to - knowing if corruption happens on an update. The update would fail - right away, indicating to the client to retry the RPC or report an - error to upper levels. +- srv\_cksum (`DAOS_PROP_CO_CSUM_SERVER_VERIFY`): verify the checksum on an + object update on the server side. This is enabled by default. Verifying checksums + on update allows to pro-actively detect corruption over the wire and retry the RPC + from the client, but has an impact on IOPS. 
For instance, to create a new container with crc64 checksum enabled and checksum verification on the server side, one can use the following command @@ -471,9 +467,8 @@ The DAOS erasure code implementation uses a fixed cell size that applies to all objects in the container. The cell size in DAOS is the size of a single data and parity fragment. By default, a container's `ec_cell_sz` property is inherited from the pool's -default `ec_cell_sz`, which was 1MiB in DAOS 2.0 and has been reduced to -64kiB in DAOS 2.2. The container cell size can also be set at -container creation time via the `--property` option: +default `ec_cell_sz`, which is 128kiB. The container cell size can also be set +at container creation time via the `--property` option: ```bash $ daos cont create tank mycont5 --type POSIX --properties rd_fac:1,cell_size:131072 diff --git a/src/common/tests_dmg_helpers.c b/src/common/tests_dmg_helpers.c index 08825366133..851119e8f88 100644 --- a/src/common/tests_dmg_helpers.c +++ b/src/common/tests_dmg_helpers.c @@ -1,6 +1,6 @@ /** * (C) Copyright 2020-2024 Intel Corporation. 
- * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -827,6 +827,34 @@ dmg_pool_create(const char *dmg_config_file, } } + /* Temporarily use old pool property defaults due to DAOS-17946 */ + /* Set default rd_fac:0 if --properties=rd_fac is not already defined in args */ + bool has_rd_fac = false; + for (int i = 0; i < argcount; i++) { + if (args[i] && strstr(args[i], "--properties=rd_fac") != NULL) { + has_rd_fac = true; + break; + } + } + if (!has_rd_fac) { + args = cmd_push_arg(args, &argcount, "--properties=rd_fac:0 "); + if (args == NULL) + D_GOTO(out, rc = -DER_NOMEM); + } + /* Set default space_rb:0 if --properties=space_rb is not already defined in args */ + bool has_space_rb = false; + for (int i = 0; i < argcount; i++) { + if (args[i] && strstr(args[i], "--properties=space_rb") != NULL) { + has_space_rb = true; + break; + } + } + if (!has_space_rb) { + args = cmd_push_arg(args, &argcount, "--properties=space_rb:0 "); + if (args == NULL) + D_GOTO(out, rc = -DER_NOMEM); + } + if (!has_label) { char path[] = "/tmp/test_XXXXXX"; int tmp_fd; diff --git a/src/container/srv_layout.c b/src/container/srv_layout.c index ae1b16b3ae2..56929f6a6d0 100644 --- a/src/container/srv_layout.c +++ b/src/container/srv_layout.c @@ -73,16 +73,16 @@ struct daos_prop_entry cont_prop_entries_default_v0[CONT_PROP_NUM_V0] = { .dpe_val = 1, }, { .dpe_type = DAOS_PROP_CO_CSUM, - .dpe_val = DAOS_PROP_CO_CSUM_OFF, + .dpe_val = DAOS_PROP_CO_CSUM_CRC32, }, { .dpe_type = DAOS_PROP_CO_CSUM_CHUNK_SIZE, .dpe_val = 32 * 1024, /** 32K */ }, { .dpe_type = DAOS_PROP_CO_CSUM_SERVER_VERIFY, - .dpe_val = DAOS_PROP_CO_CSUM_SV_OFF, + .dpe_val = DAOS_PROP_CO_CSUM_SV_ON, }, { .dpe_type = DAOS_PROP_CO_REDUN_FAC, - .dpe_val = DAOS_PROP_CO_REDUN_RF0, + .dpe_val = DAOS_PROP_CO_REDUN_RF3, }, { .dpe_type = DAOS_PROP_CO_REDUN_LVL, .dpe_val = DAOS_PROP_CO_REDUN_RANK, diff 
--git a/src/include/daos/object.h b/src/include/daos/object.h index d77f9f9c343..ffad53056c1 100644 --- a/src/include/daos/object.h +++ b/src/include/daos/object.h @@ -79,7 +79,7 @@ enum { /* smallest cell size */ DAOS_EC_CELL_MIN = (4 << 10), /* default cell size */ - DAOS_EC_CELL_DEF = (64 << 10), + DAOS_EC_CELL_DEF = (128 << 10), /* largest cell size */ DAOS_EC_CELL_MAX = (1024 << 10), }; diff --git a/src/include/daos_prop.h b/src/include/daos_prop.h index 467a94b64fd..d41c59bc50b 100644 --- a/src/include/daos_prop.h +++ b/src/include/daos_prop.h @@ -149,7 +149,7 @@ enum daos_pool_props { #define DAOS_PROP_PO_EC_CELL_SZ_MAX (1UL << 30) #define DAOS_PROP_PO_REDUN_FAC_MAX 4 -#define DAOS_PROP_PO_REDUN_FAC_DEFAULT 0 +#define DAOS_PROP_PO_REDUN_FAC_DEFAULT 3 static inline bool daos_rf_is_valid(unsigned long long rf) @@ -291,7 +291,7 @@ enum daos_cont_props { DAOS_PROP_CO_LAYOUT_VER, /** * Checksum on/off + checksum type (CRC16, CRC32, SHA-1 & SHA-2). - * default = DAOS_PROP_CO_CSUM_OFF + * default = DAOS_PROP_CO_CSUM_CRC32 */ DAOS_PROP_CO_CSUM, /** @@ -301,7 +301,7 @@ enum daos_cont_props { DAOS_PROP_CO_CSUM_CHUNK_SIZE, /** * Checksum verification on server. 
Value = ON/OFF - * default = DAOS_PROP_CO_CSUM_SV_OFF + * default = DAOS_PROP_CO_CSUM_SV_ON */ DAOS_PROP_CO_CSUM_SERVER_VERIFY, /** diff --git a/src/pool/srv_layout.c b/src/pool/srv_layout.c index 12db4f7bd35..fefb56d5720 100644 --- a/src/pool/srv_layout.c +++ b/src/pool/srv_layout.c @@ -66,7 +66,7 @@ struct daos_prop_entry pool_prop_entries_default[DAOS_PROP_PO_NUM] = { }, { .dpe_type = DAOS_PROP_PO_SPACE_RB, - .dpe_val = 0, + .dpe_val = 5, }, { .dpe_type = DAOS_PROP_PO_SELF_HEAL, diff --git a/src/pool/srv_pool.c b/src/pool/srv_pool.c index 2a9311e81f4..b7ada57c251 100644 --- a/src/pool/srv_pool.c +++ b/src/pool/srv_pool.c @@ -763,7 +763,7 @@ pool_prop_write(struct rdb_tx *tx, const rdb_path_t *kvs, daos_prop_t *prop) static int init_pool_metadata(struct rdb_tx *tx, const rdb_path_t *kvs, uint32_t nnodes, const char *group, - const d_rank_list_t *ranks, daos_prop_t *prop, uint32_t ndomains, + const d_rank_list_t *ranks, daos_prop_t *prop_orig, uint32_t ndomains, const uint32_t *domains) { struct pool_buf *map_buf; @@ -780,24 +780,59 @@ init_pool_metadata(struct rdb_tx *tx, const rdb_path_t *kvs, uint32_t nnodes, co uint32_t svc_ops_max; uint32_t svc_ops_num; uint64_t rdb_size; + daos_prop_t *prop = NULL; int rc; struct daos_prop_entry *entry; uuid_t uuid; + /* duplicate the default properties, overwrite it with pool create + * parameter and then write to pool meta data. 
+ */ + prop = daos_prop_dup(&pool_prop_default, true /* pool */, false /* input */); + if (prop == NULL) { + D_ERROR("daos_prop_dup failed.\n"); + D_GOTO(out, rc = -DER_NOMEM); + } + + if (DAOS_FAIL_CHECK(DAOS_FAIL_POOL_CREATE_VERSION)) { + uint64_t fail_val = daos_fail_value_get(); + + entry = daos_prop_entry_get(prop, DAOS_PROP_PO_OBJ_VERSION); + D_ASSERT(entry != NULL); + entry->dpe_val = (uint32_t)fail_val; + } + + rc = pool_prop_default_copy(prop, prop_orig); + if (rc) { + DL_ERROR(rc, "daos_prop_default_copy() failed"); + D_GOTO(out_prop, rc); + } + rc = gen_pool_buf(NULL /* map */, &map_buf, map_version, ndomains, nnodes, ntargets, domains, dss_tgt_nr); if (rc != 0) { D_ERROR("failed to generate pool buf, "DF_RC"\n", DP_RC(rc)); - goto out; + goto out_prop; } - entry = daos_prop_entry_get(prop, DAOS_PROP_PO_REDUN_FAC); + entry = daos_prop_entry_get(prop_orig, DAOS_PROP_PO_REDUN_FAC); if (entry) { + /** if the user provided an explicit incompatible rd_fac, then fail gracefully */ if (entry->dpe_val + 1 > map_buf->pb_domain_nr) { - D_ERROR("ndomains(%u) could not meet redunc factor(%lu)\n", + D_ERROR("ndomains(%u) could not meet specified redunc factor(%lu)\n", map_buf->pb_domain_nr, entry->dpe_val); D_GOTO(out_map_buf, rc = -DER_INVAL); } + } else { + /** if the default rd_fac cannot be satisfied, adjust it on the fly */ + entry = daos_prop_entry_get(prop, DAOS_PROP_PO_REDUN_FAC); + if (entry) { + if (entry->dpe_val + 1 > map_buf->pb_domain_nr) { + D_DEBUG(DB_MD, "ndomains(%u) could not meet default redunc factor(%lu)\n", + map_buf->pb_domain_nr, entry->dpe_val); + entry->dpe_val = (uint64_t) map_buf->pb_domain_nr - 1; + } + } } /* Initialize the pool map properties. 
*/ @@ -930,6 +965,8 @@ init_pool_metadata(struct rdb_tx *tx, const rdb_path_t *kvs, uint32_t nnodes, co out_map_buf: pool_buf_free(map_buf); +out_prop: + daos_prop_free(prop); out: return rc; } @@ -3970,7 +4007,6 @@ ds_pool_create_handler(crt_rpc_t *rpc) struct rdb_tx tx; d_iov_t value; struct rdb_kvs_attr attr; - daos_prop_t *prop_dup = NULL; daos_prop_t *prop = NULL; d_rank_list_t *tgt_ranks = NULL; uint32_t ndomains; @@ -4048,38 +4084,13 @@ ds_pool_create_handler(crt_rpc_t *rpc) D_GOTO(out_tx, rc); } - /* duplicate the default properties, overwrite it with pool create - * parameter and then write to pool meta data. - */ - prop_dup = daos_prop_dup(&pool_prop_default, true /* pool */, - false /* input */); - if (prop_dup == NULL) { - D_ERROR("daos_prop_dup failed.\n"); - D_GOTO(out_tx, rc = -DER_NOMEM); - } - - if (DAOS_FAIL_CHECK(DAOS_FAIL_POOL_CREATE_VERSION)) { - uint64_t fail_val = daos_fail_value_get(); - struct daos_prop_entry *entry; - - entry = daos_prop_entry_get(prop_dup, DAOS_PROP_PO_OBJ_VERSION); - D_ASSERT(entry != NULL); - entry->dpe_val = (uint32_t)fail_val; - } - - rc = pool_prop_default_copy(prop_dup, prop); - if (rc) { - DL_ERROR(rc, "daos_prop_default_copy() failed"); - D_GOTO(out_tx, rc); - } - /* Initialize the DB and the metadata for this pool. 
*/ attr.dsa_class = RDB_KVS_GENERIC; attr.dsa_order = 8; rc = rdb_tx_create_root(&tx, &attr); if (rc != 0) D_GOTO(out_tx, rc); - rc = init_pool_metadata(&tx, &svc->ps_root, ntgts, NULL /* group */, tgt_ranks, prop_dup, + rc = init_pool_metadata(&tx, &svc->ps_root, ntgts, NULL /* group */, tgt_ranks, prop, ndomains, domains); if (rc != 0) D_GOTO(out_tx, rc); @@ -4092,7 +4103,6 @@ ds_pool_create_handler(crt_rpc_t *rpc) D_GOTO(out_tx, rc); out_tx: - daos_prop_free(prop_dup); ds_cont_unlock_metadata(svc->ps_cont_svc); ABT_rwlock_unlock(svc->ps_lock); rdb_tx_end(&tx); diff --git a/src/tests/ftest/aggregation/space_rb.py b/src/tests/ftest/aggregation/space_rb.py index 3f4b426da85..717631e28e2 100644 --- a/src/tests/ftest/aggregation/space_rb.py +++ b/src/tests/ftest/aggregation/space_rb.py @@ -1,5 +1,6 @@ """ (C) Copyright 2024 Intel Corporation. + (C) Copyright 2026 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -96,7 +97,11 @@ def test_space_rb(self): # 2. Call dmg pool get-prop and verify that Rebuild space ratio (space_rb) is 50%. self.log_step( "Call dmg pool get-prop and verify that Rebuild space ratio (space_rb) is 50%.") - expected_space_rb = int(self.params.get("properties", '/run/pool/*').split(":")[1]) + expected_space_rb = 0 + for pool_property in self.params.get("properties", "/run/pool/*", "").split(","): + if pool_property.startswith("space_rb:"): + expected_space_rb = int(pool_property.split(":")[1]) + break self.verify_space_rb_property(pool=pool_1, expected_space_rb=expected_space_rb) # 3. Run IOR to fill 50% of SCM. 
diff --git a/src/tests/ftest/aggregation/space_rb.yaml b/src/tests/ftest/aggregation/space_rb.yaml index dc3ae51c592..c5d71faaf03 100644 --- a/src/tests/ftest/aggregation/space_rb.yaml +++ b/src/tests/ftest/aggregation/space_rb.yaml @@ -16,7 +16,7 @@ server_config: pool: size: 80G - properties: space_rb:50 + properties: space_rb:50,rd_fac:0 container: type: POSIX diff --git a/src/tests/ftest/deployment/basic_checkout.yaml b/src/tests/ftest/deployment/basic_checkout.yaml index 7fc7b5a63c2..c68564a8dd9 100644 --- a/src/tests/ftest/deployment/basic_checkout.yaml +++ b/src/tests/ftest/deployment/basic_checkout.yaml @@ -21,7 +21,7 @@ server_config: pool: size: 50% - properties: ec_cell_sz:128KiB + properties: rd_fac:0,space_rb:0,ec_cell_sz:128KiB container: type: POSIX diff --git a/src/tests/ftest/deployment/ior_per_rank.yaml b/src/tests/ftest/deployment/ior_per_rank.yaml index 03918af29d0..7c890651b0a 100644 --- a/src/tests/ftest/deployment/ior_per_rank.yaml +++ b/src/tests/ftest/deployment/ior_per_rank.yaml @@ -22,7 +22,7 @@ server_config: pool: mode: 146 size: 350G # Cannot use percentage, as it does not work when using pool create for per rank. - properties: ec_cell_sz:128KiB + properties: rd_fac:0,space_rb:0,ec_cell_sz:128KiB container: type: POSIX diff --git a/src/tests/ftest/erasurecode/cell_size.py b/src/tests/ftest/erasurecode/cell_size.py index d9e470f25cb..1e688345bfd 100644 --- a/src/tests/ftest/erasurecode/cell_size.py +++ b/src/tests/ftest/erasurecode/cell_size.py @@ -1,6 +1,6 @@ ''' (C) Copyright 2020-2023 Intel Corporation. 
- (C) Copyright 2025 Hewlett Packard Enterprise Development LP + (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent ''' @@ -36,7 +36,7 @@ def test_ec_cell_size(self): transfersize_blocksize = self.params.get("transfersize_blocksize", '/run/ior/*') for cell_size in pool_cell_sizes: - self.pool = self.get_pool(properties=f"ec_cell_sz:{cell_size}") + self.pool = self.get_pool(properties=f"rd_fac:0,space_rb:0,ec_cell_sz:{cell_size}") for dfs_oclass in dfs_oclass_list: self.ior_cmd.dfs_oclass.update(dfs_oclass) for transfer_size, block_size in transfersize_blocksize: diff --git a/src/tests/ftest/erasurecode/space_usage.yaml b/src/tests/ftest/erasurecode/space_usage.yaml index e0ff34387eb..f20803d261e 100644 --- a/src/tests/ftest/erasurecode/space_usage.yaml +++ b/src/tests/ftest/erasurecode/space_usage.yaml @@ -23,7 +23,7 @@ server_config: pool: size: 95% - properties: ec_cell_sz:128KiB,reclaim:disabled + properties: rd_fac:0,space_rb:0,ec_cell_sz:128KiB,reclaim:disabled container: type: POSIX diff --git a/src/tests/ftest/nvme/pool_capacity.py b/src/tests/ftest/nvme/pool_capacity.py index dea4d5bedca..c66f40927f5 100644 --- a/src/tests/ftest/nvme/pool_capacity.py +++ b/src/tests/ftest/nvme/pool_capacity.py @@ -1,5 +1,6 @@ """ (C) Copyright 2020-2024 Intel Corporation. 
+ (C) Copyright 2026 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -93,10 +94,7 @@ def run_test_create_delete(self, num_pool=2, num_cont=5, total_count=100): self.log.info("Running test %s", loop_count) offset = loop_count * num_pool for val in range(offset, offset + num_pool): - self.pool.append( - self.get_pool( - namespace="/run/pool_qty_{}/*".format(num_pool), - properties="reclaim:disabled")) + self.pool.append(self.get_pool(namespace=f"/run/pool_qty_{num_pool}/*")) display_string = "pool{} space at the Beginning".format(val) self.pool[-1].display_pool_daos_space(display_string) @@ -151,8 +149,7 @@ def run_test(self, num_pool=1): # Create the IOR threads threads = [] for val in range(0, num_pool): - self.pool.append(self.get_pool(namespace="/run/pool_qty_{}/*".format(num_pool), - properties="reclaim:disabled")) + self.pool.append(self.get_pool(namespace=f"/run/pool_qty_{num_pool}/*")) display_string = "pool{} space at the Beginning".format(val) self.pool[-1].display_pool_daos_space(display_string) diff --git a/src/tests/ftest/nvme/pool_capacity.yaml b/src/tests/ftest/nvme/pool_capacity.yaml index 135bb746207..cfe5c31704f 100644 --- a/src/tests/ftest/nvme/pool_capacity.yaml +++ b/src/tests/ftest/nvme/pool_capacity.yaml @@ -1,7 +1,9 @@ hosts: test_servers: 2 test_clients: 2 + timeout: 1800 + server_config: name: daos_server engines_per_host: 1 @@ -10,20 +12,30 @@ server_config: nr_xs_helpers: 1 log_file: daos_server0.log storage: auto -pool: - mode: 146 - name: daos_server + +pool_common: &pool_common + properties: rd_fac:0,space_rb:0,reclaim:disabled + pool_qty_1: size: 50% + <<: *pool_common + pool_qty_2: size: 25% + <<: *pool_common + pool_qty_3: size: 16% + <<: *pool_common + pool_qty_10: size: 5% + <<: *pool_common + container: type: POSIX control_method: daos + ior: no_parallel_job: 10 clientslots: diff --git a/src/tests/ftest/osa/offline_drain.yaml b/src/tests/ftest/osa/offline_drain.yaml index 
79500c0abac..61c9aae2849 100644 --- a/src/tests/ftest/osa/offline_drain.yaml +++ b/src/tests/ftest/osa/offline_drain.yaml @@ -35,7 +35,7 @@ pool: nvme_size: 108000000000 svcn: 4 rebuild_timeout: 240 - properties: scrub:timed + properties: rd_fac:0,space_rb:0,scrub:timed container: type: POSIX diff --git a/src/tests/ftest/osa/offline_extend.yaml b/src/tests/ftest/osa/offline_extend.yaml index 90b78c7a962..40eb0467f41 100644 --- a/src/tests/ftest/osa/offline_extend.yaml +++ b/src/tests/ftest/osa/offline_extend.yaml @@ -37,7 +37,7 @@ pool: scm_size: 6000000000 nvme_size: 54000000000 svcn: 2 - properties: scrub:lazy + properties: rd_fac:0,space_rb:0,scrub:lazy container: type: POSIX diff --git a/src/tests/ftest/osa/offline_reintegration.yaml b/src/tests/ftest/osa/offline_reintegration.yaml index ae8874d37d8..d4e70262606 100644 --- a/src/tests/ftest/osa/offline_reintegration.yaml +++ b/src/tests/ftest/osa/offline_reintegration.yaml @@ -49,7 +49,7 @@ pool: svcn: 4 rebuild_timeout: 240 pool_query_timeout: 30 - properties: scrub:timed,scrub_freq:1 + properties: rd_fac:0,space_rb:0,scrub:timed,scrub_freq:1 container: type: POSIX diff --git a/src/tests/ftest/osa/online_drain.yaml b/src/tests/ftest/osa/online_drain.yaml index 9738816b799..9dbcfbbec44 100644 --- a/src/tests/ftest/osa/online_drain.yaml +++ b/src/tests/ftest/osa/online_drain.yaml @@ -38,7 +38,7 @@ pool: nvme_size: 108G rebuild_timeout: 120 pool_query_timeout: 30 - properties: scrub:timed,scrub_freq:1 + properties: rd_fac:0,space_rb:0,scrub:timed,scrub_freq:1 container: type: POSIX diff --git a/src/tests/ftest/osa/online_extend.yaml b/src/tests/ftest/osa/online_extend.yaml index 1522c357db9..144c6441371 100644 --- a/src/tests/ftest/osa/online_extend.yaml +++ b/src/tests/ftest/osa/online_extend.yaml @@ -46,7 +46,7 @@ pool: svcn: 4 rebuild_timeout: 120 pool_query_timeout: 30 - properties: scrub:lazy + properties: rd_fac:0,space_rb:0,scrub:lazy container: type: POSIX diff --git 
a/src/tests/ftest/osa/online_reintegration.yaml b/src/tests/ftest/osa/online_reintegration.yaml index b5e6f7f7309..531e4c16e20 100644 --- a/src/tests/ftest/osa/online_reintegration.yaml +++ b/src/tests/ftest/osa/online_reintegration.yaml @@ -39,7 +39,7 @@ pool: svcn: 4 rebuild_timeout: 120 pool_query_timeout: 30 - properties: scrub:timed + properties: rd_fac:0,space_rb:0,scrub:timed container: type: POSIX diff --git a/src/tests/ftest/performance/ior_easy.yaml b/src/tests/ftest/performance/ior_easy.yaml index 0b515361fa2..139f199f53e 100644 --- a/src/tests/ftest/performance/ior_easy.yaml +++ b/src/tests/ftest/performance/ior_easy.yaml @@ -24,7 +24,7 @@ server_config: pool: size: 95% - properties: ec_cell_sz:1MiB + properties: rd_fac:0,space_rb:0,ec_cell_sz:1MiB container: type: POSIX diff --git a/src/tests/ftest/performance/ior_hard.yaml b/src/tests/ftest/performance/ior_hard.yaml index fb09a7514a5..deeb807bded 100644 --- a/src/tests/ftest/performance/ior_hard.yaml +++ b/src/tests/ftest/performance/ior_hard.yaml @@ -24,7 +24,7 @@ server_config: pool: size: 95% - properties: ec_cell_sz:128KiB + properties: rd_fac:0,space_rb:0,ec_cell_sz:128KiB container: type: POSIX diff --git a/src/tests/ftest/performance/mdtest_easy.yaml b/src/tests/ftest/performance/mdtest_easy.yaml index 02070bbcbaa..01444ec8519 100644 --- a/src/tests/ftest/performance/mdtest_easy.yaml +++ b/src/tests/ftest/performance/mdtest_easy.yaml @@ -24,7 +24,7 @@ server_config: pool: size: 95% - properties: ec_cell_sz:1MiB + properties: rd_fac:0,space_rb:0,ec_cell_sz:1MiB container: type: POSIX diff --git a/src/tests/ftest/performance/mdtest_hard.yaml b/src/tests/ftest/performance/mdtest_hard.yaml index 949cfb93111..f5c49502f58 100644 --- a/src/tests/ftest/performance/mdtest_hard.yaml +++ b/src/tests/ftest/performance/mdtest_hard.yaml @@ -24,7 +24,7 @@ server_config: pool: size: 95% - properties: ec_cell_sz:1MiB + properties: rd_fac:0,space_rb:0,ec_cell_sz:1MiB container: type: POSIX diff --git 
a/src/tests/ftest/pool/pda.yaml b/src/tests/ftest/pool/pda.yaml index 9fdec66ca4f..3b98ced5bfc 100644 --- a/src/tests/ftest/pool/pda.yaml +++ b/src/tests/ftest/pool/pda.yaml @@ -18,7 +18,7 @@ pool: scm_size: 1G pool_1: scm_size: 1G - properties: ec_pda:2,rp_pda:4 + properties: rd_fac:0,space_rb:0,ec_pda:2,rp_pda:4 container: type: POSIX control_method: daos diff --git a/src/tests/ftest/rebuild/cascading_failures.yaml b/src/tests/ftest/rebuild/cascading_failures.yaml index abc15a45f29..294332d854b 100644 --- a/src/tests/ftest/rebuild/cascading_failures.yaml +++ b/src/tests/ftest/rebuild/cascading_failures.yaml @@ -20,7 +20,7 @@ server_config: pool: size: 1G pool_query_timeout: 30 - properties: rd_fac:2 + properties: rd_fac:2,space_rb:0 container: akey_size: 5 dkey_size: 5 diff --git a/src/tests/ftest/rebuild/container_create_race.yaml b/src/tests/ftest/rebuild/container_create_race.yaml index 45887acdc7c..892af3461e0 100644 --- a/src/tests/ftest/rebuild/container_create_race.yaml +++ b/src/tests/ftest/rebuild/container_create_race.yaml @@ -28,7 +28,7 @@ testparams: pool: scm_size: 8G pool_query_timeout: 15 - properties: rd_fac:1 + properties: rd_fac:1,space_rb:0 container: type: POSIX diff --git a/src/tests/ftest/scrubber/basic.py b/src/tests/ftest/scrubber/basic.py index 13ec352c360..fe6445cea60 100644 --- a/src/tests/ftest/scrubber/basic.py +++ b/src/tests/ftest/scrubber/basic.py @@ -1,5 +1,6 @@ """ (C) Copyright 2018-2023 Intel Corporation. + (C) Copyright 2026 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -13,14 +14,8 @@ class TestWithScrubberBasic(TestWithScrubber): :avocado: recursive """ - def run_scrubber_basic(self, pool_prop=None, cont_prop=None): - """JIRA ID: DAOS-7371 - Scrubber basic main method which runs the basic testing. - - Args: - pool_prop(str) : Test pool properties string. 
- cont_prop(str) : Test container properties string - """ + def run_scrubber_basic(self): + """Runs the basic scrubber testing.""" flags = self.params.get("ior_flags", '/run/ior/iorflags/*') apis = self.params.get("ior_api", '/run/ior/iorflags/*') transfer_block_size = self.params.get("transfer_block_size", @@ -30,7 +25,6 @@ def run_scrubber_basic(self, pool_prop=None, cont_prop=None): self.ior_cmd.flags.update(flags[0], "ior.flags") self.ior_cmd.dfs_oclass.update(obj_class[0]) self.ior_cmd.dfs_dir_oclass.update(obj_class[0]) - self.create_pool_cont_with_scrubber(pool_prop=pool_prop, cont_prop=cont_prop) for test in transfer_block_size: self.ior_cmd.transfer_size.update(test[0]) self.ior_cmd.block_size.update(test[1]) @@ -57,7 +51,16 @@ def test_scrubber_disabled_during_pool_creation(self): :avocado: tags=TestWithScrubberBasic,test_scrubber_disabled_during_pool_creation """ - self.run_scrubber_basic(None, None) + other_properties = self.params.get("other_properties", '/run/pool/*') + + self.add_pool() + for prop_val in other_properties.split(","): + if prop_val is not None: + value = prop_val.split(":") + self.pool.set_property(value[0], value[1]) + self.add_container(pool=self.pool) + + self.run_scrubber_basic() def test_scrubber_enabled_during_pool_creation(self): """JIRA ID: DAOS-7371 @@ -74,6 +77,10 @@ def test_scrubber_enabled_during_pool_creation(self): :avocado: tags=TestWithScrubberBasic,test_scrubber_enabled_during_pool_creation """ - pool_prop = self.params.get("properties", '/run/pool/*') - cont_prop = self.params.get("properties", '/run/container/*') - self.run_scrubber_basic(pool_prop, cont_prop) + pool_properties = self.params.get("properties", '/run/pool/*') + other_properties = self.params.get("other_properties", '/run/pool/*') + + self.add_pool(properties=f"{pool_properties},{other_properties}") + self.add_container(pool=self.pool) + + self.run_scrubber_basic() diff --git a/src/tests/ftest/scrubber/basic.yaml b/src/tests/ftest/scrubber/basic.yaml 
index e8ad2cdc111..60a98eb3142 100644 --- a/src/tests/ftest/scrubber/basic.yaml +++ b/src/tests/ftest/scrubber/basic.yaml @@ -44,12 +44,14 @@ pool: svcn: 4 rebuild_timeout: 120 pool_query_timeout: 30 - properties: "scrub:timed,scrub_freq:1" + properties: rd_fac:0,space_rb:0 + other_properties: scrub:timed,scrub_freq:1 container: type: POSIX control_method: daos oclass: RP_2G1 + properties: cksum:crc16 ior: ior_timeout: 60 diff --git a/src/tests/ftest/scrubber/check_csum_metrics_mdtest.yaml b/src/tests/ftest/scrubber/check_csum_metrics_mdtest.yaml index 962a5fb73e9..befb63d1942 100644 --- a/src/tests/ftest/scrubber/check_csum_metrics_mdtest.yaml +++ b/src/tests/ftest/scrubber/check_csum_metrics_mdtest.yaml @@ -30,7 +30,7 @@ pool: size: 50% svcn: 4 pool_query_timeout: 30 - properties: "scrub:timed,scrub_freq:1" + properties: "rd_fac:0,space_rb:0,scrub:timed,scrub_freq:1" container: type: POSIX diff --git a/src/tests/ftest/scrubber/csum_fault.yaml b/src/tests/ftest/scrubber/csum_fault.yaml index 2ba60342792..407d8b7b414 100644 --- a/src/tests/ftest/scrubber/csum_fault.yaml +++ b/src/tests/ftest/scrubber/csum_fault.yaml @@ -46,7 +46,7 @@ pool: svcn: 4 rebuild_timeout: 120 pool_query_timeout: 30 - properties: "scrub:timed,scrub_freq:1" + properties: "rd_fac:0,space_rb:0,scrub:timed,scrub_freq:1" container: type: POSIX diff --git a/src/tests/ftest/scrubber/frequency.yaml b/src/tests/ftest/scrubber/frequency.yaml index f5b92ba96c9..befbe818106 100644 --- a/src/tests/ftest/scrubber/frequency.yaml +++ b/src/tests/ftest/scrubber/frequency.yaml @@ -34,7 +34,7 @@ pool: svcn: 4 rebuild_timeout: 120 pool_query_timeout: 30 - properties: "scrub:timed" + properties: rd_fac:0,space_rb:0,scrub:timed container: type: POSIX diff --git a/src/tests/ftest/scrubber/rebuild.yaml b/src/tests/ftest/scrubber/rebuild.yaml index 6b28ad4521e..41775934891 100644 --- a/src/tests/ftest/scrubber/rebuild.yaml +++ b/src/tests/ftest/scrubber/rebuild.yaml @@ -36,7 +36,7 @@ pool: svcn: 4 rebuild_timeout: 
120 pool_query_timeout: 30 - properties: "scrub:timed,scrub_freq:1,scrub_thresh:2" + properties: "rd_fac:0,space_rb:0,scrub:timed,scrub_freq:1,scrub_thresh:2" container: type: POSIX diff --git a/src/tests/ftest/scrubber/snapshot.yaml b/src/tests/ftest/scrubber/snapshot.yaml index 6c8791bcbfa..156feef76d1 100644 --- a/src/tests/ftest/scrubber/snapshot.yaml +++ b/src/tests/ftest/scrubber/snapshot.yaml @@ -36,7 +36,7 @@ pool: svcn: 4 rebuild_timeout: 120 pool_query_timeout: 30 - properties: "scrub:timed,scrub_freq:2" + properties: "rd_fac:0,space_rb:0,scrub:timed,scrub_freq:2" container: type: POSIX diff --git a/src/tests/ftest/scrubber/target_auto_eviction.yaml b/src/tests/ftest/scrubber/target_auto_eviction.yaml index 60f21945562..9ab153651f6 100644 --- a/src/tests/ftest/scrubber/target_auto_eviction.yaml +++ b/src/tests/ftest/scrubber/target_auto_eviction.yaml @@ -46,7 +46,7 @@ pool: svcn: 4 rebuild_timeout: 120 pool_query_timeout: 30 - properties: "scrub:timed,scrub_freq:1,scrub_thresh:2" + properties: rd_fac:0,space_rb:0,scrub:timed,scrub_freq:1,scrub_thresh:2 container: type: POSIX diff --git a/src/tests/ftest/server/replay.py b/src/tests/ftest/server/replay.py index 28ed9ea7486..fb4fd23685f 100644 --- a/src/tests/ftest/server/replay.py +++ b/src/tests/ftest/server/replay.py @@ -1,6 +1,6 @@ """ (C) Copyright 2023 Intel Corporation. 
- (C) Copyright 2025 Hewlett Packard Enterprise Development LP + (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -359,7 +359,7 @@ def test_replay_check_pointing(self): """ frequency = 5 container = self.create_container( - properties=f'checkpoint:timed,checkpoint_freq:{frequency}') + properties=f'rd_fac:0,space_rb:0,checkpoint:timed,checkpoint_freq:{frequency}') self.log.info('%s check point frequency: %s seconds', container.pool, frequency) self.log_step('Write data to the container (ior)') diff --git a/src/tests/ftest/telemetry/wal_metrics.py b/src/tests/ftest/telemetry/wal_metrics.py index 19bd605704a..748ef30cc56 100644 --- a/src/tests/ftest/telemetry/wal_metrics.py +++ b/src/tests/ftest/telemetry/wal_metrics.py @@ -1,6 +1,6 @@ """ (C) Copyright 2018-2024 Intel Corporation. - (C) Copyright 2025 Hewlett Packard Enterprise Development LP + (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -143,7 +143,7 @@ def test_wal_checkpoint_metrics(self): wal_metrics = list(self.telemetry.ENGINE_POOL_CHECKPOINT_METRICS) self.log_step('Creating a pool with check pointing disabled (dmg pool create)') - add_pool(self, properties='checkpoint:disabled') + add_pool(self, properties='rd_fac:0,space_rb:0,checkpoint:disabled') self.log_step( 'Collect WAL checkpoint metrics after creating a pool w/o check pointing ' @@ -160,7 +160,8 @@ def test_wal_checkpoint_metrics(self): self.fail('WAL check point metrics not zero after creating a pool w/o check pointing') self.log_step('Creating a pool with timed check pointing (dmg pool create)') - pool = add_pool(self, properties=f'checkpoint:timed,checkpoint_freq:{frequency}') + pool = add_pool( + self, properties=f'rd_fac:0,space_rb:0,checkpoint:timed,checkpoint_freq:{frequency}') self.log_step( 'Collect WAL checkpoint metrics after creating a pool w/ check pointing ' diff --git 
a/src/tests/ftest/util/test_utils_container.py b/src/tests/ftest/util/test_utils_container.py index 5fff6c88bd1..a8be58785a3 100644 --- a/src/tests/ftest/util/test_utils_container.py +++ b/src/tests/ftest/util/test_utils_container.py @@ -1,6 +1,6 @@ """ (C) Copyright 2018-2024 Intel Corporation. - (C) Copyright 2025 Hewlett Packard Enterprise Development LP + (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -367,7 +367,7 @@ def __init__(self, pool, daos_command, label_generator=None, namespace=CONT_NAME self.dir_oclass = BasicParameter(None) self.file_oclass = BasicParameter(None) self.chunk_size = BasicParameter(None) - self.properties = BasicParameter(None) + self.properties = BasicParameter(None, "cksum:off,srv_cksum:off") self.acl_file = BasicParameter(None) self.daos_timeout = BasicParameter(None) self.label = BasicParameter(None, "TestContainer") diff --git a/src/tests/ftest/util/test_utils_pool.py b/src/tests/ftest/util/test_utils_pool.py index 8013791f506..591c4fe59ae 100644 --- a/src/tests/ftest/util/test_utils_pool.py +++ b/src/tests/ftest/util/test_utils_pool.py @@ -272,7 +272,7 @@ def __init__(self, context, dmg_command, label_generator=None, namespace=POOL_NA self.gid = os.getegid() self.mode = BasicParameter(None) - self.name = BasicParameter(None) # server group name + self.name = BasicParameter(None) # server group name self.svcn = BasicParameter(None) self.target_list = BasicParameter(None) self.nranks = BasicParameter(None) @@ -281,9 +281,9 @@ def __init__(self, context, dmg_command, label_generator=None, namespace=POOL_NA self.mem_ratio = BasicParameter(None) self.scm_size = BasicParameter(None) self.nvme_size = BasicParameter(None) - self.prop_name = BasicParameter(None) # name of property to be set - self.prop_value = BasicParameter(None) # value of property - self.properties = BasicParameter(None) # string of cs name:value + self.prop_name = BasicParameter(None) # name of 
property to be set + self.prop_value = BasicParameter(None) # value of property + self.properties = BasicParameter(None, "rd_fac:0,space_rb:0") # string of cs name:value self.rebuild_timeout = BasicParameter(None) self.pool_query_timeout = BasicParameter(None) self.pool_query_delay = BasicParameter(None) diff --git a/src/tests/suite/daos_test_common.c b/src/tests/suite/daos_test_common.c index a68a564b8b3..437a3e13ab0 100644 --- a/src/tests/suite/daos_test_common.c +++ b/src/tests/suite/daos_test_common.c @@ -244,6 +244,70 @@ test_setup_cont_create(void **state, daos_prop_t *co_prop) } } + /* Temporarily use old container property defaults due to DAOS-17946 */ + /* Set DAOS_PROP_CO_CSUM to off if not already defined */ + if (daos_prop_entry_get(co_prop, DAOS_PROP_CO_CSUM) == NULL) { + daos_prop_t *csum_prop = daos_prop_alloc(1); + if (csum_prop == NULL) { + D_ERROR("failed to allocate csum prop\n"); + daos_prop_free(redun_lvl_prop); + daos_prop_free(merged_props); + return -DER_NOMEM; + } + csum_prop->dpp_entries[0].dpe_type = DAOS_PROP_CO_CSUM; + csum_prop->dpp_entries[0].dpe_val = DAOS_PROP_CO_CSUM_OFF; + + daos_prop_t *new_merged_props = daos_prop_merge(co_prop, csum_prop); + if (new_merged_props == NULL) { + D_ERROR("failed to merge co_prop and csum_prop\n"); + daos_prop_free(redun_lvl_prop); + daos_prop_free(merged_props); + daos_prop_free(csum_prop); + return -DER_NOMEM; + } + + /* Update co_prop to point to the newly merged properties */ + if (merged_props) { + daos_prop_free(merged_props); + merged_props = new_merged_props; + } else { + merged_props = new_merged_props; + } + co_prop = merged_props; + daos_prop_free(csum_prop); + } + /* Set DAOS_PROP_CO_CSUM_SERVER_VERIFY to off if not already defined */ + if (daos_prop_entry_get(co_prop, DAOS_PROP_CO_CSUM_SERVER_VERIFY) == NULL) { + daos_prop_t *csum_sv_prop = daos_prop_alloc(1); + if (csum_sv_prop == NULL) { + D_ERROR("failed to allocate csum_sv_prop\n"); + daos_prop_free(redun_lvl_prop); + 
daos_prop_free(merged_props); + return -DER_NOMEM; + } + csum_sv_prop->dpp_entries[0].dpe_type = DAOS_PROP_CO_CSUM_SERVER_VERIFY; + csum_sv_prop->dpp_entries[0].dpe_val = DAOS_PROP_CO_CSUM_SV_OFF; + + daos_prop_t *new_merged_props = daos_prop_merge(co_prop, csum_sv_prop); + if (new_merged_props == NULL) { + D_ERROR("failed to merge co_prop and csum_sv_prop\n"); + daos_prop_free(redun_lvl_prop); + daos_prop_free(merged_props); + daos_prop_free(csum_sv_prop); + return -DER_NOMEM; + } + + /* Update co_prop to point to the newly merged properties */ + if (merged_props) { + daos_prop_free(merged_props); + merged_props = new_merged_props; + } else { + merged_props = new_merged_props; + } + co_prop = merged_props; + daos_prop_free(csum_sv_prop); + } + D_ASSERT(co_prop != NULL); if (daos_prop_entry_get(co_prop, DAOS_PROP_CO_LABEL) == NULL) { char cont_label[32]; diff --git a/utils/node_local_test.py b/utils/node_local_test.py index 894bbdc3761..18337560726 100755 --- a/utils/node_local_test.py +++ b/utils/node_local_test.py @@ -1015,7 +1015,8 @@ def _make_pool(self): else: size = 1024 * 4 - rc = self.run_dmg(['pool', 'create', 'NLT', '--scm-size', f'{size}M']) + rc = self.run_dmg(['pool', 'create', 'NLT', '--scm-size', f'{size}M', '--properties', + 'rd_fac:0,space_rb:0']) print(rc) assert rc.returncode == 0 self.fetch_pools() @@ -1839,6 +1840,8 @@ def create_cont(conf, pool=None, ctype=None, label=None, path=None, oclass=None, if attrs: cmd.extend(['--attrs', ','.join([f"{name}:{val}" for name, val in attrs.items()])]) + cmd.extend(['--properties', 'cksum:off,srv_cksum:off,rd_fac:0']) + def _create_cont(): """Helper function for create_cont""" rc = run_daos_cmd(conf, cmd, use_json=True, log_check=log_check, valgrind=valgrind, @@ -6209,7 +6212,7 @@ def get_cmd(cont_id): 'create', pool.id(), '--properties', - f'srv_cksum:on,label:{cont_id}'] + f'srv_cksum:on,label:{cont_id},rd_fac:0'] test_cmd = AllocFailTest(conf, 'cont-create', get_cmd) test_cmd.wf = wf From 
895120ae79e71a419f8a06d8e0d7065bd3e9fa6e Mon Sep 17 00:00:00 2001 From: Tom Nabarro Date: Thu, 5 Feb 2026 15:10:14 +0000 Subject: [PATCH 180/253] DAOS-18565 control: Allow update to SPDK config via override flag (#17496) Signed-off-by: Tom Nabarro --- src/control/server/instance_storage.go | 4 ++-- src/control/server/storage/config.go | 7 ++++--- src/control/server/storage/provider.go | 11 ++++++++++- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/src/control/server/instance_storage.go b/src/control/server/instance_storage.go index 1d6f3cd6cca..ab8f8adcb98 100644 --- a/src/control/server/instance_storage.go +++ b/src/control/server/instance_storage.go @@ -1,6 +1,6 @@ // // (C) Copyright 2020-2024 Intel Corporation. -// (C) Copyright 2025 Hewlett Packard Enterprise Development LP +// (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP // (C) Copyright 2025 Google LLC // // SPDX-License-Identifier: BSD-2-Clause-Patent @@ -150,7 +150,7 @@ func (ei *EngineInstance) awaitStorageReady(ctx context.Context) error { if !needsSuperblock { ei.log.Debugf("%s: superblock not needed", msgIdx) - if ei.storage.HasBlockDevices() { + if ei.storage.HasBlockDevices() && !ei.storage.AllowSpdkConfOverride() { ei.log.Debugf("%s: checking bdev config", msgIdx) ctrlrs, err := getEngineBdevCtrlrs(ctx, ei) diff --git a/src/control/server/storage/config.go b/src/control/server/storage/config.go index d21aa9db15a..5e2cf986e2d 100644 --- a/src/control/server/storage/config.go +++ b/src/control/server/storage/config.go @@ -1,6 +1,6 @@ // // (C) Copyright 2019-2024 Intel Corporation. -// (C) Copyright 2025 Hewlett Packard Enterprise Development LP +// (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -60,8 +60,9 @@ const ( // ControlMetadata describes configuration options for control plane metadata storage on the // DAOS server. 
type ControlMetadata struct { - Path string `yaml:"path,omitempty"` - DevicePath string `yaml:"device,omitempty"` + Path string `yaml:"path,omitempty"` + DevicePath string `yaml:"device,omitempty"` + AllowSpdkConfOverride bool `yaml:"allow_spdk_conf_override"` } // Directory returns the full path to the directory where the control plane metadata is saved. diff --git a/src/control/server/storage/provider.go b/src/control/server/storage/provider.go index 625b95c9849..f8c3601b845 100644 --- a/src/control/server/storage/provider.go +++ b/src/control/server/storage/provider.go @@ -1,6 +1,6 @@ // // (C) Copyright 2021-2024 Intel Corporation. -// (C) Copyright 2025 Hewlett Packard Enterprise Development LP +// (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP // (C) Copyright 2025 Google LLC // // SPDX-License-Identifier: BSD-2-Clause-Patent @@ -202,6 +202,15 @@ func (p *Provider) ControlMetadataIsMounted() (bool, error) { return p.Sys.IsMounted(p.engineStorage.ControlMetadata.Path) } +// AllowSpdkConfOverride returns true if override of SPDK JSON config file (daos_nvme.conf) has been +// explicitly enabled within the ControlMetadata section of the server config file. +func (p *Provider) AllowSpdkConfOverride() bool { + if !p.engineStorage.ControlMetadata.HasPath() { + return false + } + return p.engineStorage.ControlMetadata.AllowSpdkConfOverride +} + // PrepareScm calls into storage SCM provider to attempt to configure PMem devices to be usable by // DAOS. func (p *Provider) PrepareScm(req ScmPrepareRequest) (*ScmPrepareResponse, error) { From bb2329c8386da8e868a58ee3d437799b7a4323c2 Mon Sep 17 00:00:00 2001 From: Kris Jacque Date: Thu, 5 Feb 2026 11:32:30 -0700 Subject: [PATCH 181/253] DAOS-17693 control: Correctly copy mkfs options (#17498) Options were not being copied properly into the mkfs argument slice. This PR fixes the issue and improves unit testing. 
Signed-off-by: Kris Jacque --- src/control/provider/system/system_linux.go | 24 +++- .../provider/system/system_linux_test.go | 131 ++++++++++++++---- .../server/storage/metadata/provider.go | 9 +- .../server/storage/metadata/provider_test.go | 42 +++--- 4 files changed, 152 insertions(+), 54 deletions(-) diff --git a/src/control/provider/system/system_linux.go b/src/control/provider/system/system_linux.go index e27066e4215..846e2f4ca50 100644 --- a/src/control/provider/system/system_linux.go +++ b/src/control/provider/system/system_linux.go @@ -1,6 +1,6 @@ // // (C) Copyright 2019-2024 Intel Corporation. -// (C) Copyright 2025 Hewlett Packard Enterprise Development LP +// (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -56,12 +56,18 @@ var magicToStr = map[int64]string{ // DefaultProvider returns the package-default provider implementation. func DefaultProvider() *LinuxProvider { - return &LinuxProvider{} + return &LinuxProvider{ + runCommand: func(name string, args ...string) ([]byte, error) { + return exec.Command(name, args...).Output() + }, + } } // LinuxProvider encapsulates Linux-specific implementations of system // interfaces. -type LinuxProvider struct{} +type LinuxProvider struct { + runCommand func(string, ...string) ([]byte, error) +} // mountId,parentId,major:minor,root,mountPoint const ( @@ -254,6 +260,10 @@ type MkfsReq struct { // Mkfs attempts to create a filesystem of the supplied type, on the // supplied device. 
func (s LinuxProvider) Mkfs(req MkfsReq) error { + if req.Filesystem == "" { + return errors.New("no filesystem type specified") + } + cmdPath, err := exec.LookPath(fmt.Sprintf("mkfs.%s", req.Filesystem)) if err != nil { return errors.Wrapf(err, "unable to find mkfs.%s", req.Filesystem) @@ -263,7 +273,7 @@ func (s LinuxProvider) Mkfs(req MkfsReq) error { return err } - args := make([]string, 0, len(req.Options)) + args := make([]string, len(req.Options)) _ = copy(args, req.Options) // TODO: Think about a way to allow for some kind of progress // callback so that the user has some visibility into long-running @@ -274,7 +284,7 @@ func (s LinuxProvider) Mkfs(req MkfsReq) error { if req.Force { args = append([]string{"-F"}, args...) } - out, err := exec.Command(cmdPath, args...).Output() + out, err := s.runCommand(cmdPath, args...) if err != nil { return &RunCmdError{ Wrapped: err, @@ -301,7 +311,7 @@ func (s LinuxProvider) GetDeviceLabel(device string) (string, error) { } args := []string{"-o", "label", "--noheadings", device} - out, err := exec.Command(cmdPath, args...).Output() + out, err := s.runCommand(cmdPath, args...) if err != nil { return "", &RunCmdError{ Wrapped: err, @@ -325,7 +335,7 @@ func (s LinuxProvider) Getfs(device string) (string, error) { } args := []string{"-s", device} - out, err := exec.Command(cmdPath, args...).Output() + out, err := s.runCommand(cmdPath, args...) if err != nil { return FsTypeNone, &RunCmdError{ Wrapped: err, diff --git a/src/control/provider/system/system_linux_test.go b/src/control/provider/system/system_linux_test.go index e658b6d5bcf..a8ec337b6ac 100644 --- a/src/control/provider/system/system_linux_test.go +++ b/src/control/provider/system/system_linux_test.go @@ -1,6 +1,6 @@ // // (C) Copyright 2019-2024 Intel Corporation. 
-// (C) Copyright 2025 Hewlett Packard Enterprise Development LP +// (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -10,6 +10,7 @@ package system import ( "errors" "os" + "path/filepath" "regexp" "strings" "syscall" @@ -202,39 +203,39 @@ func TestSystemLinux_GetfsType(t *testing.T) { } } -func TestSystemLinux_GetDeviceLabel(t *testing.T) { - validDev := func(t *testing.T) string { - t.Helper() +func validDev(t *testing.T) string { + t.Helper() - // Only want numbered partitions, not whole disks - re := regexp.MustCompile(`^[a-zA-Z]+[0-9]+$`) + // Only want numbered partitions, not whole disks + re := regexp.MustCompile(`^[a-zA-Z]+[0-9]+$`) - sysRoot := "/sys/class/block/" - entries, err := os.ReadDir(sysRoot) - if err != nil { - t.Fatalf("unable to read %q: %v", sysRoot, err) - } - - for _, entry := range entries { - if !re.MatchString(entry.Name()) { - continue - } + sysRoot := "/sys/class/block/" + entries, err := os.ReadDir(sysRoot) + if err != nil { + t.Fatalf("unable to read %q: %v", sysRoot, err) + } - devPath := "/dev/" + entry.Name() - info, err := os.Stat(devPath) - if err != nil { - continue - } - if (info.Mode()&os.ModeDevice) != 0 && (info.Mode()&os.ModeCharDevice) == 0 { - t.Logf("using block device %q for test", devPath) - return devPath - } + for _, entry := range entries { + if !re.MatchString(entry.Name()) { + continue } - t.Fatal("no valid block device found for test") - return "" + devPath := "/dev/" + entry.Name() + info, err := os.Stat(devPath) + if err != nil { + continue + } + if (info.Mode()&os.ModeDevice) != 0 && (info.Mode()&os.ModeCharDevice) == 0 { + t.Logf("using block device %q for test", devPath) + return devPath + } } + t.Fatal("no valid block device found for test") + return "" +} + +func TestSystemLinux_GetDeviceLabel(t *testing.T) { for name, tc := range map[string]struct { path string expErr error @@ -312,3 +313,79 @@ func TestSystemLinux_fsStrFromMagic(t 
*testing.T) { }) } } + +func TestSystemLinux_Mkfs(t *testing.T) { + for name, tc := range map[string]struct { + req MkfsReq + expErr error + expCmdName string + expCmdArgs []string + }{ + "empty": { + req: MkfsReq{}, + expErr: errors.New("no filesystem"), + }, + "bad filesystem": { + req: MkfsReq{ + Filesystem: "moo", + }, + expErr: errors.New("unable to find mkfs.moo"), + }, + "bad device": { + req: MkfsReq{ + Filesystem: "ext4", + Device: "/notreal", + }, + expErr: syscall.ENOENT, + }, + "success": { + req: MkfsReq{ + Filesystem: "ext4", + Device: validDev(t), // real device, but actual mkfs command is mocked + }, + expCmdName: "mkfs.ext4", + expCmdArgs: []string{validDev(t)}, + }, + "force": { + req: MkfsReq{ + Filesystem: "ext4", + Device: validDev(t), + Force: true, + }, + expCmdName: "mkfs.ext4", + expCmdArgs: []string{"-F", validDev(t)}, + }, + "options": { + req: MkfsReq{ + Filesystem: "ext4", + Device: validDev(t), + Options: []string{"-L", "my_device"}, + }, + expCmdName: "mkfs.ext4", + expCmdArgs: []string{"-L", "my_device", validDev(t)}, + }, + } { + t.Run(name, func(t *testing.T) { + p := DefaultProvider() + + var seenName string + var seenArgs []string + p.runCommand = func(name string, args ...string) ([]byte, error) { + seenName = name + seenArgs = args + return []byte{}, nil + } + + err := p.Mkfs(tc.req) + + test.CmpErr(t, tc.expErr, err) + + if seenName != "" { + // don't care where the binary was found, just that it was + seenName = filepath.Base(seenName) + } + test.AssertEqual(t, tc.expCmdName, seenName, "mkfs command name") + test.AssertEqual(t, tc.expCmdArgs, seenArgs, "mkfs args") + }) + } +} diff --git a/src/control/server/storage/metadata/provider.go b/src/control/server/storage/metadata/provider.go index bf7c86a0b6d..07a25157817 100644 --- a/src/control/server/storage/metadata/provider.go +++ b/src/control/server/storage/metadata/provider.go @@ -1,6 +1,6 @@ // // (C) Copyright 2022-2024 Intel Corporation. 
-// (C) Copyright 2025 Hewlett Packard Enterprise Development LP +// (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -107,13 +107,16 @@ func (p *Provider) setupMountPoint(req storage.MetadataFormatRequest) error { return errors.Wrap(err, "checking existing device label") } - var opts []string + opts := []string{ + // Quiet mode + "-q", + } if label != "" { p.log.Debugf("preserving existing device label %q for %q", label, req.Device) opts = append(opts, "-L", label) } - p.log.Debugf("formatting device %q", req.Device) + p.log.Debugf("mkfs.%s %q with options: %s", defaultDevFS, req.Device, strings.Join(opts, " ")) if err := p.sys.Mkfs(system.MkfsReq{ Filesystem: defaultDevFS, Device: req.Device, diff --git a/src/control/server/storage/metadata/provider_test.go b/src/control/server/storage/metadata/provider_test.go index f9c6f05c5b2..2215187f1d6 100644 --- a/src/control/server/storage/metadata/provider_test.go +++ b/src/control/server/storage/metadata/provider_test.go @@ -1,6 +1,6 @@ // // (C) Copyright 2022-2024 Intel Corporation. 
-// (C) Copyright 2025 Hewlett Packard Enterprise Development LP +// (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -110,7 +110,8 @@ func TestMetadata_Provider_Format(t *testing.T) { sysCfg: &system.MockSysConfig{ GetfsTypeErr: []error{errors.New("mock GetfsType")}, }, - expMkfs: true, + expMkfsOpts: []string{"-q"}, + expMkfs: true, }, "GetfsType retries with parent if dir doesn't exist": { req: pathReq, @@ -145,16 +146,18 @@ func TestMetadata_Provider_Format(t *testing.T) { sysCfg: &system.MockSysConfig{ MkfsErr: errors.New("mock mkfs"), }, - expErr: errors.New("mock mkfs"), - expMkfs: true, + expErr: errors.New("mock mkfs"), + expMkfsOpts: []string{"-q"}, + expMkfs: true, }, "Mount fails": { req: deviceReq, mountCfg: &storage.MockMountProviderConfig{ MountErr: errors.New("mock Mount"), }, - expErr: errors.New("mock Mount"), - expMkfs: true, + expErr: errors.New("mock Mount"), + expMkfsOpts: []string{"-q"}, + expMkfs: true, }, "remove old data dir fails": { req: deviceReq, @@ -172,8 +175,9 @@ func TestMetadata_Provider_Format(t *testing.T) { } } }, - expErr: errors.New("removing old control metadata subdirectory"), - expMkfs: true, + expErr: errors.New("removing old control metadata subdirectory"), + expMkfsOpts: []string{"-q"}, + expMkfs: true, }, "create data dir fails": { req: deviceReq, @@ -191,16 +195,18 @@ func TestMetadata_Provider_Format(t *testing.T) { } } }, - expErr: errors.New("creating control metadata subdirectory"), - expMkfs: true, + expErr: errors.New("creating control metadata subdirectory"), + expMkfsOpts: []string{"-q"}, + expMkfs: true, }, "chown data dir fails": { req: deviceReq, sysCfg: &system.MockSysConfig{ ChownErr: errors.New("mock chown"), }, - expErr: errors.New("mock chown"), - expMkfs: true, + expErr: errors.New("mock chown"), + expMkfsOpts: []string{"-q"}, + expMkfs: true, }, "Unmount fails": { req: deviceReq, @@ -208,19 +214,21 @@ func 
TestMetadata_Provider_Format(t *testing.T) { IsMountedRes: true, UnmountErr: errors.New("mock Unmount"), }, - expErr: errors.New("mock Unmount"), - expMkfs: true, + expErr: errors.New("mock Unmount"), + expMkfsOpts: []string{"-q"}, + expMkfs: true, }, "device success": { - req: deviceReq, - expMkfs: true, + req: deviceReq, + expMkfsOpts: []string{"-q"}, + expMkfs: true, }, "preserve existing label": { req: deviceReq, sysCfg: &system.MockSysConfig{ GetDeviceLabelRes: "old_label", }, - expMkfsOpts: []string{"-L", "old_label"}, + expMkfsOpts: []string{"-q", "-L", "old_label"}, expMkfs: true, }, "path only doesn't attempt device format": { From df7854a580eaf81417164237828c6d2e4740e5e0 Mon Sep 17 00:00:00 2001 From: Kris Jacque Date: Thu, 5 Feb 2026 14:52:12 -0700 Subject: [PATCH 182/253] DAOS-18494 control: Don't use default sys in daos pool list (#17384) Removed a case where we were still substituting the default system name in the daos pool list command. Signed-off-by: Kris Jacque --- src/control/lib/daos/api/pool.go | 9 ++++----- src/control/lib/daos/api/pool_test.go | 4 ++-- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/control/lib/daos/api/pool.go b/src/control/lib/daos/api/pool.go index 4644ed243ba..666592f4b4c 100644 --- a/src/control/lib/daos/api/pool.go +++ b/src/control/lib/daos/api/pool.go @@ -13,7 +13,6 @@ import ( "github.com/google/uuid" "github.com/pkg/errors" - "github.com/daos-stack/daos/src/control/build" "github.com/daos-stack/daos/src/control/lib/daos" "github.com/daos-stack/daos/src/control/lib/ranklist" "github.com/daos-stack/daos/src/control/logging" @@ -670,11 +669,11 @@ func GetPoolList(ctx context.Context, req GetPoolListReq) ([]*daos.PoolInfo, err log := logging.FromContext(ctx) log.Debugf("GetPoolList(%+v)", req) - if req.SysName == "" { - req.SysName = build.DefaultSystemName + var cSysName *C.char + if req.SysName != "" { + cSysName = C.CString(req.SysName) + defer freeString(cSysName) } - cSysName := 
C.CString(req.SysName) - defer freeString(cSysName) var cPools []C.daos_mgmt_pool_info_t for { diff --git a/src/control/lib/daos/api/pool_test.go b/src/control/lib/daos/api/pool_test.go index cc1c7e59f81..19cb4935cb1 100644 --- a/src/control/lib/daos/api/pool_test.go +++ b/src/control/lib/daos/api/pool_test.go @@ -955,11 +955,11 @@ func TestAPI_GetPoolList(t *testing.T) { ctx: test.Context(t), expPools: defaultPoolInfoResp, }, - "default system name supplied": { + "empty system name supplied": { ctx: test.Context(t), req: GetPoolListReq{}, checkParams: func(t *testing.T) { - test.CmpAny(t, "sysName", build.DefaultSystemName, daos_mgmt_list_pools_SetSys) + test.CmpAny(t, "sysName", "", daos_mgmt_list_pools_SetSys) }, expPools: defaultPoolInfoResp, }, From f7f5c2d51d6fb8a776a37d74d1f7729b157b0e2e Mon Sep 17 00:00:00 2001 From: Liu Xuezhao Date: Fri, 6 Feb 2026 19:55:25 +0800 Subject: [PATCH 183/253] DAOS-18470 rebuild: re-schedule rebuild task after stopped (#17492) * DAOS-18470 rebuild: re-schedule rebuild task after stopped To fix some rebuild stop sequence problem for example - 1. kill rank 5, trigger rebuild task A 2. dmg pool rebuild stop 3. kill rank 6 trigger rebuild task B After 3's rebuild done, rank 5 status keeps as DOWN, but rank 6 is DOWNOUT. The problem is due to in rebuild task B it actually rebuilt both rank 5 and rank 6, but when it completes it only sets rank 5's status to DOWNOUT because rebuild B's task->dst_tgts only with rank 6. This patch take a temporarily simple method that - After the rebuild stopped, reschedule original rebuild task with delay -1, so following's rebuild can be merge with that to a new rebuild with merged task->dst_tgts (which include both rank 5 and 6 in that case). In future can consider to refine rebuild task management further. 
Signed-off-by: Xuezhao Liu --- src/rebuild/srv.c | 46 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 42 insertions(+), 4 deletions(-) diff --git a/src/rebuild/srv.c b/src/rebuild/srv.c index 9bfee11bd7b..87b27f03776 100644 --- a/src/rebuild/srv.c +++ b/src/rebuild/srv.c @@ -1605,6 +1605,12 @@ rebuild_try_merge_tgts(struct ds_pool *pool, uint32_t map_ver, if (delay_sec != (uint64_t)(-1)) merge_task->dst_schedule_time = daos_gettime_coarse() + delay_sec; } + /* For the case of new rebuild task in queue, and then rebuild stop's fail reclaim + * complete and re-scheduled the original rebuild task with delay -1. + */ + if (merge_pre_task->dst_schedule_time != (uint64_t)(-1) && + delay_sec == (uint64_t)(-1)) + merge_task = merge_pre_task; } else if (merge_post_task != NULL && merge_post_task->dst_rebuild_op == rebuild_op) { if ((merge_post_task->dst_schedule_time == (uint64_t)(-1) && delay_sec == (uint64_t)(-1)) || @@ -1854,6 +1860,25 @@ rebuild_task_complete_schedule(struct rebuild_task *task, struct ds_pool *pool, * fails, it will be used to discard all of the previous rebuild data * (reclaim - 1 see obj_reclaim()), but keep the in-flight I/O data. 
*/ + if (rgt->rgt_stop_admin) { + rc = ds_rebuild_schedule( + pool, task->dst_reclaim_ver - 1 /* map_ver */, + rgt->rgt_stable_epoch, task->dst_new_layout_version, + &task->dst_tgts, RB_OP_FAIL_RECLAIM, + task->dst_rebuild_op /* retry_rebuild_op */, + task->dst_map_ver /* retry_map_ver */, rgt->rgt_stop_admin, + task, delay_sec); + DL_CDEBUG(rc, DLOG_ERR, DLOG_INFO, rc, + DF_RB ": errno " DF_RC ", schedule %u(%s)", + DP_RB_RGT(rgt), DP_RC(rgt->rgt_status.rs_errno), + RB_OP_FAIL_RECLAIM, RB_OP_STR(RB_OP_FAIL_RECLAIM)); + D_GOTO(complete, rc); + } + + /* revert pool map and defer scheduling a retry until Fail_reclaim is done + */ + retry_rebuild_task(task, rgt, &retry_opc); + rc = ds_rebuild_schedule( pool, task->dst_reclaim_ver - 1 /* map_ver */, rgt->rgt_stable_epoch, task->dst_new_layout_version, &task->dst_tgts, RB_OP_FAIL_RECLAIM, @@ -1863,10 +1888,6 @@ rebuild_task_complete_schedule(struct rebuild_task *task, struct ds_pool *pool, DF_RB ": errno " DF_RC ", schedule %u(%s)", DP_RB_RGT(rgt), DP_RC(rgt->rgt_status.rs_errno), RB_OP_FAIL_RECLAIM, RB_OP_STR(RB_OP_FAIL_RECLAIM)); - - /* revert pool map and defer scheduling a retry until Fail_reclaim is done - */ - retry_rebuild_task(task, rgt, &retry_opc); D_GOTO(complete, rc); } @@ -1926,6 +1947,23 @@ rebuild_task_complete_schedule(struct rebuild_task *task, struct ds_pool *pool, DL_CDEBUG(rc1, DLOG_ERR, DLOG_INFO, rc1, DF_RB ": updated, state %d errno " DF_RC, DP_RB_RGT(rgt), rgt->rgt_status.rs_state, DP_RC(rgt->rgt_status.rs_errno)); + + /* re-schedule the stopped original rebuild task with delay -1, to be merged with + * following rebuild task, to avoid losing the task->dst_tgts. 
+ */ + if (task->dst_retry_rebuild_op == RB_OP_REBUILD) { + rc = ds_rebuild_schedule( + pool, task->dst_retry_map_ver, rgt->rgt_reclaim_epoch, + task->dst_new_layout_version, &task->dst_tgts, + task->dst_retry_rebuild_op, RB_OP_NONE /* retry_rebuild_op */, + 0 /* retry_map_ver */, false /* stop_admin */, task, + -1 /* delay_sec */); + DL_CDEBUG(rc, DLOG_ERR, DLOG_INFO, rc, + DF_RB ": errno " DF_RC ", schedule retry %u(%s) with delay -1", + DP_RB_RGT(rgt), DP_RC(rgt->rgt_status.rs_errno), + task->dst_retry_rebuild_op, + RB_OP_STR(task->dst_retry_rebuild_op)); + } } else if ((task->dst_rebuild_op == RB_OP_FAIL_RECLAIM) && (task->dst_retry_rebuild_op != RB_OP_NONE)) { /* Fail_reclaim done (and a stop command wasn't received during) - retry rebuild. */ From 08c288630941a47f9c8d1951556cc08a25ad48ad Mon Sep 17 00:00:00 2001 From: Niu Yawei Date: Fri, 6 Feb 2026 22:53:57 +0800 Subject: [PATCH 184/253] DAOS-18544 rebuild: free mrones on error cleanup (#17484) Free mrones on error cleanup. Signed-off-by: Niu Yawei --- src/object/srv_obj_migrate.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/object/srv_obj_migrate.c b/src/object/srv_obj_migrate.c index 854d03f5ead..ad0d7b97843 100644 --- a/src/object/srv_obj_migrate.c +++ b/src/object/srv_obj_migrate.c @@ -2813,6 +2813,17 @@ migrate_obj_punch_one(void *data) return rc; } +static inline void +free_mrones(struct enum_unpack_arg *unpack_arg) +{ + struct migrate_one *mrone, *tmp; + + d_list_for_each_entry_safe(mrone, tmp, &unpack_arg->merge_list, mo_list) { + d_list_del_init(&mrone->mo_list); + migrate_one_destroy(mrone); + } +} + static int migrate_start_ult(struct enum_unpack_arg *unpack_arg) { @@ -3099,6 +3110,8 @@ migrate_one_epoch_object(daos_epoch_range_t *epr, struct migrate_pool_tls *tls, enum_flags |= DIOF_TO_LEADER; } + free_mrones(&unpack_arg); + if (buf != NULL && buf != stack_buf) D_FREE(buf); From fc447b2263f80ba1d2073f78288a12995814fb4f Mon Sep 17 00:00:00 2001 From: Tom Nabarro Date: Fri, 
6 Feb 2026 17:13:58 +0000 Subject: [PATCH 185/253] DAOS-18566 control: Add spdk iobuf opts to server config (#17512) Add SPDK iobuf configurable parameters for small and large pool count to the server config file. Tuning these parameters can alleviate bottlenecks in high performance environments. Signed-off-by: Tom Nabarro --- src/control/server/config/server_test.go | 10 +- src/control/server/engine/config.go | 9 +- src/control/server/engine/config_test.go | 36 +++- src/control/server/storage/bdev.go | 4 +- .../server/storage/bdev/backend_json.go | 36 ++++ .../server/storage/bdev/backend_json_test.go | 177 +++++++++++++++++- src/control/server/storage/config.go | 13 ++ src/control/server/storage/config_test.go | 163 ++++++++++++++++ src/control/server/storage/provider.go | 1 + utils/config/daos_server.yml | 4 + 10 files changed, 444 insertions(+), 9 deletions(-) diff --git a/src/control/server/config/server_test.go b/src/control/server/config/server_test.go index 3bf190b9b61..97462d60ea6 100644 --- a/src/control/server/config/server_test.go +++ b/src/control/server/config/server_test.go @@ -1,6 +1,6 @@ // // (C) Copyright 2020-2024 Intel Corporation. -// (C) Copyright 2025 Hewlett Packard Enterprise Development LP +// (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -297,7 +297,8 @@ func TestServerConfig_Constructed(t *testing.T) { WithLogFile("/var/log/daos/daos_engine.0.log"). WithLogMask("INFO"). WithStorageEnableHotplug(false). - WithStorageAutoFaultyCriteria(true, 100, 200), + WithStorageAutoFaultyCriteria(true, 100, 200). + WithStorageSpdkIobufProps(16384, 2048), engine.MockConfig(). WithSystemName("daos_server"). WithSocketDir("./.daos/daos_server"). @@ -325,7 +326,8 @@ func TestServerConfig_Constructed(t *testing.T) { WithLogFile("/var/log/daos/daos_engine.1.log"). WithLogMask("INFO"). WithStorageEnableHotplug(false). 
- WithStorageAutoFaultyCriteria(false, 0, 0), + WithStorageAutoFaultyCriteria(false, 0, 0). + WithStorageSpdkIobufProps(0, 0), } constructed.Path = testFile // just to avoid failing the cmp @@ -334,7 +336,7 @@ func TestServerConfig_Constructed(t *testing.T) { t.Logf("default: %+v", defaultCfg.Engines[i]) } - if diff := cmp.Diff(defaultCfg, constructed, defConfigCmpOpts...); diff != "" { + if diff := cmp.Diff(constructed, defaultCfg, defConfigCmpOpts...); diff != "" { t.Fatalf("(-want, +got): %s", diff) } } diff --git a/src/control/server/engine/config.go b/src/control/server/engine/config.go index d34c6732d9d..65748106adf 100644 --- a/src/control/server/engine/config.go +++ b/src/control/server/engine/config.go @@ -1,6 +1,6 @@ // // (C) Copyright 2019-2024 Intel Corporation. -// (C) Copyright 2025 Hewlett Packard Enterprise Development LP +// (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -790,6 +790,13 @@ func (c *Config) WithStorageAutoFaultyCriteria(enable bool, maxIoErrs, maxCsumEr return c } +// WithStorageSpdkIobufProps specifies SPDK I/O buffer pool settings in the I/O Engine. +func (c *Config) WithStorageSpdkIobufProps(smallPoolCount, largePoolCount uint32) *Config { + c.Storage.SpdkIobufProps.SmallPoolCount = smallPoolCount + c.Storage.SpdkIobufProps.LargePoolCount = largePoolCount + return c +} + // WithIndex sets the I/O Engine instance index. func (c *Config) WithIndex(i uint32) *Config { c.Index = i diff --git a/src/control/server/engine/config_test.go b/src/control/server/engine/config_test.go index 5ad23861e0f..48c29a7b794 100644 --- a/src/control/server/engine/config_test.go +++ b/src/control/server/engine/config_test.go @@ -1,6 +1,6 @@ // // (C) Copyright 2019-2024 Intel Corporation. 
-// (C) Copyright 2025 Hewlett Packard Enterprise Development LP +// (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -1419,3 +1419,37 @@ func TestConfig_SetNUMAAffinity(t *testing.T) { }) } } + +func TestConfig_WithStorageSpdkIobufProps(t *testing.T) { + for name, tc := range map[string]struct { + smallPoolCount uint32 + largePoolCount uint32 + }{ + "zero values": { + smallPoolCount: 0, + largePoolCount: 0, + }, + "small pool count only": { + smallPoolCount: 1024, + largePoolCount: 0, + }, + "large pool count only": { + smallPoolCount: 0, + largePoolCount: 512, + }, + "both pool counts set": { + smallPoolCount: 2048, + largePoolCount: 1024, + }, + } { + t.Run(name, func(t *testing.T) { + cfg := NewConfig(). + WithStorageSpdkIobufProps(tc.smallPoolCount, tc.largePoolCount) + + test.AssertEqual(t, tc.smallPoolCount, cfg.Storage.SpdkIobufProps.SmallPoolCount, + "unexpected small pool count") + test.AssertEqual(t, tc.largePoolCount, cfg.Storage.SpdkIobufProps.LargePoolCount, + "unexpected large pool count") + }) + } +} diff --git a/src/control/server/storage/bdev.go b/src/control/server/storage/bdev.go index d30ac6af487..a69023ab977 100644 --- a/src/control/server/storage/bdev.go +++ b/src/control/server/storage/bdev.go @@ -1,6 +1,6 @@ // // (C) Copyright 2019-2024 Intel Corporation. -// (C) Copyright 2025 Hewlett Packard Enterprise Development LP +// (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP // (C) Copyright 2025 Google LLC // // SPDX-License-Identifier: BSD-2-Clause-Patent @@ -61,6 +61,7 @@ const ( ConfSetAccelProps = C.NVME_CONF_SET_ACCEL_PROPS ConfSetSpdkRpcServer = C.NVME_CONF_SET_SPDK_RPC_SERVER ConfSetAutoFaultyProps = C.NVME_CONF_SET_AUTO_FAULTY + ConfIobufSetOptions = "iobuf_set_options" ) // Acceleration related constants for engine setting and optional capabilities. 
@@ -616,6 +617,7 @@ type ( AccelProps AccelProps SpdkRpcSrvProps SpdkRpcServer AutoFaultyProps BdevAutoFaulty + SpdkIobufProps SpdkIobuf VMDEnabled bool ScannedBdevs NvmeControllers // VMD needs address mapping for backing devices. } diff --git a/src/control/server/storage/bdev/backend_json.go b/src/control/server/storage/bdev/backend_json.go index 020566df4d1..8caa2014717 100644 --- a/src/control/server/storage/bdev/backend_json.go +++ b/src/control/server/storage/bdev/backend_json.go @@ -1,5 +1,6 @@ // // (C) Copyright 2021-2024 Intel Corporation. +// (C) Copyright 2026 Hewlett Packard Enterprise Development LP // (C) Copyright 2025 Google LLC // // SPDX-License-Identifier: BSD-2-Clause-Patent @@ -88,6 +89,15 @@ type AioCreateParams struct { func (_ AioCreateParams) isSpdkSubsystemConfigParams() {} +// IobufParams specifies details for a storage.ConfIobufSetOptions method. Zero values are not +// marshalled to JSON config output. +type IobufParams struct { + SmallPoolCount uint32 `json:"small_pool_count,omitzero"` + LargePoolCount uint32 `json:"large_pool_count,omitzero"` +} + +func (_ IobufParams) isSpdkSubsystemConfigParams() {} + // HotplugBusidRangeParams specifies details for a storage.ConfSetHotplugBusidRange method. type HotplugBusidRangeParams struct { Begin uint8 `json:"begin"` @@ -140,6 +150,8 @@ func (ssc *SpdkSubsystemConfig) UnmarshalJSON(data []byte) error { ssc.Params = &VmdEnableParams{} case storage.ConfBdevAioCreate: ssc.Params = &AioCreateParams{} + case storage.ConfIobufSetOptions: + ssc.Params = &IobufParams{} default: return errors.Errorf("unknown SPDK subsystem config method %q", ssc.Method) } @@ -324,6 +336,29 @@ func (sc *SpdkConfig) WithBdevConfigs(log logging.Logger, req *storage.BdevWrite return sc } +// WithSpdkIobufOpts adds custom SPDK iobuf options. No config entry is added if values are all +// zero. 
Only non-zero IobufParams field values are marshalled. +func (sc *SpdkConfig) WithSpdkIobufOpts(req *storage.BdevWriteConfigRequest) *SpdkConfig { + if req.SpdkIobufProps.IsEmpty() { + return sc + } + + sc.Subsystems = append(sc.Subsystems, &SpdkSubsystem{ + Name: "iobuf", + Configs: []*SpdkSubsystemConfig{ + { + Method: storage.ConfIobufSetOptions, + Params: &IobufParams{ + SmallPoolCount: req.SpdkIobufProps.SmallPoolCount, + LargePoolCount: req.SpdkIobufProps.LargePoolCount, + }, + }, + }, + }) + + return sc +} + // Add hotplug bus-ID range to DAOS config data for use by non-SPDK consumers in // engine e.g. BIO or VOS. func hotplugPropSet(req *storage.BdevWriteConfigRequest, data *DaosData) { @@ -388,6 +423,7 @@ func newSpdkConfig(log logging.Logger, req *storage.BdevWriteConfigRequest) (*Sp rpcSrvSet(req, sc.DaosData) autoFaultySet(req, sc.DaosData) sc.WithBdevConfigs(log, req) + sc.WithSpdkIobufOpts(req) // SPDK-3370: Ensure hotplug config appears after attach directives to avoid race when VMD // with hotplug is enabled with multiple domains. diff --git a/src/control/server/storage/bdev/backend_json_test.go b/src/control/server/storage/bdev/backend_json_test.go index 33eff6f0a37..bce19effc37 100644 --- a/src/control/server/storage/bdev/backend_json_test.go +++ b/src/control/server/storage/bdev/backend_json_test.go @@ -1,5 +1,6 @@ // // (C) Copyright 2021-2024 Intel Corporation.
+// (C) Copyright 2026 Hewlett Packard Enterprise Development LP // (C) Copyright 2025 Google LLC // // SPDX-License-Identifier: BSD-2-Clause-Patent @@ -86,6 +87,8 @@ func TestBackend_newSpdkConfig(t *testing.T) { autoFaultyEnable bool autoFaultyIO uint32 autoFaultyCsum uint32 + iobufSmallPoolNr uint32 + iobufLargePoolNr uint32 expExtraSubsystems []*SpdkSubsystem expBdevCfgs []*SpdkSubsystemConfig expDaosCfgs []*DaosConfig @@ -196,7 +199,7 @@ func TestBackend_newSpdkConfig(t *testing.T) { }...), vosEnv: "AIO", }, - "multiple controllers; accel, rpc server & auto faulty settings": { + "accel, rpc server & auto faulty settings": { class: storage.ClassNvme, devList: []string{test.MockPCIAddr(1), test.MockPCIAddr(2)}, accelEngine: storage.AccelEngineSPDK, @@ -232,6 +235,28 @@ func TestBackend_newSpdkConfig(t *testing.T) { }, }, }, + "iobuf custom settings provided": { + class: storage.ClassNvme, + devList: []string{test.MockPCIAddr(1), test.MockPCIAddr(2)}, + devRoles: storage.BdevRoleAll, + iobufSmallPoolNr: 16384, + iobufLargePoolNr: 2048, + expBdevCfgs: multiCtrlrConfs(storage.BdevRoleAll, false), + expExtraSubsystems: []*SpdkSubsystem{ + { + Name: "iobuf", + Configs: []*SpdkSubsystemConfig{ + { + Method: storage.ConfIobufSetOptions, + Params: &IobufParams{ + SmallPoolCount: 16384, + LargePoolCount: 2048, + }, + }, + }, + }, + }, + }, } for name, tc := range tests { @@ -273,7 +298,8 @@ func TestBackend_newSpdkConfig(t *testing.T) { WithStorageAccelProps(tc.accelEngine, tc.accelOptMask). WithStorageSpdkRpcSrvProps(tc.rpcSrvEnable, tc.rpcSrvSockAddr). WithStorageAutoFaultyCriteria(tc.autoFaultyEnable, tc.autoFaultyIO, - tc.autoFaultyCsum) + tc.autoFaultyCsum). 
+ WithStorageSpdkIobufProps(tc.iobufSmallPoolNr, tc.iobufLargePoolNr) if tc.devRoles != 0 { engineConfig.Storage.ControlMetadata = storage.ControlMetadata{ @@ -332,3 +358,150 @@ func TestBackend_unreadableSpdkConfig(t *testing.T) { t.Fatal("expected error") } } + +func TestBackend_IobufParams_JSONUnmarshal(t *testing.T) { + for name, tc := range map[string]struct { + input string + expOutput IobufParams + }{ + "empty": { + input: `{}`, + expOutput: IobufParams{ + SmallPoolCount: 0, + LargePoolCount: 0, + }, + }, + "small_pool_count only": { + input: `{"small_pool_count":1024}`, + expOutput: IobufParams{ + SmallPoolCount: 1024, + LargePoolCount: 0, + }, + }, + "large_pool_count only": { + input: `{"large_pool_count":512}`, + expOutput: IobufParams{ + SmallPoolCount: 0, + LargePoolCount: 512, + }, + }, + "both values set": { + input: `{"small_pool_count":2048,"large_pool_count":1024}`, + expOutput: IobufParams{ + SmallPoolCount: 2048, + LargePoolCount: 1024, + }, + }, + } { + t.Run(name, func(t *testing.T) { + var v IobufParams + if err := json.Unmarshal([]byte(tc.input), &v); err != nil { + t.Fatal(err) + } + + if diff := cmp.Diff(v, tc.expOutput); diff != "" { + t.Fatalf("unmarshal mismatch (-want +got):\n%s\nJSON: %s", diff, tc.input) + } + }) + } +} + +func TestBackend_IobufParams_JSON_RoundTrip(t *testing.T) { + for name, tc := range map[string]struct { + params IobufParams + }{ + "zero values": { + params: IobufParams{ + SmallPoolCount: 0, + LargePoolCount: 0, + }, + }, + "small_pool_count only": { + params: IobufParams{ + SmallPoolCount: 1024, + LargePoolCount: 0, + }, + }, + "large_pool_count only": { + params: IobufParams{ + SmallPoolCount: 0, + LargePoolCount: 512, + }, + }, + "both values set": { + params: IobufParams{ + SmallPoolCount: 2048, + LargePoolCount: 1024, + }, + }, + } { + t.Run(name, func(t *testing.T) { + // Marshal to JSON + buf, err := json.Marshal(&tc.params) + if err != nil { + t.Fatal(err) + } + + // Unmarshal back + var unmarshaled 
IobufParams + if err := json.Unmarshal(buf, &unmarshaled); err != nil { + t.Fatal(err) + } + + // Verify round-trip preserves values + if diff := cmp.Diff(tc.params, unmarshaled); diff != "" { + t.Fatalf("round-trip mismatch (-want +got):\n%s\nJSON: %s", diff, string(buf)) + } + }) + } +} + +func TestBackend_IobufParams_JSONOutput(t *testing.T) { + for name, tc := range map[string]struct { + params IobufParams + expJSON string + }{ + "both zero produces empty object": { + params: IobufParams{ + SmallPoolCount: 0, + LargePoolCount: 0, + }, + expJSON: `{}`, + }, + "only small_pool_count": { + params: IobufParams{ + SmallPoolCount: 1024, + LargePoolCount: 0, + }, + expJSON: `{"small_pool_count":1024}`, + }, + "only large_pool_count": { + params: IobufParams{ + SmallPoolCount: 0, + LargePoolCount: 512, + }, + expJSON: `{"large_pool_count":512}`, + }, + "both non-zero": { + params: IobufParams{ + SmallPoolCount: 2048, + LargePoolCount: 1024, + }, + expJSON: `{"small_pool_count":2048,"large_pool_count":1024}`, + }, + } { + t.Run(name, func(t *testing.T) { + buf, err := json.Marshal(&tc.params) + if err != nil { + t.Fatal(err) + } + + jsonStr := string(buf) + + // Check exact JSON output + if diff := cmp.Diff(tc.expJSON, jsonStr); diff != "" { + t.Fatalf("unexpected JSON output (-want +got):\n%s", diff) + } + }) + } +} diff --git a/src/control/server/storage/config.go b/src/control/server/storage/config.go index 5e2cf986e2d..548c959aee7 100644 --- a/src/control/server/storage/config.go +++ b/src/control/server/storage/config.go @@ -1149,6 +1149,18 @@ type BdevAutoFaulty struct { MaxCsumErrs uint32 `yaml:"max_csum_errs,omitempty" json:"max_csum_errs"` } +// SpdkIobuf struct describes settings for DAOS I/O buffer pool configuration within the BIO +// module of the engine process. 
+type SpdkIobuf struct { + SmallPoolCount uint32 `yaml:"small_pool_count,omitempty" json:"small_pool_count,omitempty"` + LargePoolCount uint32 `yaml:"large_pool_count,omitempty" json:"large_pool_count,omitempty"` +} + +// IsEmpty returns true if all struct values are zero. +func (si *SpdkIobuf) IsEmpty() bool { + return si.SmallPoolCount == 0 && si.LargePoolCount == 0 +} + // Config defines engine storage. type Config struct { ControlMetadata ControlMetadata `yaml:"-"` // inherited from server @@ -1161,6 +1173,7 @@ type Config struct { AccelProps AccelProps `yaml:"acceleration,omitempty"` SpdkRpcSrvProps SpdkRpcServer `yaml:"spdk_rpc_server,omitempty"` AutoFaultyProps BdevAutoFaulty `yaml:"bdev_auto_faulty,omitempty"` + SpdkIobufProps SpdkIobuf `yaml:"spdk_iobuf,omitempty"` } // SetNUMAAffinity enables the assignment of NUMA affinity to tier configs. diff --git a/src/control/server/storage/config_test.go b/src/control/server/storage/config_test.go index f595aaa4ffa..e3dc3053278 100644 --- a/src/control/server/storage/config_test.go +++ b/src/control/server/storage/config_test.go @@ -1,5 +1,6 @@ // // (C) Copyright 2019-2023 Intel Corporation. 
+// (C) Copyright 2026 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -961,6 +962,168 @@ acceleration: } } +func TestStorage_SpdkIobuf_FromYAML(t *testing.T) { + for name, tc := range map[string]struct { + input string + expProps SpdkIobuf + expErr error + }{ + "iobuf section missing": { + input: ``, + }, + "iobuf section empty": { + input: ` +spdk_iobuf: +`, + }, + "small_pool_count only": { + input: ` +spdk_iobuf: + small_pool_count: 1024 +`, + expProps: SpdkIobuf{ + SmallPoolCount: 1024, + }, + }, + "large_pool_count only": { + input: ` +spdk_iobuf: + large_pool_count: 512 +`, + expProps: SpdkIobuf{ + LargePoolCount: 512, + }, + }, + "both pool counts set": { + input: ` +spdk_iobuf: + small_pool_count: 2048 + large_pool_count: 1024 +`, + expProps: SpdkIobuf{ + SmallPoolCount: 2048, + LargePoolCount: 1024, + }, + }, + "zero values": { + input: ` +spdk_iobuf: + small_pool_count: 0 + large_pool_count: 0 +`, + expProps: SpdkIobuf{ + SmallPoolCount: 0, + LargePoolCount: 0, + }, + }, + } { + t.Run(name, func(t *testing.T) { + cfg := new(Config) + err := yaml.UnmarshalStrict([]byte(tc.input), cfg) + test.CmpErr(t, tc.expErr, err) + if tc.expErr != nil { + return + } + + if diff := cmp.Diff(tc.expProps, cfg.SpdkIobufProps, defConfigCmpOpts()...); diff != "" { + t.Fatalf("bad props (-want +got):\n%s", diff) + } + }) + } +} + +func TestStorage_SpdkIobuf_ToYAML(t *testing.T) { + for name, tc := range map[string]struct { + props SpdkIobuf + expOut string + }{ + "empty": { + expOut: "{}\n", + }, + "small_pool_count only": { + props: SpdkIobuf{ + SmallPoolCount: 1024, + }, + expOut: "small_pool_count: 1024\n", + }, + "large_pool_count only": { + props: SpdkIobuf{ + LargePoolCount: 512, + }, + expOut: "large_pool_count: 512\n", + }, + "both pool counts set": { + props: SpdkIobuf{ + SmallPoolCount: 2048, + LargePoolCount: 1024, + }, + expOut: "small_pool_count: 2048\nlarge_pool_count: 1024\n", + }, + } { + t.Run(name, func(t 
*testing.T) { + buf, err := yaml.Marshal(&tc.props) + if err != nil { + t.Fatal(err) + } + + if diff := cmp.Diff(tc.expOut, string(buf)); diff != "" { + t.Fatalf("bad output (-want +got):\n%s", diff) + } + }) + } +} + +func TestStorage_SpdkIobuf_JSON(t *testing.T) { + for name, tc := range map[string]struct { + props SpdkIobuf + expOut string + }{ + "empty": { + expOut: `{}`, + }, + "small_pool_count only": { + props: SpdkIobuf{ + SmallPoolCount: 1024, + }, + expOut: `{"small_pool_count":1024}`, + }, + "large_pool_count only": { + props: SpdkIobuf{ + LargePoolCount: 512, + }, + expOut: `{"large_pool_count":512}`, + }, + "both pool counts set": { + props: SpdkIobuf{ + SmallPoolCount: 2048, + LargePoolCount: 1024, + }, + expOut: `{"small_pool_count":2048,"large_pool_count":1024}`, + }, + } { + t.Run(name, func(t *testing.T) { + buf, err := json.Marshal(&tc.props) + if err != nil { + t.Fatal(err) + } + + if diff := cmp.Diff(tc.expOut, string(buf)); diff != "" { + t.Fatalf("bad output (-want +got):\n%s", diff) + } + + // Test round-trip + var unmarshaled SpdkIobuf + if err := json.Unmarshal(buf, &unmarshaled); err != nil { + t.Fatal(err) + } + + if diff := cmp.Diff(tc.props, unmarshaled); diff != "" { + t.Fatalf("bad round-trip (-want +got):\n%s", diff) + } + }) + } +} + func TestStorage_ControlMetadata_Directory(t *testing.T) { for name, tc := range map[string]struct { cm ControlMetadata diff --git a/src/control/server/storage/provider.go b/src/control/server/storage/provider.go index f8c3601b845..7696ececacb 100644 --- a/src/control/server/storage/provider.go +++ b/src/control/server/storage/provider.go @@ -589,6 +589,7 @@ func BdevWriteConfigRequestFromConfig(ctx context.Context, log logging.Logger, c AccelProps: cfg.AccelProps, SpdkRpcSrvProps: cfg.SpdkRpcSrvProps, AutoFaultyProps: cfg.AutoFaultyProps, + SpdkIobufProps: cfg.SpdkIobufProps, } for idx, tier := range cfg.Tiers.BdevConfigs() { diff --git a/utils/config/daos_server.yml b/utils/config/daos_server.yml 
index 7fc87854f9c..88e93fcd3f1 100644 --- a/utils/config/daos_server.yml +++ b/utils/config/daos_server.yml @@ -476,6 +476,10 @@ # max_io_errs: 100 # max_csum_errs: 200 # +# # Set SPDK iobuf tunable values. Defaults if unset are 8192 for small and 1024 for large. +# spdk_iobuf: +# small_pool_count: 16384 +# large_pool_count: 2048 # #- # # Number of I/O service threads (and network endpoints) per engine. From fd0e3bf9b4fccb54d8111ad1d54c1592181d8507 Mon Sep 17 00:00:00 2001 From: Kris Jacque Date: Fri, 6 Feb 2026 10:27:05 -0700 Subject: [PATCH 186/253] DAOS-17416 control: Use C.malloc in daos C API calls (#17479) Passing Go-allocated C structs into the daos C API was causing enough false positives in valgrind to make the results unusable. Signed-off-by: Kris Jacque --- src/control/lib/daos/api/pool.go | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/src/control/lib/daos/api/pool.go b/src/control/lib/daos/api/pool.go index 666592f4b4c..1f2556759cc 100644 --- a/src/control/lib/daos/api/pool.go +++ b/src/control/lib/daos/api/pool.go @@ -246,7 +246,9 @@ func PoolConnect(ctx context.Context, req PoolConnectReq) (*PoolConnectResp, err req.Flags = daos.PoolConnectFlagReadOnly } - var dpi C.daos_pool_info_t + dpi := (*C.daos_pool_info_t)(C.calloc(1, C.sizeof_daos_pool_info_t)) + defer C.free(unsafe.Pointer(dpi)) + if req.Query { dpi.pi_bits = C.ulong(daos.DefaultPoolQueryMask) } @@ -260,11 +262,14 @@ func PoolConnect(ctx context.Context, req PoolConnectReq) (*PoolConnectResp, err defer freeString(cSys) } - if err := daosError(daos_pool_connect(cPoolID, cSys, C.uint(req.Flags), &poolConn.daosHandle, &dpi, nil)); err != nil { + cHandle := (*C.daos_handle_t)(C.calloc(1, C.sizeof_daos_handle_t)) + defer C.free(unsafe.Pointer(cHandle)) + if err := daosError(daos_pool_connect(cPoolID, cSys, C.uint(req.Flags), cHandle, dpi, nil)); err != nil { return nil, errors.Wrap(err, "failed to connect to pool") } - poolInfo := newPoolInfo(&dpi) + poolInfo 
:= newPoolInfo(dpi) + poolConn.daosHandle = *cHandle poolConn.connHandle.UUID = poolInfo.UUID if req.ID != poolInfo.UUID.String() { poolInfo.Label = req.ID @@ -465,17 +470,18 @@ func PoolQueryTargets(ctx context.Context, sysName, poolID string, rank ranklist defer disconnect() logging.FromContext(ctx).Debugf("PoolQueryTargets(%s:%d:[%s])", poolConn, rank, targets) - ptInfo := C.daos_target_info_t{} + ptInfo := (*C.daos_target_info_t)(C.calloc(1, C.sizeof_daos_target_info_t)) + defer C.free(unsafe.Pointer(ptInfo)) var rc C.int infos := make([]*daos.PoolQueryTargetInfo, 0, targets.Count()) for _, tgt := range targets.Ranks() { - rc = daos_pool_query_target(poolConn.daosHandle, C.uint32_t(tgt), C.uint32_t(rank), &ptInfo, nil) + rc = daos_pool_query_target(poolConn.daosHandle, C.uint32_t(tgt), C.uint32_t(rank), ptInfo, nil) if err := daosError(rc); err != nil { return nil, errors.Wrapf(err, "failed to query pool %s rank:target %d:%d", poolID, rank, tgt) } - infos = append(infos, newPoolTargetInfo(&ptInfo)) + infos = append(infos, newPoolTargetInfo(ptInfo)) } return infos, nil From 9addf85a67d2f153cf13368ce76ad408c1e15cd0 Mon Sep 17 00:00:00 2001 From: Jan Michalski Date: Fri, 6 Feb 2026 18:26:59 +0000 Subject: [PATCH 187/253] SRE-3578 test: expand ompi_mpi_finalize() leak suppression (#17510) Found on EL9.7 { Memcheck:Leak match-leak-kinds: definite fun:realloc fun:__vasprintf_internal fun:__asprintf_chk obj:* obj:* obj:* obj:* obj:* obj:* obj:* fun:orte_finalize fun:ompi_mpi_finalize fun:par_fini fun:par_fini fun:main } Signed-off-by: Jan Michalski --- utils/test_memcheck.supp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/test_memcheck.supp b/utils/test_memcheck.supp index ef69713271a..0f89a558093 100644 --- a/utils/test_memcheck.supp +++ b/utils/test_memcheck.supp @@ -165,7 +165,7 @@ Memcheck:Leak match-leak-kinds: all ... - fun:?alloc + fun:*alloc ... fun:ompi_mpi_finalize ... 
From 8166d6110d1e051955c82841501ab1631fc9b0a2 Mon Sep 17 00:00:00 2001 From: Tomasz Gromadzki Date: Fri, 6 Feb 2026 19:33:08 +0100 Subject: [PATCH 188/253] DAOS-18359 ci: Skip-build-* are no longer ingnored (#17514) Skip-build-* pragmas are taken into account Signed-off-by: Tomasz Gromadzki --- Jenkinsfile | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 26ce0473a71..e36de3fb189 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -200,14 +200,18 @@ Boolean skip_build_stage(String distro='', String compiler='gcc') { } } - // Skip the stage if any Skip-build-- pragmas are true - String pragma_names = ['build'] + // Skip the stage if any Skip-build[--] pragmas are true + List pragma_names = ['build'] if (distro && compiler) { pragma_names << "build-${distro}-${compiler}" } - Boolean any_pragma_skip = pragma_names.any { name -> skip_pragma_set(name) } + Boolean any_pragma_skip = pragma_names.any { name -> + if (skip_pragma_set(name)) { + println("[${env.STAGE_NAME}] Skipping build stage due to \"Skip-${name}: true\" pragma") + return true + } + } if (any_pragma_skip) { - println("[${env.STAGE_NAME}] Skipping build stage for due to Skip-[${pragma_names}] pragma") return true } @@ -876,7 +880,7 @@ pipeline { expression { !skipStage() } } agent { - label params.CI_FUNCTIONAL_VM9_LABEL + label vm9_label('EL8') } steps { job_step_update( From 7da8024ad9821d6fd1609669029ec0f4b31a5f5d Mon Sep 17 00:00:00 2001 From: Jan Michalski Date: Fri, 6 Feb 2026 23:17:33 +0000 Subject: [PATCH 189/253] DAOS-18529 test: expand/add a few suppressions (#17471) { Memcheck:Addr8 fun:atomic_load<__sanitizer::atomic_uint64_t> fun:NoTsanAtomicLoad fun:AtomicLoad fun:__tsan_go_atomic64_load fun:racecall } { Memcheck:Addr8 fun:atomic_store<__sanitizer::atomic_uint64_t> fun:NoTsanAtomicStore fun:AtomicStore fun:__tsan_go_atomic64_store fun:racecall } { Memcheck:Value8 fun:MemoryAccess fun:__tsan_read_pc fun:racecall } { Memcheck:Value8 
fun:atomic_store<__sanitizer::atomic_uint32_t> fun:StoreShadow fun:CheckRaces fun:MemoryAccess fun:__tsan_write_pc fun:racecall } { Memcheck:Addr8 fun:func_add fun:NoTsanAtomicFetchAdd fun:__tsan_go_atomic64_fetch_add fun:racecall } Signed-off-by: Jan Michalski --- src/cart/utils/memcheck-cart.supp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/cart/utils/memcheck-cart.supp b/src/cart/utils/memcheck-cart.supp index a2f73cfcdad..30c49052d44 100644 --- a/src/cart/utils/memcheck-cart.supp +++ b/src/cart/utils/memcheck-cart.supp @@ -606,18 +606,21 @@ { __tsan_go_atomic64_load Memcheck:Addr8 + ... fun:__tsan_go_atomic64_load fun:racecall } { __tsan_go_atomic64_store Memcheck:Addr8 + ... fun:__tsan_go_atomic64_store fun:racecall } { __tsan_go_atomic64_compare_exchange Memcheck:Addr8 + ... fun:__tsan_go_atomic64_compare_exchange fun:racecall } @@ -634,12 +637,14 @@ { __tsan_write_pc Memcheck:Value8 + ... fun:__tsan_write_pc fun:racecall } { __tsan_read_pc Memcheck:Value8 + ... fun:__tsan_read_pc fun:racecall } @@ -695,18 +700,21 @@ { __tsan_go_atomic32_load Memcheck:Addr4 + ... fun:__tsan_go_atomic32_load fun:racecall } { __tsan_go_atomic32_store Memcheck:Addr4 + ... fun:__tsan_go_atomic32_store fun:racecall } { __tsan_go_atomic32_compare_exchange Memcheck:Addr4 + ... fun:__tsan_go_atomic32_compare_exchange fun:racecall } @@ -758,12 +766,14 @@ { __tsan_go_atomic32_fetch_add Memcheck:Addr4 + ... fun:__tsan_go_atomic32_fetch_add fun:racecall } { __tsan_go_atomic64_fetch_add Memcheck:Addr8 + ... 
fun:__tsan_go_atomic64_fetch_add fun:racecall } From a83ec8bc38299e64d2002f7d755b63e9b96f9903 Mon Sep 17 00:00:00 2001 From: Niu Yawei Date: Sat, 7 Feb 2026 14:47:33 +0800 Subject: [PATCH 190/253] DAOS-18544 rebuild: free leaked mo_cusm_iov (#17519) Free leaked mo_csum_iov Signed-off-by: Niu Yawei --- src/object/srv_obj_migrate.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/object/srv_obj_migrate.c b/src/object/srv_obj_migrate.c index ad0d7b97843..bfd8a5c324a 100644 --- a/src/object/srv_obj_migrate.c +++ b/src/object/srv_obj_migrate.c @@ -1838,6 +1838,7 @@ migrate_one_destroy(struct migrate_one *mrone) D_ASSERT(d_list_empty(&mrone->mo_list)); daos_iov_free(&mrone->mo_dkey); + daos_iov_free(&mrone->mo_csum_iov); if (mrone->mo_iods_update_ephs) { for (i = 0; i < mrone->mo_iod_alloc_num; i++) { From f5ea6d5c75b34ae46a78df8f13ac3a45b93599d3 Mon Sep 17 00:00:00 2001 From: Jan Michalski Date: Mon, 9 Feb 2026 15:54:03 +0000 Subject: [PATCH 191/253] SRE-3578 test: suppress setgrent() leak (#17511) Found on EL9.7 { Memcheck:Leak match-leak-kinds: reachable fun:malloc fun:nss_files_global_allocate fun:__libc_allocate_once_slow fun:__nss_files_data_get fun:__nss_files_data_setent fun:__nss_setent fun:setgrent fun:test_acl_all_gid_principal_conversion obj:/usr/lib64/libcmocka.so.0.7.0 fun:_cmocka_run_group_tests fun:main } Signed-off-by: Jan Michalski --- utils/test_memcheck.supp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/utils/test_memcheck.supp b/utils/test_memcheck.supp index 0f89a558093..df2cd5bb28b 100644 --- a/utils/test_memcheck.supp +++ b/utils/test_memcheck.supp @@ -448,3 +448,11 @@ fun:spdk_mem_map_set_translation ... } +{ + setgrent() leak + Memcheck:Leak + match-leak-kinds: reachable + fun:*alloc + ... 
+ fun:setgrent +} From 0f5aab4a59ca95a9f7c7f8d1a33c19a7c7b4a9f9 Mon Sep 17 00:00:00 2001 From: Jan Michalski Date: Mon, 9 Feb 2026 15:58:32 +0000 Subject: [PATCH 192/253] SRE-3572 test: skip installing openmpi-devel when requested EL9 (#17521) Ref: daos-stack/daos#16634 Signed-off-by: Jan Michalski --- utils/scripts/install-el9.sh | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/utils/scripts/install-el9.sh b/utils/scripts/install-el9.sh index a9234ff7cba..21980fac63f 100755 --- a/utils/scripts/install-el9.sh +++ b/utils/scripts/install-el9.sh @@ -65,7 +65,6 @@ dnf --nodocs install ${dnf_install_args} \ ndctl-devel \ numactl \ numactl-devel \ - openmpi-devel \ openssl-devel \ pandoc \ patch \ @@ -84,6 +83,12 @@ dnf --nodocs install ${dnf_install_args} \ ncurses-devel \ yasm +if [[ -z "${NO_OPENMPI_DEVEL+set}" ]]; then + # shellcheck disable=SC2086 + dnf --nodocs install ${dnf_install_args} \ + openmpi-devel +fi + ruby_version=$(dnf module list ruby | grep -Eow "3\.[0-9]+" | tail -1) # shellcheck disable=SC2086 dnf --nodocs install ${dnf_install_args} \ From 9a024a76d840e63db159d1add9d84e88e4916563 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 9 Feb 2026 12:53:45 -0800 Subject: [PATCH 193/253] DAOS-18575 cq: Bump github/codeql-action to 4.32.2 (#17528) Updates `github/codeql-action` from 4.32.0 to 4.32.2 Signed-off-by: dependabot[bot] --- .github/workflows/ossf-scorecard.yml | 2 +- .github/workflows/trivy.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ossf-scorecard.yml b/.github/workflows/ossf-scorecard.yml index 4fb3da4ec81..c1d4f3c5e62 100644 --- a/.github/workflows/ossf-scorecard.yml +++ b/.github/workflows/ossf-scorecard.yml @@ -71,6 +71,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard (optional). 
# Commenting out will disable upload of results to your repo's Code Scanning dashboard - name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@b20883b0cd1f46c72ae0ba6d1090936928f9fa30 # v4.32.0 + uses: github/codeql-action/upload-sarif@45cbd0c69e560cd9e7cd7f8c32362050c9b7ded2 # v4.32.2 with: sarif_file: results.sarif diff --git a/.github/workflows/trivy.yml b/.github/workflows/trivy.yml index e4c0103f69d..fc7b733dd85 100644 --- a/.github/workflows/trivy.yml +++ b/.github/workflows/trivy.yml @@ -68,7 +68,7 @@ jobs: trivy-config: 'utils/trivy/trivy.yaml' - name: Upload Trivy scan results to GitHub Security tab - uses: github/codeql-action/upload-sarif@b20883b0cd1f46c72ae0ba6d1090936928f9fa30 # v4.32.0 + uses: github/codeql-action/upload-sarif@45cbd0c69e560cd9e7cd7f8c32362050c9b7ded2 # v4.32.2 with: sarif_file: 'trivy-results.sarif' From 7578b067cc28c71129c3f589e81bea1912b59db6 Mon Sep 17 00:00:00 2001 From: Dalton Bohning Date: Mon, 9 Feb 2026 13:04:17 -0800 Subject: [PATCH 194/253] DAOS-16983 test: reduce dfuse/caching_check threshold (#17486) Reduce expected threshold from 300% to 250%. Discard performance of first read with caching enabled. Also misc test cleanup. Signed-off-by: Dalton Bohning --- src/tests/ftest/dfuse/caching_check.py | 25 ++++++++++++------------ src/tests/ftest/dfuse/caching_check.yaml | 17 +++++++++------- 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/src/tests/ftest/dfuse/caching_check.py b/src/tests/ftest/dfuse/caching_check.py index 852a24f0dfd..61980fd4aaa 100644 --- a/src/tests/ftest/dfuse/caching_check.py +++ b/src/tests/ftest/dfuse/caching_check.py @@ -1,5 +1,6 @@ """ (C) Copyright 2019-2023 Intel Corporation. 
+ (C) Copyright 2026 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -36,22 +37,21 @@ def test_dfuse_caching_check(self): :avocado: tags=daosio,dfuse :avocado: tags=DfuseCachingCheck,test_dfuse_caching_check """ - # get params - flags = self.params.get("iorflags", '/run/ior/*') + # Get params + ior_flags_write = self.params.get("flags_write", self.ior_cmd.namespace) + ior_flags_read = self.params.get("flags_read", self.ior_cmd.namespace) read_x = self.params.get("read_x", "/run/ior/*", 1) - # update flag - self.ior_cmd.update_params(flags=flags[0]) - self.log_step('Write to the dfuse mount point') + self.ior_cmd.update_params(flags=ior_flags_write) self.run_ior_with_pool(fail_on_warning=False, stop_dfuse=False) self.log_step('Get baseline read performance from dfuse with caching disabled') - self.ior_cmd.update_params(flags=flags[1]) + self.ior_cmd.update_params(flags=ior_flags_read) base_read_arr = [] - out = self.run_ior_with_pool(fail_on_warning=False, stop_dfuse=False) + out = self.run_ior_with_pool(fail_on_warning=False, stop_dfuse=False, create_cont=False) base_read_arr.append(IorCommand.get_ior_metrics(out)) - out = self.run_ior_with_pool(fail_on_warning=False, stop_dfuse=False) + out = self.run_ior_with_pool(fail_on_warning=False, stop_dfuse=False, create_cont=False) base_read_arr.append(IorCommand.get_ior_metrics(out)) # the index of max_mib @@ -62,12 +62,11 @@ def test_dfuse_caching_check(self): self.dfuse.update_params(disable_caching=False) self.dfuse.run() - self.log_step('Get first read performance with caching enabled') - out = self.run_ior_with_pool(fail_on_warning=False, stop_dfuse=False) - base_read_arr.append(IorCommand.get_ior_metrics(out)) + self.log_step('Discard first read performance with caching enabled') + _ = self.run_ior_with_pool(fail_on_warning=False, stop_dfuse=False, create_cont=False) self.log_step('Get cached read performance') - out = self.run_ior_with_pool(fail_on_warning=False) + out 
= self.run_ior_with_pool(fail_on_warning=False, create_cont=False) with_caching = IorCommand.get_ior_metrics(out) self.log_step('Verify cached read performance is greater than first read') @@ -78,4 +77,4 @@ def test_dfuse_caching_check(self): for base_read in base_read_arr: actual_change = percent_change(base_read[0][max_mib], with_caching[0][max_mib]) if actual_change < read_x: - self.fail('Expected a speedup of {} but got {}'.format(read_x, actual_change)) + self.fail(f'Expected a speedup of {read_x} but got {actual_change}') diff --git a/src/tests/ftest/dfuse/caching_check.yaml b/src/tests/ftest/dfuse/caching_check.yaml index 42a9b3f3dc3..71e453114d4 100644 --- a/src/tests/ftest/dfuse/caching_check.yaml +++ b/src/tests/ftest/dfuse/caching_check.yaml @@ -1,7 +1,9 @@ hosts: test_servers: 3 test_clients: 1 + timeout: 300 + server_config: name: daos_server engines_per_host: 1 @@ -10,24 +12,25 @@ server_config: log_mask: INFO storage: auto system_ram_reserved: 64 + pool: size: 50% + container: type: POSIX - control_method: daos + ior: client_processes: ppn: 32 test_file: testFile api: POSIX - dfs_destroy: false transfer_size: 1M block_size: 1G - dfs_oclass: "EC_2P1G1" - read_x: 3 # 300% - iorflags: - - "-v -w -k -G 3" - - "-v -r -k -G 3" + dfs_oclass: EC_2P1G1 + read_x: 2.5 # 250% + flags_write: "-v -w -k -G 3" + flags_read: "-v -r -k -G 3" + dfuse: disable_caching: true disable_wb_caching: true From 0f60bbfe21b798c742bb0061f8a1d257850ed5b9 Mon Sep 17 00:00:00 2001 From: Liu Xuezhao Date: Tue, 10 Feb 2026 12:36:26 +0800 Subject: [PATCH 195/253] DAOS-18487 rebuild: refine rebuild_tgt_query (#17508) dms.dm_migrating status only can be trust when scan global done, so get rt_global_scan_done firstly before checking dms.dm_migrating. and some log refining. 
Signed-off-by: Xuezhao Liu Co-authored-by: Liang Zhen Co-authored-by: Wang Shilong --- src/container/srv_container.c | 2 +- src/object/srv_obj_migrate.c | 16 ++++++++-------- src/rebuild/srv.c | 15 +++++++++++---- 3 files changed, 20 insertions(+), 13 deletions(-) diff --git a/src/container/srv_container.c b/src/container/srv_container.c index c2d39f5ea1d..3efa3211f89 100644 --- a/src/container/srv_container.c +++ b/src/container/srv_container.c @@ -1914,7 +1914,7 @@ ds_cont_tgt_refresh_track_eph(uuid_t pool_uuid, uuid_t cont_uuid, rc = ds_pool_thread_collective( pool_uuid, PO_COMP_ST_NEW | PO_COMP_ST_DOWN | PO_COMP_ST_DOWNOUT, cont_refresh_track_eph_one, &arg, DSS_ULT_DEEP_STACK | DSS_ULT_FL_PERIODIC); - DL_CDEBUG(rc != 0, DLOG_ERR, DLOG_INFO, rc, + DL_CDEBUG(rc != 0, DLOG_ERR, DLOG_DBG, rc, DF_CONT ": refresh ec_agg_eph " DF_X64 ", " "stable_eph " DF_X64, DP_CONT(pool_uuid, cont_uuid), ec_agg_eph, stable_eph); diff --git a/src/object/srv_obj_migrate.c b/src/object/srv_obj_migrate.c index bfd8a5c324a..ea3d78640a8 100644 --- a/src/object/srv_obj_migrate.c +++ b/src/object/srv_obj_migrate.c @@ -751,7 +751,7 @@ mrone_obj_fetch(struct migrate_one *mrone, daos_handle_t oh, d_sg_list_t *sgls, int rc = 0; if (tls->mpt_fini) { - D_WARN("someone aborted the rebuild " DF_UUID "\n", DP_UUID(mrone->mo_pool_uuid)); + D_WARN(DF_RB " someone aborted the rebuild", DP_RB_MRO(mrone)); D_GOTO(out, rc = migrate_pool_tls_get_status(tls)); } @@ -2018,7 +2018,7 @@ migrate_one_ult(void *arg) tls = mrone->mo_tls; if (tls->mpt_fini) { - D_WARN("someone aborted the rebuild " DF_UUID "\n", DP_UUID(mrone->mo_pool_uuid)); + D_WARN(DF_RB " someone aborted the rebuild", DP_RB_MRO(mrone)); goto out; } @@ -2474,8 +2474,8 @@ migrate_one_create(struct enum_unpack_arg *arg, struct dc_obj_enum_unpack_io *io int rc = 0; if (tls->mpt_fini) { - D_WARN("someone aborted the rebuild " DF_UUID "dkey " DF_KEY "iod_nr %d\n", - DP_UUID(iter_arg->pool_uuid), DP_KEY(dkey), iod_eph_total); + D_WARN("someone 
aborted the rebuild " DF_UUID " ver %d, dkey " DF_KEY "iod_nr %d\n", + DP_UUID(iter_arg->pool_uuid), version, DP_KEY(dkey), iod_eph_total); D_GOTO(out, rc = 0); } D_DEBUG(DB_REBUILD, DF_RB ": migrate dkey " DF_KEY " iod nr %d\n", DP_RB_MPT(tls), @@ -2635,7 +2635,7 @@ migrate_enum_unpack_cb(struct dc_obj_enum_unpack_io *io, void *data) return rc; if (tls->mpt_fini) { - D_WARN("someone aborted the rebuild " DF_UUID "\n", DP_UUID(arg->arg->pool_uuid)); + D_WARN(DF_RB " someone aborted the rebuild", DP_RB_MPT(tls)); D_GOTO(put, rc = 0); } @@ -2787,7 +2787,7 @@ migrate_obj_punch_one(void *data) tls = arg->pool_tls; if (tls->mpt_fini) { - D_WARN("someone aborted the rebuild " DF_UUID "\n", DP_UUID(arg->pool_uuid)); + D_WARN(DF_RB " someone aborted the rebuild", DP_RB_MPT(tls)); D_GOTO(out, rc = 0); } @@ -2835,7 +2835,7 @@ migrate_start_ult(struct enum_unpack_arg *unpack_arg) int rc = 0; if (tls->mpt_fini) { - D_WARN("someone aborted the rebuild " DF_UUID "\n", DP_UUID(arg->pool_uuid)); + D_WARN(DF_RB " someone aborted the rebuild", DP_RB_MPT(tls)); D_GOTO(out, rc = 0); } d_list_for_each_entry_safe(mrone, tmp, &unpack_arg->merge_list, @@ -3242,7 +3242,7 @@ migrate_obj_ult(void *data) migrate_obj_get(arg); tls = arg->pool_tls; if (tls->mpt_fini) { - D_WARN("someone aborted the rebuild " DF_UUID "\n", DP_UUID(arg->pool_uuid)); + D_WARN(DF_RB " someone aborted the rebuild", DP_RB_MPT(tls)); D_GOTO(free_notls, rc); } diff --git a/src/rebuild/srv.c b/src/rebuild/srv.c index 87b27f03776..aeb8b2daa87 100644 --- a/src/rebuild/srv.c +++ b/src/rebuild/srv.c @@ -292,10 +292,14 @@ rebuild_leader_set_status(struct rebuild_global_pool_tracker *rgt, D_DEBUG(DB_REBUILD, DF_RB " rank %d, update dtx_resync_version from %d to %d", DP_RB_RGT(rgt), rank, status->dtx_resync_version, resync_ver); status->dtx_resync_version = resync_ver; - if (flags & SCAN_DONE) + if ((flags & SCAN_DONE) && !status->scan_done) { + D_DEBUG(DB_REBUILD, DF_RB " rank %d is scan_done", DP_RB_RGT(rgt), rank); 
status->scan_done = 1; - if (flags & PULL_DONE) + } + if ((flags & PULL_DONE) && !status->pull_done) { + D_DEBUG(DB_REBUILD, DF_RB " rank %d is pull_done", DP_RB_RGT(rgt), rank); status->pull_done = 1; + } } static void @@ -656,11 +660,14 @@ rebuild_tgt_query(struct rebuild_tgt_pool_tracker *rpt, struct ds_migrate_status dms = { 0 }; struct rebuild_pool_tls *tls; struct rebuild_tgt_query_arg arg; + bool global_scan_done; int rc; + /* Get rt_global_scan_done before querying dms.dm_migrating status */ + global_scan_done = rpt->rt_global_scan_done; + arg.rpt = rpt; arg.status = status; - if (rpt->rt_rebuild_op != RB_OP_RECLAIM && rpt->rt_rebuild_op != RB_OP_FAIL_RECLAIM) { rc = ds_migrate_query_status(rpt->rt_pool_uuid, rpt->rt_rebuild_ver, rpt->rt_rebuild_gen, rpt->rt_rebuild_op, @@ -686,7 +693,7 @@ rebuild_tgt_query(struct rebuild_tgt_pool_tracker *rpt, status->obj_count += dms.dm_obj_count; status->rec_count = dms.dm_rec_count; status->size = dms.dm_total_size; - if (status->scanning || dms.dm_migrating) + if (!global_scan_done || status->scanning || dms.dm_migrating) status->rebuilding = true; else status->rebuilding = false; From 103e8b32614551dc5a97ce8762bb936f2c1aa3a3 Mon Sep 17 00:00:00 2001 From: Dalton Bohning Date: Tue, 10 Feb 2026 09:48:14 -0800 Subject: [PATCH 196/253] DAOS-18588 build: Create 2.7.104-tb (#17537) Signed-off-by: Dalton Bohning --- TAG | 2 +- VERSION | 2 +- utils/rpms/daos.changelog | 3 +++ utils/rpms/daos.spec | 4 ++-- 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/TAG b/TAG index 0a41151c039..069a65ddf86 100644 --- a/TAG +++ b/TAG @@ -1 +1 @@ -2.7.103-tb +2.7.104-tb diff --git a/VERSION b/VERSION index da75c3b7334..1b2efb48149 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.7.103 +2.7.104 diff --git a/utils/rpms/daos.changelog b/utils/rpms/daos.changelog index 3a0a53f5251..ac92201275b 100644 --- a/utils/rpms/daos.changelog +++ b/utils/rpms/daos.changelog @@ -1,4 +1,7 @@ %changelog +* Tue Feb 10 2026 Dalton Bohning 
2.7.104-1 +- Bump version to 2.7.104 + * Fri Jan 16 2026 Jerome Soumagne 2.7.103-2 - Drop libfabric-devel build requirement - Drop libfabric requirement that is already provided by mercury-libfabric diff --git a/utils/rpms/daos.spec b/utils/rpms/daos.spec index 2a1827b0176..b4b8af94452 100644 --- a/utils/rpms/daos.spec +++ b/utils/rpms/daos.spec @@ -23,8 +23,8 @@ %endif Name: daos -Version: 2.7.103 -Release: 2%{?relval}%{?dist} +Version: 2.7.104 +Release: 1%{?relval}%{?dist} Summary: DAOS Storage Engine License: BSD-2-Clause-Patent From 7d40f64b4d1e1c7d8d8e0204d50db78e03ae5127 Mon Sep 17 00:00:00 2001 From: 0xE0F Date: Wed, 11 Feb 2026 09:02:40 +1100 Subject: [PATCH 197/253] DAOS-16362 pydaos: ensure checkpoint path is created (#17489) Some use of Checkpoint assumes that path to the checkpoiunt file will be created with all missing parent directories. For instance, DLIO benchmark writes checkpoints as `/prefix/global_epochX_stepY/layer-Z.pt`. This commit adds `ensure_path` parameter to call `mkdirall` before writing checkpoint file. Signed-off-by: Denis Barakhtanov --- src/client/pydaos/torch/torch_api.py | 52 +++++++++++++++----- src/client/pydaos/torch/torch_shim.c | 36 +++++++++++++- src/tests/ftest/pytorch/checkpoint.py | 68 ++++++++++++++++++++------- 3 files changed, 127 insertions(+), 29 deletions(-) diff --git a/src/client/pydaos/torch/torch_api.py b/src/client/pydaos/torch/torch_api.py index 9225c97e8d1..73ed2f22dbb 100644 --- a/src/client/pydaos/torch/torch_api.py +++ b/src/client/pydaos/torch/torch_api.py @@ -1,6 +1,6 @@ # # (C) Copyright 2024-2025 Google LLC -# (C) Copyright 2024-2025 Enakta Labs Ltd +# (C) Copyright 2024-2026 Enakta Labs Ltd # # SPDX-License-Identifier: BSD-2-Clause-Patent # @@ -11,11 +11,14 @@ In addition, it provides Checkpoint class to save and load PyTorch model checkpoints. 
""" +import errno import io import math import os import stat +import sys from multiprocessing import Process, Queue +from pathlib import Path from torch.utils.data import Dataset as TorchDataset from torch.utils.data import IterableDataset as TorchIterableDataset @@ -372,15 +375,19 @@ def __init__(self, dfs, path, mode, open_flags, class_name, self._workers.append(worker) def _worker_fn(self, queue): - self._dfs.worker_init() - while True: - work = queue.get() - if work is None: - break - - (offset, chunk) = work - self._dfs.write(self._path, self._mode, self._oflags, - self._class_name, self._file_chunk_size, offset, chunk) + try: + self._dfs.worker_init() + while True: + work = queue.get() + if work is None: + break + + (offset, chunk) = work + self._dfs.write(self._path, self._mode, self._oflags, + self._class_name, self._file_chunk_size, offset, chunk) + # pylint: disable=broad-exception-caught + except Exception as e: + sys.exit(getattr(e, 'errno', errno.EIO)) def write(self, data): """ Writes data to the buffer.""" @@ -431,6 +438,11 @@ def close(self): for worker in self._workers: worker.join() + # lets see if any worker exited abnormally and if so, raise an error + for worker in self._workers: + if worker.exitcode != 0: + raise OSError(worker.exitcode, os.strerror(worker.exitcode)) + super().close() def _flush(self): @@ -619,13 +631,16 @@ def reader(self, file, stream=None): stream.seek(0) return stream - def writer(self, file): + def writer(self, file, ensure_path=True): """ Returns write buffer to save the checkpoint file """ if file is None: raise ValueError("file is required") path = os.path.join(self._prefix, file) + if ensure_path: + self._dfs.mkdirall(os.path.dirname(path)) + return WriteBuffer(self._dfs, path, self._mode, self._oflags, self._class_name, self._file_chunk_size, self._transfer_chunk_size, self._chunks_limit, self._workers) @@ -810,3 +825,18 @@ def get_file_size(self, path): if ret != 0: raise OSError(ret, os.strerror(ret), path) return 
size + + def mkdirall(self, path, mode=0o755): + """ Creates directory, making parent directories if needed """ + + path = os.path.normpath(path) + dirs = list(Path(path).parts) + if not dirs: + raise ValueError(f"invalid path: {path}") + + parent = dirs.pop(0) + for name in dirs: + parent = os.path.join(parent, name) + ret = torch_shim.torch_mkdir(DAOS_MAGIC, self._dfs, parent, mode) + if ret not in (0, errno.EEXIST): + raise OSError(ret, os.strerror(ret), parent) diff --git a/src/client/pydaos/torch/torch_shim.c b/src/client/pydaos/torch/torch_shim.c index 73d93df45e8..5027ab690f8 100644 --- a/src/client/pydaos/torch/torch_shim.c +++ b/src/client/pydaos/torch/torch_shim.c @@ -1,7 +1,7 @@ /** * (C) Copyright 2019-2024 Intel Corporation. * (C) Copyright 2024-2025 Google LLC - * (C) Copyright 2024-2025 Enakta Labs Ltd + * (C) Copyright 2024-2026 Enakta Labs Ltd * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -1061,6 +1061,39 @@ __shim_handle__torch_get_fsize(PyObject *self, PyObject *args) return Py_BuildValue("iK", rc, st.st_size); } +static PyObject * +__shim_handle__torch_mkdir(PyObject *self, PyObject *args) +{ + struct dfs_handle *hdl = NULL; + char *path = NULL; + char *dir = NULL; + char *name = NULL; + mode_t mode; + dfs_obj_t *parent = NULL; + + RETURN_NULL_IF_FAILED_TO_PARSE(args, "LsI", &hdl, &path, &mode); + + assert(hdl->dfs != NULL); + + int rc = split_path(path, &dir, &name); + if (rc) { + return PyLong_FromLong(rc); + } + + rc = lookup_or_insert_dir_obj(hdl, dir, &parent); + if (rc) { + D_ERROR("Could not lookup '%s': %s (rc=%d)", dir, strerror(rc), rc); + goto out; + } + + rc = dfs_mkdir(hdl->dfs, parent, name, mode, 0); + +out: + D_FREE(dir); + D_FREE(name); + return PyLong_FromLong(rc); +} + /** * Python shim module */ @@ -1080,6 +1113,7 @@ static PyMethodDef torchMethods[] = { EXPORT_PYTHON_METHOD(torch_recommended_dir_split), EXPORT_PYTHON_METHOD(torch_list_with_anchor), EXPORT_PYTHON_METHOD(torch_get_fsize), + 
EXPORT_PYTHON_METHOD(torch_mkdir), EXPORT_PYTHON_METHOD(module_init), EXPORT_PYTHON_METHOD(module_fini), diff --git a/src/tests/ftest/pytorch/checkpoint.py b/src/tests/ftest/pytorch/checkpoint.py index fd450768db5..685491dacc9 100644 --- a/src/tests/ftest/pytorch/checkpoint.py +++ b/src/tests/ftest/pytorch/checkpoint.py @@ -1,9 +1,10 @@ """ (C) Copyright 2025 Google LLC - (C) Copyright 2025 Enakta Labs Ltd + (C) Copyright 2025-2026 Enakta Labs Ltd SPDX-License-Identifier: BSD-2-Clause-Patent """ +import errno import os import uuid @@ -73,6 +74,41 @@ def test_checkpoint_chunking(self): chunk_size=chunk_size, chunks_limit=chunks_limit, workers=worker) + def test_checkpoint_nested_directories(self): + """ Test Pytorch Checkpoint interface with nested directories + Test Description: Ensure that parent directories are created for the checkpoint path + + :avocado: tags=all,full_regression + :avocado: tags=vm + :avocado: tags=pytorch + :avocado: tags=PytorchCheckpointTest,test_checkpoint_nested_directories + """ + + pool = self.get_pool() + container = self.get_container(pool) + + d1, d2 = str(uuid.uuid4()), str(uuid.uuid4()) + files = ["/file.pt", f"/{d1}/file.pt", f"/{d1}/{d2}/file.pt"] + + # by default parent directories should be created + with Checkpoint(pool.identifier, container.identifier) as pt: + for name in files: + with pt.writer(name) as w: + w.write(os.urandom(4096)) + + # ensure that it fails with expected exception + try: + with Checkpoint(pool.identifier, container.identifier) as pt: + fname = f"/{str(uuid.uuid4())}/file.pt" + with pt.writer(fname, ensure_path=False) as w: + w.write(os.urandom(4096)) + raise RuntimeError("expected OSError with errno.ENOENT") + except OSError as e: + if e.errno != errno.ENOENT: + raise RuntimeError(f"expected errno.ENOENT, got {os.strerror(e.errno)}") from e + except Exception as e: + raise RuntimeError(f"unexpected error: {e}") from e + def _test_checkpoint(self, pool, cont, writes, chunk_size=0, chunks_limit=0, 
workers=0): """Creates a checkpoint with the given parameters, writes the given data to it, then reads written data back from it and compares it with the expected writes. @@ -80,19 +116,17 @@ def _test_checkpoint(self, pool, cont, writes, chunk_size=0, chunks_limit=0, wor self.log.info("Checkpoint test: writes=%s, chunk_size=%s, chunks_limit=%s, workers=%s", len(writes), chunk_size, chunks_limit, workers) - chkp = Checkpoint(pool, cont, transfer_chunk_size=chunk_size, chunks_limit=chunks_limit, - workers=workers) - - expected = bytearray() - fname = str(uuid.uuid4()) - with chkp.writer(fname) as w: - for chunk in writes: - w.write(chunk) - expected.extend(chunk) - - actual = chkp.reader(fname) - if expected != actual.getvalue(): - self.fail( - f"checkpoint did not read back the expected content for {len(writes)} writes," - f"chunk_size={chunk_size}, chunks_limit={chunks_limit}, workers={workers}") - del chkp + with Checkpoint(pool, cont, transfer_chunk_size=chunk_size, chunks_limit=chunks_limit, + workers=workers) as chkp: + expected = bytearray() + fname = str(uuid.uuid4()) + with chkp.writer(fname) as w: + for chunk in writes: + w.write(chunk) + expected.extend(chunk) + + actual = chkp.reader(fname) + if expected != actual.getvalue(): + self.fail( + f"checkpoint did not read back the expected content for {len(writes)} writes," + f"chunk_size={chunk_size}, chunks_limit={chunks_limit}, workers={workers}") From ba67727721fc8c9d6cf1b234d752336fa1037918 Mon Sep 17 00:00:00 2001 From: Samir Raval Date: Wed, 11 Feb 2026 22:08:09 +0530 Subject: [PATCH 198/253] DAOS-18125 doc: Created the DAOS telemetric document (#17086) Adding daos metrics command with an example to analyze the different metrics counter for cluster,pool,container and IO. Adding few key troubleshooting example for what to look from daos metrics in case of issue. 
Signed-off-by: ravalsam --- docs/admin/telemetry_guide.md | 365 ++++++++++++++++++++++++++++++++++ 1 file changed, 365 insertions(+) create mode 100644 docs/admin/telemetry_guide.md diff --git a/docs/admin/telemetry_guide.md b/docs/admin/telemetry_guide.md new file mode 100644 index 00000000000..eae31acdc8d --- /dev/null +++ b/docs/admin/telemetry_guide.md @@ -0,0 +1,365 @@ +# DAOS Telemetry Example + +This document will help to run daos metrics command and collect some key metrics from the +server to help debug the issues and analyze the system behavior. + +## How to run telemetry command: + +### Directly on server using daos_metrics command as sudo user + +- Example of collecting the pool query metrics on the servers using daos_metrics command. +- daos_metrics -S will show telemetry data from First I/O Engine (default 0) +- daos_metrics -S 1 will show telemetry data from Second I/O Engine, in case multiple engines are running per node. + +``` +$ sudo daos_metrics -C -S 0 | grep pool_query +ID: 0/pool/8259d3ff-523e-4a43-9248-26aba2a62f4c/ops/pool_query,0 +ID: 0/pool/8259d3ff-523e-4a43-9248-26aba2a62f4c/ops/pool_query_space,0 +$ sudo daos_metrics -C -S 1 | grep pool_query +ID: 1/pool/8259d3ff-523e-4a43-9248-26aba2a62f4c/ops/pool_query,12 +ID: 1/pool/8259d3ff-523e-4a43-9248-26aba2a62f4c/ops/pool_query_space,10 +``` + +### Dmg command on admin node (dmg telemetry metrics query) + +- Example of collecting the pool query metrics from individual servers using dmg command + +``` +$ sudo dmg telemetry metrics query -m engine_pool_ops_pool_query -l brd-221 +connecting to brd-221:9191... +- Metric Set: engine_pool_ops_pool_query (Type: Counter) + Total number of processed pool query operations + Metric Labels Value + ------ ------ ----- + Counter (pool=8259d3ff-523e-4a43-9248-26aba2a62f4c, rank=0) 0 + Counter (pool=8259d3ff-523e-4a43-9248-26aba2a62f4c, rank=1) 0 + +$ sudo dmg telemetry metrics query -m engine_pool_ops_pool_query -l brd-222 +connecting to brd-222:9191... 
+- Metric Set: engine_pool_ops_pool_query (Type: Counter) + Total number of processed pool query operations + Metric Labels Value + ------ ------ ----- + Counter (pool=8259d3ff-523e-4a43-9248-26aba2a62f4c, rank=2) 0 + Counter (pool=8259d3ff-523e-4a43-9248-26aba2a62f4c, rank=4) 0 +``` + +### Identify the pool UUID and leader rank: + + - Some metrics are only available on pool leader rank so identify the leader rank for that pool from the pool query command. + - Below is the example of pool query where leader rank is 1 + - Pool 55cc96d8-5c46-41f4-af29-881d293b6f6f, ntarget=48, disabled=0, `leader=1`, version=1, state=Ready + +``` +#sudo dmg pool query samir_pool +``` +``` +Pool 55cc96d8-5c46-41f4-af29-881d293b6f6f, ntarget=48, disabled=0, leader=1, version=1, state=Ready +Pool health info: +- Rebuild idle, 0 objs, 0 recs +Pool space info: +- Target count:48 +- Total memory-file size: 151 GB +- Metadata storage: + Total size: 151 GB + Free: 118 GB, min:2.5 GB, max:2.5 GB, mean:2.5 GB +- Data storage: + Total size: 600 GB + Free: 598 GB, min:12 GB, max:12 GB, mean:12 GB +``` + + - Find the leader rank address so that daos_metrics command can be run on that specific server. 
+ In this example Rank 1 is on `brd-221.daos.hpc.amslabs.hpecorp.net` (`10.214.213.41`) + +``` +#sudo dmg system query -v +``` +``` +Rank UUID Control Address Fault Domain State Reason +---- ---- --------------- ------------ ----- ------ +0 6c481fea-b820-4b50-9845-6a5a04b4cfcf 10.214.213.41:10001 /brd-221.daos.hpc.amslabs.hpecorp.net Joined +1 43865b12-86d3-4107-afe8-3921f19bc9ff 10.214.213.41:10001 /brd-221.daos.hpc.amslabs.hpecorp.net Joined +2 eb413873-c13c-43ea-8bdf-21b691e169c9 10.214.212.229:10001 /brd-222.daos.hpc.amslabs.hpecorp.net Joined +3 607ad987-a55a-4365-ad6b-c4160ac5ff67 10.214.214.190:10001 /brd-223.daos.hpc.amslabs.hpecorp.net Joined +4 6c3d9b9a-2fff-4874-a7f0-309c4126a8e6 10.214.212.229:10001 /brd-222.daos.hpc.amslabs.hpecorp.net Joined +5 6884e5c9-b38b-46aa-b042-7fad9b37cf45 10.214.214.190:10001 /brd-223.daos.hpc.amslabs.hpecorp.net Joined +``` + - dmg command example based on leader Fault Domain (hostname) `-l brd-221` + +``` +$ sudo dmg telemetry metrics query -m engine_pool_ops_pool_query -l brd-221 +connecting to brd-221:9191... +- Metric Set: engine_pool_ops_pool_query (Type: Counter) + Total number of processed pool query operations + Metric Labels Value + ------ ------ ----- + Counter (pool=8259d3ff-523e-4a43-9248-26aba2a62f4c, rank=0) 0 + Counter (pool=8259d3ff-523e-4a43-9248-26aba2a62f4c, rank=1) 2 +``` + +OR + + - dmg command example based on Control Address `-l 10.214.213.41` + +``` +$ sudo dmg telemetry metrics query -m engine_pool_ops_pool_query -l 10.214.213.41 +connecting to 10.214.213.41:9191... +- Metric Set: engine_pool_ops_pool_query (Type: Counter) + Total number of processed pool query operations + Metric Labels Value + ------ ------ ----- + Counter (pool=8259d3ff-523e-4a43-9248-26aba2a62f4c, rank=0) 0 + Counter (pool=8259d3ff-523e-4a43-9248-26aba2a62f4c, rank=1) 2 + +``` + +## Engine Metrics: + +Admin can set environment variable for pool and container name for below example. 
+ +``` +export MY_POOL=Test_pool +export MY_CONT=Test_cont +export MY_MOUNT=/tmp/daos_mount +``` + + +|Operation| Description | DAOS Command | Metrics Command | Output | +|:---:| :---: | :---: | :---: |:------------: | +|When engine started | Timestamp of last engine startup | None | `sudo daos_metrics -S 1 -C \| grep 'started_at' \| grep -v pool`|ID: 0/started_at,Tue Oct 28 23:21:24 2025| +|When engine become ready | Timestamp when the engine became ready | None | `sudo daos_metrics -S 1 -C \| grep 'servicing_at'`|ID: 0/servicing_at,Tue Oct 28 23:21:33 2025| +|Find Engine Rank ID | Rank ID of this engine | None | `sudo daos_metrics -S 1 -C \| grep '/rank' \| grep -v pool`|ID: 1/rank,276| +|check if Engine is dead | engine_events_dead_ranks | None | `sudo daos_metrics -S 1 -C \| grep '/dead'`| ID: 0/events/dead_ranks,1 | +|last event on rank | Timestamp of last received event | None | `sudo daos_metrics -S 1 -C \| grep '/last_event'`| ID: 1/events/last_event_ts,Thu Jan 1 00:00:00 1970 | + +## Pool Metrics: + +|Operation| Description | DAOS Command | Metrics Command | Output | +|:---:| :---: | :---: | :---: |:------------: | +|With No Pools| Total number of processed pool connect operations | None | `sudo daos_metrics -C -S 0 \| grep 'ops/pool'`|None| +|After creating single pool| | dmg pool create $MY_POOL | `sudo daos_metrics -C -S 0 \| grep 'ops/pool'`| ID: 1/pool/55cc96d8-5c46-41f4-af29-881d293b6f6f/ops/pool_evict,0
ID: 1/pool/55cc96d8-5c46-41f4-af29-881d293b6f6f/ops/pool_connect,0
ID: 1/pool/55cc96d8-5c46-41f4-af29-881d293b6f6f/ops/pool_disconnect,0
ID: 1/pool/55cc96d8-5c46-41f4-af29-881d293b6f6f/ops/pool_query,0
ID: 1/pool/55cc96d8-5c46-41f4-af29-881d293b6f6f/ops/pool_query_space,0| +|After querying the single pool without storage| Total number of processed pool query operations | dmg pool query $MY_POOL -t | `sudo daos_metrics -C -S 0 \| grep 'ops/pool_query'` | ID: 1/pool/55cc96d8-5c46-41f4-af29-881d293b6f6f/ops/pool_query,1| +|After querying the single pool with storage | Total number of processed pool query (with operation) operations | dmg pool query $MY_POOL | `sudo daos_metrics -C -S 0 \| grep 'ops/pool_query_space'` | ID: 1/pool/55cc96d8-5c46-41f4-af29-881d293b6f6f/ops/pool_query_space,1| +|After Creating Container| Total number of processed pool connect operations | daos cont create $MY_POOL $MY_CONT| `sudo daos_metrics -C -S 0 \| grep 'ops/pool_connect'`| ID: 1/pool/55cc96d8-5c46-41f4-af29-881d293b6f6f/ops/pool_connect,1| +|After Creating Container| Total number of processed pool disconnect operations | daos cont create $MY_POOL $MY_CONT| `sudo daos_metrics -C -S 0 \| grep 'ops/pool_disconnect'`|ID: 1/pool/55cc96d8-5c46-41f4-af29-881d293b6f6f/ops/pool_disconnect,1| +|After Mounting FUSE Container| Total number of processed pool connect operations | dfuse -m $MY_MOUNT -p $MY_POOL -c $MY_CONT| `sudo daos_metrics -C -S 0 \| grep 'ops/pool_connect'`| ID: 1/pool/55cc96d8-5c46-41f4-af29-881d293b6f6f/ops/pool_connect,2| +|After Unmounting FUSE Container| Total number of processed pool disconnect operations | fusermount3 -u -m $MY_MOUNT | `sudo daos_metrics -C -S 0 \| grep 'ops/pool_disconnect'`| ID: 1/pool/55cc96d8-5c46-41f4-af29-881d293b6f6f/ops/pool_disconnect,2| +|After Pool evict | Total number of pool handle evict operations | dmg pool evict $MY_POOL | `sudo daos_metrics -C -S 0 \| grep 'ops/pool_evict'`| ID: 1/pool/55cc96d8-5c46-41f4-af29-881d293b6f6f/ops/pool_evict,2| + +## Container Metrics: + +|Operation| Description | DAOS Command | Metrics Command | Output | +|:---:| :---: | :---: | :---: |:------------: | +| Container creation | Total number of successful 
container create operations | daos cont create $MY_POOL $MY_CONT --type='POSIX' | `sudo daos_metrics -C -S 0 \| grep cont_create \| grep <pool_uuid>`|ID: 0/pool/c22c6a6c-7e31-4788-90a4-a55d1083d57b/ops/cont_create,1| +| Container query | Total number of successful container query operations | daos container query $MY_POOL $MY_CONT | `sudo daos_metrics -C -S 0 \| grep cont_query \| grep <pool_uuid>`|ID: 0/pool/c22c6a6c-7e31-4788-90a4-a55d1083d57b/ops/cont_query,4| +| Container open | Total number of successful container open operations | dfuse -m $MY_MOUNT -p $MY_POOL -c $MY_CONT | `sudo daos_metrics -C -S 0 \| grep cont_open \| grep <pool_uuid>`|ID: 0/pool/c22c6a6c-7e31-4788-90a4-a55d1083d57b/ops/cont_open,3| +| Container destroy | Total number of successful container destroy operations | daos cont destroy $MY_POOL $MY_CONT | `sudo daos_metrics -C -S 0 \| grep cont_destroy \| grep <pool_uuid>`|ID: 0/pool/c22c6a6c-7e31-4788-90a4-a55d1083d57b/ops/cont_destroy,1| + +## I/O Metrics: + +|Operation| Description | DAOS Command | Metrics Command | Output | +|:---:| :---: | :---: | :---: |:------------: | +| data written | Total number of bytes updated/written |Write the Data using any IO | `sudo daos_metrics -C -S 1 \| grep <pool_uuid> \| grep 'xferred/update'`|ID: 1/pool/e63e81dd-7d5d-4622-8196-83256b12326c/xferred/update/tgt_0,1335885824
ID: 1/pool/e63e81dd-7d5d-4622-8196-83256b12326c/xferred/update/tgt_7,1337983064
ID: 1/pool/e63e81dd-7d5d-4622-8196-83256b12326c/xferred/update/tgt_2,1342177280
ID: 1/pool/e63e81dd-7d5d-4622-8196-83256b12326c/xferred/update/tgt_4,1325400064
ID: 1/pool/e63e81dd-7d5d-4622-8196-83256b12326c/xferred/update/tgt_6,1337982976
ID: 1/pool/e63e81dd-7d5d-4622-8196-83256b12326c/xferred/update/tgt_5,1384120320
ID: 1/pool/e63e81dd-7d5d-4622-8196-83256b12326c/xferred/update/tgt_1,1332740096
ID: 1/pool/e63e81dd-7d5d-4622-8196-83256b12326c/xferred/update/tgt_3,1341128828| +| data read | Total number of bytes fetched/read | Read the Data using any IO | `sudo daos_metrics -C -S 1 \| grep <pool_uuid> \| grep 'xferred/fetch'`|ID: 1/pool/e63e81dd-7d5d-4622-8196-83256b12326c/xferred/fetch/tgt_0,1335885824
ID: 1/pool/e63e81dd-7d5d-4622-8196-83256b12326c/xferred/fetch/tgt_7,1337983240
ID: 1/pool/e63e81dd-7d5d-4622-8196-83256b12326c/xferred/fetch/tgt_2,1342177280
ID: 1/pool/e63e81dd-7d5d-4622-8196-83256b12326c/xferred/fetch/tgt_4,1325400064
ID: 1/pool/e63e81dd-7d5d-4622-8196-83256b12326c/xferred/fetch/tgt_6,1337982976
ID: 1/pool/e63e81dd-7d5d-4622-8196-83256b12326c/xferred/fetch/tgt_5,1384120320
ID: 1/pool/e63e81dd-7d5d-4622-8196-83256b12326c/xferred/fetch/tgt_1,1332740096
ID: 1/pool/e63e81dd-7d5d-4622-8196-83256b12326c/xferred/fetch/tgt_3,1341129076| +| Write IOPS operation | Total number of processed object RPCs | Write the Data using any IO | `sudo daos_metrics -S 1 -C \| grep <pool_uuid> \| grep 'ops/update'`|ID: 1/pool/8259d3ff-523e-4a43-9248-26aba2a62f4c/ops/update/tgt_6,222
ID: 1/pool/8259d3ff-523e-4a43-9248-26aba2a62f4c/ops/update/tgt_2,204
ID: 1/pool/8259d3ff-523e-4a43-9248-26aba2a62f4c/ops/update/tgt_5,210
ID: 1/pool/8259d3ff-523e-4a43-9248-26aba2a62f4c/ops/update/tgt_7,223
ID: 1/pool/8259d3ff-523e-4a43-9248-26aba2a62f4c/ops/update/tgt_4,224
ID: 1/pool/8259d3ff-523e-4a43-9248-26aba2a62f4c/ops/update/tgt_0,196
ID: 1/pool/8259d3ff-523e-4a43-9248-26aba2a62f4c/ops/update/tgt_1,198
ID: 1/pool/8259d3ff-523e-4a43-9248-26aba2a62f4c/ops/update/tgt_3,206| +| Read IOPS operation | Total number of processed object RPCs | Read the Data using any IO | `sudo daos_metrics -S 1 -C \| grep <pool_uuid> \| grep 'ops/fetch'`|ID: 1/pool/8259d3ff-523e-4a43-9248-26aba2a62f4c/ops/fetch/tgt_6,234
ID: 1/pool/8259d3ff-523e-4a43-9248-26aba2a62f4c/ops/fetch/tgt_2,206
ID: 1/pool/8259d3ff-523e-4a43-9248-26aba2a62f4c/ops/fetch/tgt_5,215
ID: 1/pool/8259d3ff-523e-4a43-9248-26aba2a62f4c/ops/fetch/tgt_7,214
ID: 1/pool/8259d3ff-523e-4a43-9248-26aba2a62f4c/ops/fetch/tgt_4,225
ID: 1/pool/8259d3ff-523e-4a43-9248-26aba2a62f4c/ops/fetch/tgt_0,192
ID: 1/pool/8259d3ff-523e-4a43-9248-26aba2a62f4c/ops/fetch/tgt_1,202
ID: 1/pool/8259d3ff-523e-4a43-9248-26aba2a62f4c/ops/fetch/tgt_3,221| +| IO latency Update | update RPC processing time | Write the Data using 1MiB xfersize | `sudo daos_metrics -S 1 -C \| grep 'io/latency/update'`|ID: 1/io/latency/update/1MB/tgt_0,34423,9173,239956,24216.092267,1875,45405173,35287.151469
ID: 1/io/latency/update/1MB/tgt_1,34824,9195,224337,24619.489373,1882,46333879,35692.908836
ID: 1/io/latency/update/1MB/tgt_2,17586,9187,246820,25627.308223,1885,48307476,37184.782868
ID: 1/io/latency/update/1MB/tgt_3,60684,9182,264286,25998.202265,1943,50514507,38227.372221
ID: 1/io/latency/update/1MB/tgt_4,83487,9193,235707,26626.855799,1914,50963802,37382.179815
ID: 1/io/latency/update/1MB/tgt_5,26402,9200,235859,24656.685802,1951,48105194,34931.529382
ID: 1/io/latency/update/1MB/tgt_6,107294,9190,244975,26761.485861,1945,52051090,38022.684882
ID: 1/io/latency/update/1MB/tgt_7,79041,9213,219362,25710.023921,1923,49440376,36611.272385| +| IO latency Fetch | fetch RPC processing time | Read the Data using 1MiB xfersize | `sudo daos_metrics -S 1 -C \| grep 'io/latency/fetch'`|ID: 1/io/latency/fetch/1MB/tgt_0,29630,9419,225908,19060.848723,1527,29105916,26764.971072
ID: 1/io/latency/fetch/1MB/tgt_1,18329,9406,343931,17769.093144,1546,27471018,23882.809783
ID: 1/io/latency/fetch/1MB/tgt_2,9887,9385,131315,18075.996768,1547,27963567,22973.594024
ID: 1/io/latency/fetch/1MB/tgt_3,39508,9411,155136,19332.508228,1580,30545363,25593.694908
ID: 1/io/latency/fetch/1MB/tgt_4,22616,9413,412206,19062.688062,1558,29699668,27359.057624
ID: 1/io/latency/fetch/1MB/tgt_5,22280,9418,126520,17382.379032,1612,28020395,20937.262665
ID: 1/io/latency/fetch/1MB/tgt_6,40743,9409,207370,18697.681472,1576,29467546,23236.574768
ID: 1/io/latency/fetch/1MB/tgt_7,24048,9417,112182,17725.164955,1558,27615807,21375.496411| + +## Troubleshooting: + +### No response to pool query or any I/O operation + +In case of no response to any dmg pool command or any I/O operation means any one of the single xstream might have stuck. Either ULT is stuck or NVMe cannot respond to I/O operation. +To check if ULT is stuck, run metrics command on each server and check for sched/cycle_duration and sched/cycle_size. +In this case sched/cycle_duration and sched/cycle_size for stuck xstream counter value is higher (outlier) compared to other xstream and ULT count. + +- sched/cycle_duration: Schedule cycle duration, units: ms +- sched/cycle_size: Schedule cycle size, units: ULT + +Below is the example on real system where ULT was stuck and not responding. You can see the outlier for xs_3\ +**xs_3: 87624970 ms**\ +**xs_3: 72508 ULT** +``` +# sudo daos_metrics -C -S 0 | grep -e cycle + + cycle_duration + xs_0: 4 ms [min: 0, max: 736, avg: 1, sum: 4374707, stddev: 2, samples: 4337768] + xs_1: 0 ms [min: 0, max: 4, avg: 0, sum: 12, stddev: 1, samples: 57] + xs_2: 1000 ms [min: 0, max: 1008, avg: 1001, sum: 170805474, stddev: 5, samples: 170636] + xs_3: 87624970 ms [min: 0, max: 87624970, avg: 0, sum: 155215898, stddev: 677, samples: 16729436166] + xs_4: 0 ms [min: 0, max: 480, avg: 0, sum: 170775422, stddev: 0, samples: 52866364909] + xs_5: 0 ms [min: 0, max: 532, avg: 0, sum: 170774266, stddev: 0, samples: 53277804155] + xs_6: 0 ms [min: 0, max: 457, avg: 0, sum: 170775310, stddev: 0, samples: 52654423202] + xs_7: 0 ms [min: 0, max: 449, avg: 0, sum: 170774942, stddev: 0, samples: 53289078146] + xs_8: 0 ms [min: 0, max: 696, avg: 0, sum: 170779578, stddev: 0, samples: 53348599756] + xs_9: 0 ms [min: 0, max: 444, avg: 0, sum: 170775582, stddev: 0, samples: 53085628214] + xs_10: 0 ms [min: 0, max: 456, avg: 0, sum: 170775386, stddev: 0, samples: 53361992047] + xs_11: 0 ms [min: 0, max: 668, avg: 0, sum: 170779354, 
stddev: 0, samples: 52868332788] + xs_12: 0 ms [min: 0, max: 664, avg: 0, sum: 170779222, stddev: 0, samples: 53207853905] + xs_13: 0 ms [min: 0, max: 484, avg: 0, sum: 170778230, stddev: 0, samples: 53161107629] + xs_14: 0 ms [min: 0, max: 452, avg: 0, sum: 170778690, stddev: 0, samples: 54026864334] + xs_15: 0 ms [min: 0, max: 664, avg: 0, sum: 170779106, stddev: 0, samples: 53240085110] + xs_16: 0 ms [min: 0, max: 588, avg: 0, sum: 170778746, stddev: 0, samples: 53324006952] + xs_17: 0 ms [min: 0, max: 664, avg: 0, sum: 170778646, stddev: 0, samples: 53244261876] + xs_18: 0 ms [min: 0, max: 452, avg: 0, sum: 170779198, stddev: 0, samples: 53498338576] + xs_19: 4 ms [min: 0, max: 108, avg: 0, sum: 30913, stddev: 1, samples: 461742] + xs_20: 0 ms [min: 0, max: 112, avg: 0, sum: 30832, stddev: 1, samples: 460370] + xs_21: 0 ms [min: 0, max: 112, avg: 0, sum: 31340, stddev: 1, samples: 461099] + xs_22: 0 ms [min: 0, max: 116, avg: 0, sum: 105495174, stddev: 0, samples: 92074321933] + cycle_size + xs_0: 1 ULT [min: 1, max: 672, avg: 1, sum: 4486893, stddev: 1, samples: 4337768] + xs_1: 1 ULT [min: 1, max: 15, avg: 2, sum: 116, stddev: 2, samples: 57] + xs_2: 1 ULT [min: 1, max: 1, avg: 1, sum: 170636, stddev: 0, samples: 170636] + xs_3: 72508 ULT [min: 1, max: 72508, avg: 1, sum: 16944980993, stddev: 1, samples: 16729436166] + xs_4: 1 ULT [min: 1, max: 253, avg: 1, sum: 53106562082, stddev: 0, samples: 52866364919] + xs_5: 1 ULT [min: 1, max: 293, avg: 1, sum: 53517385848, stddev: 0, samples: 53277804166] + xs_6: 1 ULT [min: 1, max: 262, avg: 1, sum: 52893882375, stddev: 0, samples: 52654423213] + xs_7: 1 ULT [min: 1, max: 263, avg: 1, sum: 53529014337, stddev: 0, samples: 53289078157] + xs_8: 1 ULT [min: 1, max: 269, avg: 1, sum: 53588382832, stddev: 0, samples: 53348599768] + xs_9: 1 ULT [min: 1, max: 538, avg: 1, sum: 53325349666, stddev: 0, samples: 53085628225] + xs_10: 1 ULT [min: 1, max: 440, avg: 1, sum: 53601721471, stddev: 0, samples: 53361992058] + xs_11: 
1 ULT [min: 1, max: 365, avg: 1, sum: 53108191221, stddev: 0, samples: 52868332799] + xs_12: 1 ULT [min: 1, max: 268, avg: 1, sum: 53447917652, stddev: 0, samples: 53207853917] + xs_13: 1 ULT [min: 1, max: 258, avg: 1, sum: 53400854712, stddev: 0, samples: 53161107641] + xs_14: 1 ULT [min: 1, max: 265, avg: 1, sum: 54266784187, stddev: 0, samples: 54026864345] + xs_15: 1 ULT [min: 1, max: 440, avg: 1, sum: 53480318341, stddev: 0, samples: 53240085122] + xs_16: 1 ULT [min: 1, max: 270, avg: 1, sum: 53564352374, stddev: 0, samples: 53324006963] + xs_17: 1 ULT [min: 1, max: 273, avg: 1, sum: 53484431253, stddev: 0, samples: 53244261888] + xs_18: 1 ULT [min: 1, max: 275, avg: 1, sum: 53738248689, stddev: 0, samples: 53498338588] + xs_19: 1 ULT [min: 1, max: 2, avg: 1, sum: 461743, stddev: 0, samples: 461742] + xs_20: 1 ULT [min: 1, max: 1, avg: 1, sum: 460370, stddev: 0, samples: 460370] + xs_21: 1 ULT [min: 1, max: 1, avg: 1, sum: 461099, stddev: 0, samples: 461099] + xs_22: 1 ULT [min: 1, max: 3, avg: 1, sum: 92074426829, stddev: 0, samples: 92074321962] +``` + +### Slow performance + +If DAOS system is performing slower, check write(update) & read(fetch) metrics to indicate the source of the problem across all engines. + +For example, mention below, one of the NVMe was impacting the overall IO performance because write BW on that specific drive was slower. As you can see two targets (tgt_0 & tgt_8) latency for 4MB write were too high compared to other targets. That indicate that specific drive is having lower write BW which increase the update latency too high. +This metrics are available in different IO size ranges from 256B to 4GB so looks for matching IO size used for testing the performance. Below example we used IOR write size 4MB. 
+ +``` +#sudo daos_metrics -C -S 0 | grep 'io/latency/update' + +ID: 0/io/latency/update/4MB/tgt_0,16349826,733843,16349826,7329515.976190,42,307839671,4196687.177444 +ID: 0/io/latency/update/4MB/tgt_1,1260,1147,2191,1463.423077,52,76098,273.640909 +ID: 0/io/latency/update/4MB/tgt_2,1252,1122,2275,1452.000000,62,90024,272.896966 +ID: 0/io/latency/update/4MB/tgt_3,1637,1179,2639,1558.844444,45,70148,302.601219 +ID: 0/io/latency/update/4MB/tgt_4,1155,1119,2280,1496.857143,49,73346,281.746857 +ID: 0/io/latency/update/4MB/tgt_5,1804,1139,1920,1493.767442,43,64232,234.072520 +ID: 0/io/latency/update/4MB/tgt_6,1160,1136,2550,1560.862745,51,79604,293.899440 +ID: 0/io/latency/update/4MB/tgt_7,1399,1126,1969,1411.929825,57,80480,195.942125 +ID: 0/io/latency/update/4MB/tgt_8,15264368,857936,19645847,9109087.453125,64,582981597,5094157.829112 +ID: 0/io/latency/update/4MB/tgt_9,1601,1146,2455,1437.038462,52,74726,262.549712 +ID: 0/io/latency/update/4MB/tgt_10,1366,1138,2094,1459.828125,64,93429,228.692526 +ID: 0/io/latency/update/4MB/tgt_11,1118,1113,2742,1475.378788,66,97375,309.820731 +ID: 0/io/latency/update/4MB/tgt_12,1169,1158,2531,1492.392857,56,83574,270.312323 +ID: 0/io/latency/update/4MB/tgt_13,1477,1148,2204,1485.853659,41,60920,244.983118 +ID: 0/io/latency/update/4MB/tgt_14,1159,1159,2390,1523.333333,48,73120,318.466026 +ID: 0/io/latency/update/4MB/tgt_15,1511,1165,2318,1447.608696,46,66590,253.351094 + +#sudo daos_metrics -C -S 0 | grep 'io/latency/fetch' + +ID: 0/io/latency/fetch/4MB/tgt_0,1390,1099,2169,1380.785714,42,57993,202.810200 +ID: 0/io/latency/fetch/4MB/tgt_1,1902,1413,2956,1845.769231,52,95980,313.043041 +ID: 0/io/latency/fetch/4MB/tgt_2,1741,1395,2493,1783.983871,62,110607,226.501945 +ID: 0/io/latency/fetch/4MB/tgt_3,1543,1241,2568,1824.800000,45,82116,281.414092 +ID: 0/io/latency/fetch/4MB/tgt_4,1705,1506,2426,1850.020408,49,90651,232.079413 +ID: 0/io/latency/fetch/4MB/tgt_5,1579,1251,2396,1754.139535,43,75428,213.314275 +ID: 
0/io/latency/fetch/4MB/tgt_6,1566,1262,2403,1747.823529,51,89139,260.631134 +ID: 0/io/latency/fetch/4MB/tgt_7,1663,1354,2912,1853.631579,57,105657,287.610267 +ID: 0/io/latency/fetch/4MB/tgt_8,1508,1051,2276,1417.562500,64,90724,271.118956 +ID: 0/io/latency/fetch/4MB/tgt_9,1508,1404,2468,1791.788462,52,93173,251.042324 +ID: 0/io/latency/fetch/4MB/tgt_10,1746,1453,2645,1796.203125,64,114957,230.458630 +ID: 0/io/latency/fetch/4MB/tgt_11,1695,1394,2416,1761.151515,66,116236,220.046376 +ID: 0/io/latency/fetch/4MB/tgt_12,1966,1396,2654,1740.464286,56,97466,238.501684 +ID: 0/io/latency/fetch/4MB/tgt_13,1915,1341,2613,1774.536585,41,72756,237.038298 +ID: 0/io/latency/fetch/4MB/tgt_14,1861,1337,2543,1807.625000,48,86766,279.890680 +ID: 0/io/latency/fetch/4MB/tgt_15,1740,1326,2420,1733.521739,46,79742,238.393674 + +``` + + +### NVMe Device Error + +Many times, NVMe device has error which can also be an indication for slow performance or system stuck issue. + +``` +#sudo daos_metrics -S 0 -M | grep errs + media_errs: 0 errs, desc: Number of unrecovered data integrity error, units: errs + read_errs: 0 errs, desc: Number of errors reported to the engine on read commands, units: errs + write_errs: 0 errs, desc: Number of errors reported to the engine on write commands, units: errs + unmap_errs: 0 errs, desc: Number of errors reported to the engine on unmap/trim commands, units: errs + checksum_mismatch: 0 errs, desc: Number of checksum mismatch detected by the engine, units: errs + +#sudo daos_metrics -C -S 0 | grep nvm | grep err +ID: 0/nvme/0000:83:00.0/commands/media_errs,0 +ID: 0/nvme/0000:83:00.0/commands/read_errs,0 +ID: 0/nvme/0000:83:00.0/commands/write_errs,0 +ID: 0/nvme/0000:83:00.0/commands/unmap_errs,0 +ID: 0/nvme/0000:83:00.0/vendor/endtoend_err_cnt_raw,0 +ID: 0/nvme/0000:83:00.0/vendor/crc_err_cnt_raw,0 +``` + +## Metrics Unit Type + +daos_metrics output is available in multiple units. for example, Counters, Gauge. 
It can display the data based on different unit types. + +### Display Counter type metrics +A counter is a cumulative metric that represents a single monotonically increasing counter whose value can only increase, or be reset to zero on restart. + +``` +sudo daos_metrics -c -S 0 -M -C +name,value,min,max,mean,sample_size,sum,std_dev,description,units +ID: 0/events/dead_ranks,0,,,,,,Number of dead rank events received,events +ID: 0/net/uri/lookup_self,0,,,,,,total number of URI requests for self +ID: 0/net/uri/lookup_other,0,,,,,,total number of URI requests for other ranks +ID: 0/net/ofi+tcp;ofi_rxm/hg/bulks/ctx_0,0,,,,,,Mercury-layer count of bulk transfers,bulks +ID: 0/net/ofi+tcp;ofi_rxm/hg/bulks/ctx_1,0,,,,,,Mercury-layer count of bulk transfers,bulks +``` + +### Display Gauge type metrics +A gauge is a metric that represents a single numerical value that can arbitrarily go up and down. + +``` +sudo daos_metrics -S 0 -g -M -C | more +name,value,min,max,mean,sample_size,sum,std_dev,description,units +ID: 0/rank,0,,,,,,Rank ID of this engine +ID: 0/net/ofi+tcp;ofi_rxm/hg/active_rpcs/ctx_0,0,,,,,,Mercury-layer count of active RPCs,rpcs +ID: 0/net/ofi+tcp;ofi_rxm/hg/active_rpcs/ctx_1,0,,,,,,Mercury-layer count of active RPCs,rpcs +ID: 0/net/ofi+tcp;ofi_rxm/hg/active_rpcs/ctx_2,0,,,,,,Mercury-layer count of active RPCs,rpcs +ID: 0/net/ofi+tcp;ofi_rxm/hg/active_rpcs/ctx_3,0,,,,,,Mercury-layer count of active RPCs,rpcs +``` + +## Metrics Unit output format + +Gauge metrics are displayed in a format where multiple values are shown for a given number of samples. For example, update/fetch latency output.
+ +``` + latency + update + 256B + tgt_0: 118 us [min: 15, max: 3703, avg: 100, sum: 200968, stddev: 124, samples: 2010] +``` + +|metrics type| definition|| +|:---:|:-------------------:|:---:| +|value|Current value|118 us| +|min|The minimum value from all data samples|15 us| +|max|The maximum value from all data samples|3703 us| +|avg|The average value based on all data samples|100 us| +|sum|The total value of all data samples|200968 us| +|stddev| Standard deviation |124 us| +|samples|Total number of data samples used for metrics at given point|2010| + +## Reset the metrics counter + +Metrics counter will be reset when system restarts or it can be reset using below command on individual servers. + +For Engine 0 & 1 (In case multiple engines are running on same node) +``` +sudo daos_metrics -S 0 -e; sudo daos_metrics -S 1 -e +``` + From f010597aaac718e0c99b65672c7849bd8a2c09e3 Mon Sep 17 00:00:00 2001 From: Phil Henderson Date: Thu, 12 Feb 2026 09:31:07 -0500 Subject: [PATCH 199/253] DAOS-18572 test: Fix rebuild/basic.py failure (#17524) Remove system_ram_reserved setting now the test is running on HW. Signed-off-by: Phil Henderson --- src/tests/ftest/rebuild/basic.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/src/tests/ftest/rebuild/basic.yaml b/src/tests/ftest/rebuild/basic.yaml index e1dc513a44e..100c3716b8d 100644 --- a/src/tests/ftest/rebuild/basic.yaml +++ b/src/tests/ftest/rebuild/basic.yaml @@ -19,7 +19,6 @@ server_config: 0: class: ram scm_mount: /mnt/daos - system_ram_reserved: 6 pool: size: 1G From 32c12818cff9510716d661cdc0a8b0f54244e545 Mon Sep 17 00:00:00 2001 From: Tom Nabarro Date: Thu, 12 Feb 2026 15:14:18 +0000 Subject: [PATCH 200/253] DAOS-18431 bio: Set power management register on NVMe (#17355) Set NVMe power management values for SSDs by setting the new engine DAOS_NVME_POWER_MGMT environment variable to an integer (sets register bits 0-4). Value will be applied by SPDK on devices attached to an engine process. 
The value will not be reset on engine exit. Signed-off-by: Tom Nabarro --- src/bio/bio_device.c | 119 +++++++++++++++++++++++++++++++++++++++++ src/bio/bio_internal.h | 5 ++ src/bio/bio_xstream.c | 18 ++++++- 3 files changed, 140 insertions(+), 2 deletions(-) diff --git a/src/bio/bio_device.c b/src/bio/bio_device.c index ca72bd09f55..3725210d1fe 100644 --- a/src/bio/bio_device.c +++ b/src/bio/bio_device.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "smd.pb-c.h" @@ -1101,3 +1102,121 @@ bio_led_manage(struct bio_xs_context *xs_ctxt, char *tr_addr, uuid_t dev_uuid, u return led_manage(xs_ctxt, pci_addr, (Ctl__LedAction)action, (Ctl__LedState *)state, duration); } + +struct power_mgmt_context_t { + const char *bdev_name; + unsigned int set_val; + unsigned int inflights; +}; + +static void +set_power_mgmt_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) +{ + struct power_mgmt_context_t *pm_ctx = cb_arg; + int sc; + int sct; + uint32_t cdw0; + + spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc); + if (sc) { + D_ERROR("Set power management failed for device %s (value: 0x%x), NVMe status " + "code/type: 0x%x/0x%x", + pm_ctx->bdev_name, pm_ctx->set_val, sc, sct); + if (sc == SPDK_NVME_SC_INVALID_FIELD && sct == 0) + D_ERROR(" - INVALID_FIELD: Device may not support requested power state\n"); + else + D_ERROR("\n"); + } else { + D_INFO("Power management value set to 0x%x on device %s\n", pm_ctx->set_val, + pm_ctx->bdev_name); + } + + D_ASSERT(pm_ctx->inflights == 1); + pm_ctx->inflights--; + spdk_bdev_free_io(bdev_io); +} + +int +bio_set_power_mgmt(struct bio_xs_context *ctxt, const char *bdev_name) +{ + struct power_mgmt_context_t pm_ctx = {0}; + struct spdk_nvme_cmd cmd = {0}; + struct spdk_bdev *bdev; + struct spdk_bdev_desc *bdev_desc; + struct spdk_io_channel *bdev_io_channel; + int rc = 0; + + /* If default has not been overwritten, skip setting the value */ + if (bio_spdk_power_mgmt_val == NVME_POWER_MGMT_UNINIT) + goto 
out; + + /* Validate power state value is in valid range (5-bit field) */ + if (bio_spdk_power_mgmt_val > 0x1F) { + D_ERROR("bio_spdk_power_mgmt_val %u exceeds 5-bit limit (0x1F)\n", + bio_spdk_power_mgmt_val); + rc = -DER_INVAL; + goto out; + } + + D_ASSERT(bdev_name != NULL); + + bdev = spdk_bdev_get_by_name(bdev_name); + if (bdev == NULL) { + D_ERROR("No bdev associated with device name %s\n", bdev_name); + rc = -DER_INVAL; + goto out; + } + + if (get_bdev_type(bdev) != BDEV_CLASS_NVME) { + D_DEBUG(DB_MGMT, "Device %s is not NVMe, skipping power management\n", bdev_name); + rc = -DER_NOTSUPPORTED; + goto out; + } + + if (!spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_NVME_ADMIN)) { + D_DEBUG(DB_MGMT, "Bdev NVMe admin passthru not supported for %s\n", bdev_name); + rc = -DER_NOTSUPPORTED; + goto out; + } + + /* Writable descriptor required for applying power management settings */ + rc = spdk_bdev_open_ext(bdev_name, true, bio_bdev_event_cb, NULL, &bdev_desc); + if (rc != 0) { + D_ERROR("Failed to open bdev %s, %d\n", bdev_name, rc); + rc = daos_errno2der(-rc); + goto out; + } + + bdev_io_channel = spdk_bdev_get_io_channel(bdev_desc); + D_ASSERT(bdev_io_channel != NULL); + + /* Build NVMe Set Features command for Power Management */ + cmd.opc = SPDK_NVME_OPC_SET_FEATURES; + cmd.nsid = 0; /* 0 = controller-level feature */ + cmd.cdw10_bits.set_features.fid = SPDK_NVME_FEAT_POWER_MANAGEMENT; + cmd.cdw10_bits.set_features.sv = 0; /* Don't save across resets */ + cmd.cdw11_bits.feat_power_management.bits.ps = bio_spdk_power_mgmt_val; + cmd.cdw11_bits.feat_power_management.bits.wh = 0; /* Workload hint = 0 */ + + pm_ctx.bdev_name = bdev_name; + pm_ctx.set_val = bio_spdk_power_mgmt_val; + pm_ctx.inflights = 1; + + rc = spdk_bdev_nvme_admin_passthru(bdev_desc, bdev_io_channel, &cmd, NULL, 0, + set_power_mgmt_completion, &pm_ctx); + if (rc != 0) { + D_ERROR("Failed to submit power management command to set 0x%x on %s, rc:%d\n", + bio_spdk_power_mgmt_val, bdev_name, 
rc); + rc = daos_errno2der(-rc); + goto out_chan; + } + + rc = xs_poll_completion(ctxt, &pm_ctx.inflights, 0); + D_ASSERT(rc == 0); + +out_chan: + spdk_put_io_channel(bdev_io_channel); + spdk_bdev_close(bdev_desc); +out: + return rc; +} diff --git a/src/bio/bio_internal.h b/src/bio/bio_internal.h index d8be60405d5..810c337942e 100644 --- a/src/bio/bio_internal.h +++ b/src/bio/bio_internal.h @@ -32,6 +32,8 @@ #define NVME_MONITOR_PERIOD (60ULL * (NSEC_PER_SEC / NSEC_PER_USEC)) #define NVME_MONITOR_SHORT_PERIOD (3ULL * (NSEC_PER_SEC / NSEC_PER_USEC)) +#define NVME_POWER_MGMT_UNINIT UINT32_MAX + struct bio_bulk_args { void *ba_bulk_ctxt; unsigned int ba_bulk_perm; @@ -605,6 +607,7 @@ extern unsigned int bio_numa_node; extern unsigned int bio_spdk_max_unmap_cnt; extern unsigned int bio_max_async_sz; extern unsigned int bio_io_timeout; +extern unsigned int bio_spdk_power_mgmt_val; int xs_poll_completion(struct bio_xs_context *ctxt, unsigned int *inflights, uint64_t timeout); @@ -719,6 +722,8 @@ void trigger_faulty_reaction(struct bio_blobstore *bbs); int fill_in_traddr(struct bio_dev_info *b_info, char *dev_name); struct bio_dev_info * alloc_dev_info(uuid_t dev_id, struct bio_bdev *d_bdev, struct smd_dev_info *s_info); +int +bio_set_power_mgmt(struct bio_xs_context *ctxt, const char *bdev_name); /* bio_config.c */ int diff --git a/src/bio/bio_xstream.c b/src/bio/bio_xstream.c index 757118d3e67..44ec22b12f8 100644 --- a/src/bio/bio_xstream.c +++ b/src/bio/bio_xstream.c @@ -59,6 +59,8 @@ bool bio_scm_rdma; bool bio_spdk_inited; /* SPDK subsystem fini timeout */ unsigned int bio_spdk_subsys_timeout = 25000; /* ms */ +/* SPDK NVMe power management value, use bits 0-4 as per NVMe spec */ +unsigned int bio_spdk_power_mgmt_val = NVME_POWER_MGMT_UNINIT; /* How many blob unmap calls can be called in a row */ unsigned int bio_spdk_max_unmap_cnt = 32; unsigned int bio_max_async_sz = (1UL << 15) /* 32k */; @@ -268,6 +270,11 @@ bio_nvme_init_ext(const char *nvme_conf, int numa_node, 
unsigned int mem_size, d_getenv_bool("DAOS_SCM_RDMA_ENABLED", &bio_scm_rdma); D_INFO("RDMA to SCM is %s\n", bio_scm_rdma ? "enabled" : "disabled"); + d_getenv_uint("DAOS_NVME_POWER_MGMT", &bio_spdk_power_mgmt_val); + if (bio_spdk_power_mgmt_val != NVME_POWER_MGMT_UNINIT) + D_INFO("NVMe power management setting to be applied is %u\n", + bio_spdk_power_mgmt_val); + d_getenv_uint("DAOS_SPDK_SUBSYS_TIMEOUT", &bio_spdk_subsys_timeout); D_INFO("SPDK subsystem fini timeout is %u ms\n", bio_spdk_subsys_timeout); @@ -940,8 +947,8 @@ create_bio_bdev(struct bio_xs_context *ctxt, const char *bdev_name, unsigned int * Hold the SPDK bdev by an open descriptor, otherwise, the bdev * could be deconstructed by SPDK on device hot remove. */ - rc = spdk_bdev_open_ext(d_bdev->bb_name, false, bio_bdev_event_cb, - d_bdev, &d_bdev->bb_desc); + rc = + spdk_bdev_open_ext(d_bdev->bb_name, false, bio_bdev_event_cb, d_bdev, &d_bdev->bb_desc); if (rc != 0) { D_ERROR("Failed to hold bdev %s, %d\n", d_bdev->bb_name, rc); rc = daos_errno2der(-rc); @@ -949,6 +956,7 @@ create_bio_bdev(struct bio_xs_context *ctxt, const char *bdev_name, unsigned int } D_ASSERT(d_bdev->bb_desc != NULL); + /* Try to load blobstore without specifying 'bstype' first */ bs = load_blobstore(ctxt, d_bdev->bb_name, NULL, false, false, NULL, NULL); @@ -1050,6 +1058,12 @@ init_bio_bdevs(struct bio_xs_context *ctxt) bdev_name = spdk_bdev_get_name(bdev); + /* Apply NVMe power management settings */ + rc = bio_set_power_mgmt(ctxt, bdev_name); + if (rc != 0 && rc != -DER_NOTSUPPORTED) + D_WARN("Failed to set power management for device %s: " DF_RC "\n", + bdev_name, DP_RC(rc)); + rc = bdev_name2roles(bdev_name); if (rc < 0) { D_ERROR("Failed to get role from bdev name '%s', "DF_RC"\n", bdev_name, From 70dfa789d288050aa33bc963fe02de94916cac72 Mon Sep 17 00:00:00 2001 From: Cedric Koch-Hofer <94527853+knard38@users.noreply.github.com> Date: Thu, 12 Feb 2026 16:15:35 +0100 Subject: [PATCH 201/253] DAOS-18533 ddb: Fix ddb completion 
(#17500) Fix completion function of the ddb commands open, rm_pool and feature. Signed-off-by: Cedric Koch-Hofer --- src/control/cmd/ddb/command_completers.go | 105 ++++++----- .../cmd/ddb/command_completers_test.go | 175 ++++++++++++++++++ src/control/cmd/ddb/main.go | 57 +----- 3 files changed, 245 insertions(+), 92 deletions(-) create mode 100644 src/control/cmd/ddb/command_completers_test.go diff --git a/src/control/cmd/ddb/command_completers.go b/src/control/cmd/ddb/command_completers.go index 3c5a95a97b0..ae55222f911 100644 --- a/src/control/cmd/ddb/command_completers.go +++ b/src/control/cmd/ddb/command_completers.go @@ -1,5 +1,6 @@ // // (C) Copyright 2022-2024 Intel Corporation. +// (C) Copyright 2026 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -7,74 +8,92 @@ package main import ( - "io/fs" + "os" "path/filepath" + "regexp" "strings" ) -const ( - defMntPrefix = "/mnt" +var ( + vosRegexp = regexp.MustCompile(`^.+/((vos-([0-9]|([1-9][0-9]+)))|(rdb-pool))$`) ) -func listDirVos(match string) (result []string) { - if strings.HasSuffix(match, "vos-") { - match = filepath.Dir(match) +func listVosFiles(match string) (result []string) { + result = []string{} + + matches, err := filepath.Glob(match + "*") + if err != nil { + return } - filepath.Walk(match, func(path string, info fs.FileInfo, err error) error { + for _, match := range matches { + path := filepath.Clean(match) + fi, err := os.Stat(path) if err != nil { - /* ignore error */ - return nil + continue } - if strings.Contains(path, "vos-") { + + switch mode := fi.Mode(); { + case mode.IsDir(): + result = append(result, path+string(os.PathSeparator)) + case mode.IsRegular(): + if !vosRegexp.MatchString(path) { + continue + } result = append(result, path) } - return nil - }) + } + return } -func filterSuggestions(prefix string, initialSuggestions, additionalSuggestions []string) []string { - suggestions := append([]string{}, initialSuggestions...) 
- suggestions = append(suggestions, additionalSuggestions...) +func appendSuggestion(suggestions []string, suggestion string, prefix string) []string { + if len(prefix) == 0 { + return append(suggestions, suggestion) + } - if len(prefix) > 0 { - var newSuggestions []string - for _, s := range suggestions { - if strings.HasPrefix(s, prefix) { - newSuggestions = append(newSuggestions, strings.Trim(s, prefix)) - } - } - suggestions = newSuggestions + if !strings.HasPrefix(suggestion, prefix) { + return suggestions + } + + if len(suggestion) > 2 && suggestion[1] == prefix[0] { + // Workaround to properly handle invalid prefix management + return append(suggestions, suggestion) + } + return append(suggestions, strings.TrimPrefix(suggestion, prefix)) +} + +func filterSuggestions(prefix string, initialSuggestions, additionalSuggestions []string) (suggestions []string) { + suggestions = []string{} + + for _, suggestion := range initialSuggestions { + suggestions = appendSuggestion(suggestions, suggestion, prefix) + } + for _, suggestion := range additionalSuggestions { + suggestions = appendSuggestion(suggestions, suggestion, prefix) } - return suggestions + return } func openCompleter(prefix string, args []string) []string { - return filterSuggestions(prefix, []string{"-h", "-w", "--write_mode"}, listDirVos(defMntPrefix)) + return filterSuggestions( + prefix, + []string{"-w", "--write_mode", "-p", "--db_path=", "-h", "--help"}, + listVosFiles(prefix), + ) } func featureCompleter(prefix string, args []string) []string { - return filterSuggestions(prefix, []string{"-h", "-e", "--enable", "-d", "--disable", "-s", "--show"}, - listDirVos(defMntPrefix)) -} - -func listDirPool(match string) (result []string) { - if strings.HasSuffix(match, "vos-") { - match = filepath.Dir(match) - } - filepath.Walk(match, func(path string, info fs.FileInfo, err error) error { - if err != nil { - /* ignore error */ - return nil - } - result = append(result, path) - return nil - }) - return + 
return filterSuggestions( + prefix, + []string{"-e", "--enable", "-d", "--disable", "-s", "--show", "-h", "--help"}, + listVosFiles(prefix)) } func rmPoolCompleter(prefix string, args []string) []string { - return filterSuggestions(prefix, []string{"-h"}, listDirPool(defMntPrefix)) + return filterSuggestions( + prefix, + []string{"-h", "--help"}, + listVosFiles(prefix)) } diff --git a/src/control/cmd/ddb/command_completers_test.go b/src/control/cmd/ddb/command_completers_test.go new file mode 100644 index 00000000000..834fbfb048a --- /dev/null +++ b/src/control/cmd/ddb/command_completers_test.go @@ -0,0 +1,175 @@ +package main + +import ( + "os" + "path/filepath" + "testing" + + "github.com/daos-stack/daos/src/control/common/test" +) + +var ( + testPoolDirs = [...]string{"a", "ab", "aac", "aaad"} + testVosFiles = [...]string{"vos-0", "vos-1", "vos-2", "vos-10", "vos-201", "vos-000", "vos-a", "rdb-pool", "rdb-666"} +) + +func createFile(t *testing.T, filePath string) { + t.Helper() + + fd, err := os.Create(filePath) + if err != nil { + t.Fatalf("Failed to create test vos file %s: %v", filePath, err) + } + fd.Close() +} + +func createDirAll(t *testing.T, dirPath string) { + t.Helper() + + if err := os.MkdirAll(dirPath, 0755); err != nil { + t.Fatalf("Failed to create test pool directory %s: %v", dirPath, err) + } +} + +func testSetup(t *testing.T) (tmpDir string, teardown func()) { + t.Helper() + + tmpDir, teardown = test.CreateTestDir(t) + + for _, dir := range testPoolDirs { + createDirAll(t, filepath.Join(tmpDir, dir)) + for _, file := range testVosFiles { + createFile(t, filepath.Join(tmpDir, dir, file)) + } + } + + createDirAll(t, filepath.Join(tmpDir, "foo")) + createFile(t, filepath.Join(tmpDir, "foo", "bar")) + + createDirAll(t, filepath.Join(tmpDir, "bar")) + createDirAll(t, filepath.Join(tmpDir, "bar", "foo")) + createDirAll(t, filepath.Join(tmpDir, "bar", "baz")) + createFile(t, filepath.Join(tmpDir, "bar", "baz", "no_vos")) + + return +} + +func 
TestListVosFiles(t *testing.T) { + tmpDir, teardown := testSetup(t) + t.Cleanup(teardown) + + for name, tc := range map[string]struct { + args string + expRes []string + }{ + "unaccessible": { + args: "/root/", + expRes: []string{}, + }, + "No match": { + args: filepath.Join(tmpDir, "z"), + expRes: []string{}, + }, + "void director prefix": { + args: tmpDir + string(os.PathSeparator), + expRes: []string{ + filepath.Join(tmpDir, "a") + string(os.PathSeparator), + filepath.Join(tmpDir, "ab") + string(os.PathSeparator), + filepath.Join(tmpDir, "aac") + string(os.PathSeparator), + filepath.Join(tmpDir, "aaad") + string(os.PathSeparator), + filepath.Join(tmpDir, "foo") + string(os.PathSeparator), + filepath.Join(tmpDir, "bar") + string(os.PathSeparator), + }, + }, + "a pool directory prefix": { + args: filepath.Join(tmpDir, "a"), + expRes: []string{ + filepath.Join(tmpDir, "a") + string(os.PathSeparator), + filepath.Join(tmpDir, "ab") + string(os.PathSeparator), + filepath.Join(tmpDir, "aac") + string(os.PathSeparator), + filepath.Join(tmpDir, "aaad") + string(os.PathSeparator), + }, + }, + "aa pool directory prefix": { + args: filepath.Join(tmpDir, "aa"), + expRes: []string{ + filepath.Join(tmpDir, "aac") + string(os.PathSeparator), + filepath.Join(tmpDir, "aaad") + string(os.PathSeparator), + }, + }, + "all vos files": { + args: filepath.Join(tmpDir, "a") + string(os.PathSeparator), + expRes: []string{ + filepath.Join(tmpDir, "a", "vos-0"), + filepath.Join(tmpDir, "a", "vos-1"), + filepath.Join(tmpDir, "a", "vos-2"), + filepath.Join(tmpDir, "a", "vos-10"), + filepath.Join(tmpDir, "a", "vos-201"), + filepath.Join(tmpDir, "a", "rdb-pool"), + }, + }, + "vos-1 prefix files": { + args: filepath.Join(tmpDir, "a", "vos-1"), + expRes: []string{ + filepath.Join(tmpDir, "a", "vos-1"), + filepath.Join(tmpDir, "a", "vos-10"), + }, + }, + } { + t.Run(name, func(t *testing.T) { + results := listVosFiles(tc.args) + test.AssertStringsEqual(t, tc.expRes, results, "listDirVos results 
do not match expected") + }) + } +} + +func TestFilterSuggestions(t *testing.T) { + // The test cases are designed to cover various prefix scenarios. + // It should notably cover the case where the prefix is a single character that matches the + // second character of a suggestion, which is a special case in the appendSuggestion + // function: Workaround to properly handle invalid prefix management done by the grumble + // completion engine. + var ( + initialSuggestions = []string{"-a", "--all", "-b", "--bar="} + additionalSuggestions = []string{"foo", "a", "ab", "aac", "aaad"} + ) + + for name, tc := range map[string]struct { + prefix string + expRes []string + }{ + "no prefix": { + prefix: "", + expRes: []string{"-a", "--all", "-b", "--bar=", "foo", "a", "ab", "aac", "aaad"}, + }, + "no match prefix": { + prefix: "z", + expRes: []string{}, + }, + "with '-' prefix": { + prefix: "-", + expRes: []string{"a", "--all", "b", "--bar="}, + }, + "with '--' prefix": { + prefix: "--", + expRes: []string{"--all", "--bar="}, + }, + "with 'a' prefix": { + prefix: "a", + expRes: []string{"", "b", "aac", "aaad"}, + }, + "with 'aa' prefix": { + prefix: "aa", + expRes: []string{"aac", "aaad"}, + }, + "with 'aaa' prefix": { + prefix: "aaa", + expRes: []string{"aaad"}, + }, + } { + t.Run(name, func(t *testing.T) { + results := filterSuggestions(tc.prefix, initialSuggestions, additionalSuggestions) + test.AssertStringsEqual(t, tc.expRes, results, "filterSuggestions results do not match expected") + }) + } +} diff --git a/src/control/cmd/ddb/main.go b/src/control/cmd/ddb/main.go index b328821fda4..6dee5bd3363 100644 --- a/src/control/cmd/ddb/main.go +++ b/src/control/cmd/ddb/main.go @@ -13,7 +13,6 @@ import ( "path" "path/filepath" "runtime/debug" - "sort" "strings" "unsafe" @@ -43,15 +42,15 @@ func exitWithError(log logging.Logger, err error) { } type cliOptions struct { - Debug bool `long:"debug" description:"enable debug output"` - WriteMode bool `long:"write_mode" short:"w" 
description:"Open the vos file in write mode."` - CmdFile string `long:"cmd_file" short:"f" description:"Path to a file containing a sequence of ddb commands to execute."` - SysdbPath string `long:"db_path" short:"p" description:"Path to the sys db."` - VosPath vosPathStr `long:"vos_path" short:"s" description:"Path to the VOS file to open."` - Version bool `short:"v" long:"version" description:"Show version"` + Debug bool `long:"debug" description:"enable debug output"` + WriteMode bool `long:"write_mode" short:"w" description:"Open the vos file in write mode."` + CmdFile string `long:"cmd_file" short:"f" description:"Path to a file containing a sequence of ddb commands to execute."` + SysdbPath string `long:"db_path" short:"p" description:"Path to the sys db."` + VosPath string `long:"vos_path" short:"s" description:"Path to the VOS file to open."` + Version bool `short:"v" long:"version" description:"Show version"` Args struct { - RunCmd ddbCmdStr `positional-arg-name:"ddb_command"` - RunCmdArgs []string `positional-arg-name:"ddb_command_args"` + RunCmd string `positional-arg-name:"ddb_command"` + RunCmdArgs []string `positional-arg-name:"ddb_command_args"` } `positional-args:"yes"` } @@ -92,46 +91,6 @@ Example Paths: const grumbleUnknownCmdErr = "unknown command, try 'help'" -type vosPathStr string - -func (pathStr vosPathStr) Complete(match string) (comps []flags.Completion) { - if match == "" || match == "/" { - match = defMntPrefix - } - for _, comp := range listDirVos(match) { - comps = append(comps, flags.Completion{Item: comp}) - } - sort.Slice(comps, func(i, j int) bool { return comps[i].Item < comps[j].Item }) - - return -} - -type ddbCmdStr string - -func (cmdStr ddbCmdStr) Complete(match string) (comps []flags.Completion) { - // hack to get at command names - ctx, cleanup, err := InitDdb(nil) - if err != nil { - return - } - defer cleanup() - - app := createGrumbleApp(ctx) - for _, cmd := range app.Commands().All() { - if match == "" || 
strings.HasPrefix(cmd.Name, match) { - comps = append(comps, flags.Completion{Item: cmd.Name}) - } - } - sort.Slice(comps, func(i, j int) bool { return comps[i].Item < comps[j].Item }) - - return -} - -func (cmdStr *ddbCmdStr) UnmarshalFlag(fv string) error { - *cmdStr = ddbCmdStr(fv) - return nil -} - func runFileCmds(log logging.Logger, app *grumble.App, fileName string) error { file, err := os.Open(fileName) if err != nil { From dda09e457deb719d97d1c5af47e7d03cf140bbaf Mon Sep 17 00:00:00 2001 From: Jan Michalski Date: Thu, 12 Feb 2026 15:20:12 +0000 Subject: [PATCH 202/253] dlck: warn when running not as root or a daos_server group member (#17208) warn when running dlck not as root or daos_server daos/mgmt.h, fix error handling when getpwuid() fails, and introduce DLCK_FAULT_GETPWUID fault injection point to test the getpwuid() fail scenario. Signed-off-by: Jan Michalski --- src/engine/init.c | 4 +- src/include/daos/common.h | 17 ++- src/include/daos/mgmt.h | 4 +- src/include/daos/rpc.h | 4 +- src/utils/dlck/dlck_main.c | 115 +++++++++++++++++- .../dlck/tests/fault_injection_dlck.yaml | 18 +-- 6 files changed, 142 insertions(+), 20 deletions(-) diff --git a/src/engine/init.c b/src/engine/init.c index ed3bd80f183..0072b87be33 100644 --- a/src/engine/init.c +++ b/src/engine/init.c @@ -1,8 +1,7 @@ /* * (C) Copyright 2016-2024 Intel Corporation. 
- * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * (C) Copyright 2025 Google LLC - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -24,6 +23,7 @@ #include #include +#include #include #include #include "srv_internal.h" diff --git a/src/include/daos/common.h b/src/include/daos/common.h index 11b35d2aacf..e02c4f28022 100644 --- a/src/include/daos/common.h +++ b/src/include/daos/common.h @@ -937,12 +937,17 @@ enum { #define DAOS_MEM_FAIL_CHECKPOINT (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0x102) /** DLCK fault injection */ -#define DLCK_FAULT_CREATE_LOG_DIR (DAOS_FAIL_SYS_TEST_GROUP_LOC | 0x100) -#define DLCK_FAULT_CREATE_POOL_DIR (DAOS_FAIL_SYS_TEST_GROUP_LOC | 0x101) -#define DLCK_FAULT_ENGINE_START (DAOS_FAIL_SYS_TEST_GROUP_LOC | 0x102) -#define DLCK_FAULT_ENGINE_EXEC (DAOS_FAIL_SYS_TEST_GROUP_LOC | 0x103) -#define DLCK_FAULT_ENGINE_JOIN (DAOS_FAIL_SYS_TEST_GROUP_LOC | 0x104) -#define DLCK_FAULT_ENGINE_STOP (DAOS_FAIL_SYS_TEST_GROUP_LOC | 0x105) +#define DLCK_MOCK_ROOT (DAOS_FAIL_SYS_TEST_GROUP_LOC | 0x100) +#define DLCK_FAULT_GETGRNAM (DAOS_FAIL_SYS_TEST_GROUP_LOC | 0x101) +#define DLCK_MOCK_NO_DAOS_SERVER_GROUP (DAOS_FAIL_SYS_TEST_GROUP_LOC | 0x102) +#define DLCK_FAULT_GETGROUPS (DAOS_FAIL_SYS_TEST_GROUP_LOC | 0x103) +#define DLCK_MOCK_NOT_IN_DAOS_SERVER_GROUP (DAOS_FAIL_SYS_TEST_GROUP_LOC | 0x104) +#define DLCK_FAULT_CREATE_LOG_DIR (DAOS_FAIL_SYS_TEST_GROUP_LOC | 0x105) +#define DLCK_FAULT_CREATE_POOL_DIR (DAOS_FAIL_SYS_TEST_GROUP_LOC | 0x106) +#define DLCK_FAULT_ENGINE_START (DAOS_FAIL_SYS_TEST_GROUP_LOC | 0x107) +#define DLCK_FAULT_ENGINE_EXEC (DAOS_FAIL_SYS_TEST_GROUP_LOC | 0x108) +#define DLCK_FAULT_ENGINE_JOIN (DAOS_FAIL_SYS_TEST_GROUP_LOC | 0x109) +#define DLCK_FAULT_ENGINE_STOP (DAOS_FAIL_SYS_TEST_GROUP_LOC | 0x10a) /** Pool open fault injection */ #define DAOS_FAULT_POOL_NVME_HEALTH 
(DAOS_FAIL_SYS_TEST_GROUP_LOC | 0x200) diff --git a/src/include/daos/mgmt.h b/src/include/daos/mgmt.h index 5ea8f7cbd1b..fc63ab22036 100644 --- a/src/include/daos/mgmt.h +++ b/src/include/daos/mgmt.h @@ -1,6 +1,6 @@ /** * (C) Copyright 2016-2024 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -18,6 +18,8 @@ #include #include "svc.pb-c.h" +#define DAOS_DEFAULT_SYS_NAME "daos_server" + extern bool d_dynamic_ctx_g; int dc_mgmt_init(void); diff --git a/src/include/daos/rpc.h b/src/include/daos/rpc.h index 1db29cf0901..a92a6752c54 100644 --- a/src/include/daos/rpc.h +++ b/src/include/daos/rpc.h @@ -1,6 +1,6 @@ /* * (C) Copyright 2016-2024 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -311,8 +311,6 @@ int daos_rpc_send(crt_rpc_t *rpc, tse_task_t *task); int daos_rpc_complete(crt_rpc_t *rpc, tse_task_t *task); int daos_rpc_send_wait(crt_rpc_t *rpc); -#define DAOS_DEFAULT_SYS_NAME "daos_server" - /* Currently, this is used on rcs in metadata RPC reply buffers. 
*/ static inline bool daos_rpc_retryable_rc(int rc) diff --git a/src/utils/dlck/dlck_main.c b/src/utils/dlck/dlck_main.c index 01311155847..d380de67c60 100644 --- a/src/utils/dlck/dlck_main.c +++ b/src/utils/dlck/dlck_main.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -7,10 +7,13 @@ #include #include +#include +#include #include #include #include +#include #include #include @@ -18,6 +21,114 @@ #include "dlck_checker.h" #include "dlck_cmds.h" +#define EFFECTIVE_USER_STR "Effective user: " +#define USER_BELONGS_TO_GRP_FMT "User %sbelong%s to group: %s (gid=%" PRIuMAX ")\n" +#define UNEXPECTED_USER_WARNING_MSG \ + "\nWARNING: It is recommended to run this program as root or as a user who belongs to " \ + "the '" DAOS_DEFAULT_SYS_NAME "' group.\n" \ + "Running it under any other account may cause the program to stop due to insufficient " \ + "privileges.\n\n" + +static bool +user_is_root(struct checker *ck) +{ + uid_t euid = geteuid(); + + if (DAOS_FAIL_CHECK(DLCK_MOCK_ROOT)) { /** fault injection */ + /** it does not have ANY effect on the actual privileges of the user */ + euid = 0; + } + + if (euid == 0) { + /** The root user is not always named "root" but its uid is always 0. 
*/ + CK_PRINT(ck, EFFECTIVE_USER_STR "root\n"); + return true; + } + + CK_PRINTF(ck, EFFECTIVE_USER_STR "uid=%" PRIuMAX "\n", (uintmax_t)euid); + return false; +} + +#define MAX_GROUPS 128 + +static bool +user_belongs_to_group(const char *group_name, struct checker *ck) +{ + struct group *group = NULL; + gid_t group_id; + gid_t groups[MAX_GROUPS]; + int rc; + + /** get GID of the requested group */ + if (DAOS_FAIL_CHECK(DLCK_FAULT_GETGRNAM)) { /** fault injection */ + errno = daos_fail_value_get(); + } else if (DAOS_FAIL_CHECK(DLCK_MOCK_NO_DAOS_SERVER_GROUP)) { /** fault injection */ + errno = 0; + } else { + errno = 0; + group = getgrnam(group_name); + } + if (group == NULL) { + if (errno != 0) { + rc = daos_errno2der(errno); + CK_PRINTFL_RC(ck, rc, "getgrnam(%s) failed", group_name); + } else { + CK_PRINTF(ck, "The %s group does not exist.\n", group_name); + } + return false; + } + group_id = group->gr_gid; + + /** check primary group */ + if (getgid() == group_id) { + CK_PRINTF(ck, USER_BELONGS_TO_GRP_FMT, "", "s", group_name, (uintmax_t)group_id); + return true; + } + + /** get supplementary groups */ + if (DAOS_FAIL_CHECK(DLCK_FAULT_GETGROUPS)) { /** fault injection */ + rc = -1; + errno = daos_fail_value_get(); + } else { + rc = getgroups(MAX_GROUPS, groups); + } + if (rc < 0) { + rc = daos_errno2der(errno); + CK_PRINTFL_RC(ck, rc, "getgroups() failed", group_name); + return false; + } + + /** check supplementary groups */ + if (!DAOS_FAIL_CHECK(DLCK_MOCK_NOT_IN_DAOS_SERVER_GROUP)) { /** fault injection */ + for (int i = 0; i < rc; i++) { + if (groups[i] == group_id) { + CK_PRINTF(ck, USER_BELONGS_TO_GRP_FMT, "", "s", group_name, + (uintmax_t)group_id); + return true; + } + } + } + + CK_PRINTF(ck, USER_BELONGS_TO_GRP_FMT, "DOES NOT ", "", group_name, (uintmax_t)group_id); + + return false; +} + +static void +check_user_privileges(struct checker *ck) +{ + if (user_is_root(ck)) { + /** the root user is assumed to have all required privileges */ + return; + } + 
+ if (user_belongs_to_group(DAOS_DEFAULT_SYS_NAME, ck)) { + return; + } + + CK_PRINT(ck, UNEXPECTED_USER_WARNING_MSG); +} + int main(int argc, char *argv[]) { @@ -55,6 +166,8 @@ main(int argc, char *argv[]) goto err_abt_fini; } + check_user_privileges(&ctrl.checker); + rc = dlck_cmd_check(&ctrl); if (rc != DER_SUCCESS) { goto err_print_main_fini; diff --git a/src/utils/dlck/tests/fault_injection_dlck.yaml b/src/utils/dlck/tests/fault_injection_dlck.yaml index 8dd036f3ce0..5d822eaaa02 100644 --- a/src/utils/dlck/tests/fault_injection_dlck.yaml +++ b/src/utils/dlck/tests/fault_injection_dlck.yaml @@ -1,12 +1,16 @@ # Uncomment a fault you would like to trigger -# yamllint disable rule:comments-indentation fault_config: - # - id: 131328 # DLCK_FAULT_CREATE_LOG_DIR - # - id: 131329 # DLCK_FAULT_CREATE_POOL_DIR - # - id: 131330 # DLCK_FAULT_ENGINE_START - # - id: 131331 # DLCK_FAULT_ENGINE_EXEC - # - id: 131332 # DLCK_FAULT_ENGINE_JOIN - # - id: 131333 # DLCK_FAULT_ENGINE_STOP + # - id: 131328 # DLCK_MOCK_ROOT + # - id: 131329 # DLCK_FAULT_GETGRNAM + # - id: 131330 # DLCK_MOCK_NO_DAOS_SERVER_GROUP + # - id: 131331 # DLCK_FAULT_GETGROUPS + # - id: 131332 # DLCK_MOCK_NOT_IN_DAOS_SERVER_GROUP + # - id: 131333 # DLCK_FAULT_CREATE_LOG_DIR + # - id: 131334 # DLCK_FAULT_CREATE_POOL_DIR + # - id: 131335 # DLCK_FAULT_ENGINE_START + # - id: 131336 # DLCK_FAULT_ENGINE_EXEC + # - id: 131337 # DLCK_FAULT_ENGINE_JOIN + # - id: 131338 # DLCK_FAULT_ENGINE_STOP # - id: 131584 # DAOS_FAULT_POOL_NVME_HEALTH # interval: 2 # skip sys_db # - id: 131585 # DAOS_FAULT_POOL_OPEN_BIO From b9c419d37c6c9ea794465ab5d7b57c7c5cf55d54 Mon Sep 17 00:00:00 2001 From: Jan Michalski Date: Thu, 12 Feb 2026 15:21:14 +0000 Subject: [PATCH 203/253] DAOS-18574 test: adjust tsan suppressions (part 2) (#17531) Ref: daos-stack/daos#17471 { Memcheck:Value8 fun:atomic_load<__sanitizer::atomic_uint32_t> fun:LoadShadow fun:ContainsSameAccess fun:MemoryAccessRangeOne 
fun:_ZN6__tsan18MemoryAccessRangeTILb0EEEvPNS_11ThreadStateEmmm fun:racecall } { Memcheck:Value8 fun:atomic_load<__sanitizer::atomic_uint32_t> fun:LoadShadow fun:CheckRaces fun:MemoryAccessRangeOne fun:_ZN6__tsan18MemoryAccessRangeTILb1EEEvPNS_11ThreadStateEmmm fun:racecall } { Memcheck:Value8 fun:atomic_store<__sanitizer::atomic_uint32_t> fun:StoreShadow fun:CheckRaces fun:MemoryAccessRangeOne fun:_ZN6__tsan18MemoryAccessRangeTILb1EEEvPNS_11ThreadStateEmmm fun:racecall } { Memcheck:Value8 fun:atomic_load<__sanitizer::atomic_uint32_t> fun:LoadShadow fun:ContainsSameAccess fun:MemoryAccess fun:__tsan_write fun:racecall } { Memcheck:Value8 fun:atomic_load<__sanitizer::atomic_uint32_t> fun:LoadShadow fun:CheckRaces fun:MemoryAccess fun:__tsan_write fun:racecall } { Memcheck:Value8 fun:MemoryAccess fun:__tsan_read fun:racecall } { Memcheck:Value8 fun:atomic_load<__sanitizer::atomic_uint32_t> fun:LoadShadow fun:CheckRaces fun:MemoryAccess fun:__tsan_read fun:racecall } { Memcheck:Value8 fun:atomic_store<__sanitizer::atomic_uint32_t> fun:StoreShadow fun:CheckRaces fun:MemoryAccess fun:__tsan_read fun:racecall } Signed-off-by: Jan Michalski --- src/cart/utils/memcheck-cart.supp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/cart/utils/memcheck-cart.supp b/src/cart/utils/memcheck-cart.supp index 30c49052d44..2cb7347ed55 100644 --- a/src/cart/utils/memcheck-cart.supp +++ b/src/cart/utils/memcheck-cart.supp @@ -651,6 +651,7 @@ { tsan::MemoryAccessRange Memcheck:Value8 + ... fun:_ZN6__tsan18MemoryAccessRangeTILb0EEEvPNS_11ThreadStateEmmm ... fun:racecall @@ -658,6 +659,7 @@ { tsan::MemoryAccessRange Memcheck:Value8 + ... fun:_ZN6__tsan18MemoryAccessRangeTILb1EEEvPNS_11ThreadStateEmmm ... fun:racecall @@ -665,6 +667,7 @@ { tsan::TraceRestartMemoryAccess Memcheck:Value8 + ... fun:_ZN6__tsan24TraceRestartMemoryAccessEPNS_11ThreadStateEmmmm ... fun:racecall @@ -672,12 +675,14 @@ { __tsan_read Memcheck:Value8 + ... 
fun:__tsan_read fun:racecall } { __tsan_write Memcheck:Value8 + ... fun:__tsan_write fun:racecall } From 0a7ffbf057a0d2fb499101952ec0f0ac338ac9b3 Mon Sep 17 00:00:00 2001 From: Kris Jacque Date: Thu, 12 Feb 2026 10:15:51 -0700 Subject: [PATCH 204/253] DAOS-15636 doc: Include engine log mask values in config file (#17551) Engine log_mask values are different from those for the control log mask. Signed-off-by: Kris Jacque --- utils/config/daos_server.yml | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/utils/config/daos_server.yml b/utils/config/daos_server.yml index 88e93fcd3f1..4876c71db74 100644 --- a/utils/config/daos_server.yml +++ b/utils/config/daos_server.yml @@ -378,7 +378,10 @@ # # Force specific debug mask for the engine at start up time. # # By default, just use the default debug mask used by DAOS. # # Mask specifies minimum level of message significance to pass to logger. -# +# # Currently supported values: +# # DEBUG, DBUG (alias for DEBUG), INFO, NOTE, WARN, ERROR, ERR (alias for ERROR), CRIT, ALRT, +# # FATAL, EMRG, EMIT +# # # # default: ERR # log_mask: INFO # @@ -546,7 +549,10 @@ # # Force specific debug mask for the engine at start up time. # # By default, just use the default debug mask used by DAOS. # # Mask specifies minimum level of message significance to pass to logger. 
-# +# # Currently supported values: +# # DEBUG, DBUG (alias for DEBUG), INFO, NOTE, WARN, ERROR, ERR (alias for ERROR), CRIT, ALRT, +# # FATAL, EMRG, EMIT +# # # # default: ERR # log_mask: INFO # From 4bc31e822a3bb82407782f5b35e0990b5a10db3b Mon Sep 17 00:00:00 2001 From: Alexander Oganezov Date: Thu, 12 Feb 2026 15:04:32 -0800 Subject: [PATCH 205/253] DAOS-18591 cart: add 'dump_counters' option to cart_ctl (#17549) - Adds 'dump_counters' command to cart_ctl and associated server side handling code Signed-off-by: Alexander A Oganezov --- src/cart/crt_ctl.c | 23 ++++++++ src/cart/crt_internal.h | 4 +- src/cart/crt_rpc.c | 5 +- src/cart/crt_rpc.h | 114 +++++++++++++++------------------ src/utils/ctl/cart_ctl.c | 13 ++++- 5 files changed, 84 insertions(+), 75 deletions(-) diff --git a/src/cart/crt_ctl.c b/src/cart/crt_ctl.c index 4be89d72e0c..91b50125cf3 100644 --- a/src/cart/crt_ctl.c +++ b/src/cart/crt_ctl.c @@ -1,5 +1,6 @@ /* * (C) Copyright 2018-2024 Intel Corporation. + * (C) Copyright 2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -151,6 +152,28 @@ crt_hdlr_ctl_get_hostname(crt_rpc_t *rpc_req) D_ERROR("crt_reply_send() failed with rc %d\n", rc); } +void +crt_hdlr_ctl_dump_counters(crt_rpc_t *rpc_req) +{ + char old_dlog_mask[1024]; + + /* HG counters require log levels to be at debug to be printed */ + + /* store current log mask */ + d_log_getmasks(old_dlog_mask, 0, sizeof(old_dlog_mask), 0); + d_log_setmasks("DEBUG", -1); + HG_Set_log_level("debug"); + + HG_Diag_dump_counters(); + + /* restore log masks */ + /* Note: we cannot query log level from HG today so we restore back to 'warning' */ + HG_Set_log_level("warning"); + d_log_setmasks(old_dlog_mask, -1); + + crt_reply_send(rpc_req); +} + void crt_hdlr_ctl_get_pid(crt_rpc_t *rpc_req) { diff --git a/src/cart/crt_internal.h b/src/cart/crt_internal.h index 69d50fe31f0..d62be93af78 100644 --- a/src/cart/crt_internal.h +++ b/src/cart/crt_internal.h @@
-1,6 +1,6 @@ /* * (C) Copyright 2016-2024 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -153,6 +153,8 @@ void crt_hdlr_ctl_get_hostname(crt_rpc_t *rpc_req); void crt_hdlr_ctl_get_pid(crt_rpc_t *rpc_req); +void +crt_hdlr_ctl_dump_counters(crt_rpc_t *rpc_req); void crt_iv_init(crt_init_options_t *ops); diff --git a/src/cart/crt_rpc.c b/src/cart/crt_rpc.c index da2a8383908..b858964cdbd 100644 --- a/src/cart/crt_rpc.c +++ b/src/cart/crt_rpc.c @@ -1,6 +1,6 @@ /* * (C) Copyright 2016-2024 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -165,7 +165,7 @@ crt_proc_struct_crt_grp_cache(crt_proc_t proc, crt_proc_op_t proc_op, return crt_proc_crt_grp_cache(proc, data); } -/* !! All of the following 4 RPC definition should have the same input fields !! +/* !! All of the following 5 RPC definition should have the same input fields !! * All of them are verified in one function: * int verify_ctl_in_args(struct crt_ctl_ep_ls_in *in_args) */ @@ -173,6 +173,7 @@ CRT_RPC_DEFINE(crt_ctl_get_uri_cache, CRT_ISEQ_CTL, CRT_OSEQ_CTL_GET_URI_CACHE) CRT_RPC_DEFINE(crt_ctl_ep_ls, CRT_ISEQ_CTL, CRT_OSEQ_CTL_EP_LS) CRT_RPC_DEFINE(crt_ctl_get_host, CRT_ISEQ_CTL, CRT_OSEQ_CTL_GET_HOST) CRT_RPC_DEFINE(crt_ctl_get_pid, CRT_ISEQ_CTL, CRT_OSEQ_CTL_GET_PID) +CRT_RPC_DEFINE(crt_ctl_dump_counters, CRT_ISEQ_CTL, CRT_OSEQ_CTL_DUMP_COUNTERS) CRT_RPC_DEFINE(crt_proto_query, CRT_ISEQ_PROTO_QUERY, CRT_OSEQ_PROTO_QUERY) diff --git a/src/cart/crt_rpc.h b/src/cart/crt_rpc.h index c99a80f4e26..8af18e0166d 100644 --- a/src/cart/crt_rpc.h +++ b/src/cart/crt_rpc.h @@ -1,6 +1,6 @@ /* * (C) Copyright 2016-2024 Intel Corporation. 
- * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -262,76 +262,44 @@ crt_rpc_unlock(struct crt_rpc_priv *rpc_priv) * this to ping the server waiting for start so needs to work before * proto_query() can be called. */ -#define CRT_INTERNAL_RPCS_LIST \ - X(CRT_OPC_URI_LOOKUP, \ - 0, &CQF_crt_uri_lookup, \ - crt_hdlr_uri_lookup, NULL) \ - X(CRT_OPC_PROTO_QUERY, \ - 0, &CQF_crt_proto_query, \ - crt_hdlr_proto_query, NULL) \ - X(CRT_OPC_CTL_LS, \ - 0, &CQF_crt_ctl_ep_ls, \ - crt_hdlr_ctl_ls, NULL) \ - -#define CRT_FI_RPCS_LIST \ - X(CRT_OPC_CTL_FI_TOGGLE, \ - 0, &CQF_crt_ctl_fi_toggle, \ - crt_hdlr_ctl_fi_toggle, NULL) \ - X(CRT_OPC_CTL_FI_SET_ATTR, \ - 0, &CQF_crt_ctl_fi_attr_set, \ - crt_hdlr_ctl_fi_attr_set, NULL) \ - -#define CRT_ST_RPCS_LIST \ - X(CRT_OPC_SELF_TEST_BOTH_EMPTY, \ - 0, NULL, \ - crt_self_test_msg_handler, NULL) \ - X(CRT_OPC_SELF_TEST_SEND_ID_REPLY_IOV, \ - 0, &CQF_crt_st_send_id_reply_iov, \ - crt_self_test_msg_handler, NULL) \ - X(CRT_OPC_SELF_TEST_SEND_IOV_REPLY_EMPTY, \ - 0, &CQF_crt_st_send_iov_reply_empty, \ - crt_self_test_msg_handler, NULL) \ - X(CRT_OPC_SELF_TEST_BOTH_IOV, \ - 0, &CQF_crt_st_both_iov, \ - crt_self_test_msg_handler, NULL) \ - X(CRT_OPC_SELF_TEST_SEND_BULK_REPLY_IOV, \ - 0, &CQF_crt_st_send_bulk_reply_iov, \ - crt_self_test_msg_handler, NULL) \ - X(CRT_OPC_SELF_TEST_SEND_IOV_REPLY_BULK, \ - 0, &CQF_crt_st_send_iov_reply_bulk, \ - crt_self_test_msg_handler, NULL) \ - X(CRT_OPC_SELF_TEST_BOTH_BULK, \ - 0, &CQF_crt_st_both_bulk, \ - crt_self_test_msg_handler, NULL) \ - X(CRT_OPC_SELF_TEST_OPEN_SESSION, \ - 0, &CQF_crt_st_open_session, \ - crt_self_test_open_session_handler, NULL) \ - X(CRT_OPC_SELF_TEST_CLOSE_SESSION, \ - 0, &CQF_crt_st_close_session, \ - crt_self_test_close_session_handler, NULL) \ - X(CRT_OPC_SELF_TEST_START, \ - 0, &CQF_crt_st_start, \ - crt_self_test_start_handler, NULL) 
\ - X(CRT_OPC_SELF_TEST_STATUS_REQ, \ - 0, &CQF_crt_st_status_req, \ - crt_self_test_status_req_handler, NULL) \ - -#define CRT_CTL_RPCS_LIST \ - X(CRT_OPC_CTL_LOG_SET, \ - 0, &CQF_crt_ctl_log_set, \ - crt_hdlr_ctl_log_set, NULL) \ - X(CRT_OPC_CTL_LOG_ADD_MSG, \ - 0, &CQF_crt_ctl_log_add_msg, \ - crt_hdlr_ctl_log_add_msg, NULL) \ - X(CRT_OPC_CTL_GET_URI_CACHE, \ - 0, &CQF_crt_ctl_get_uri_cache, \ - crt_hdlr_ctl_get_uri_cache, NULL) \ - X(CRT_OPC_CTL_GET_HOSTNAME, \ - 0, &CQF_crt_ctl_get_host, \ - crt_hdlr_ctl_get_hostname, NULL) \ - X(CRT_OPC_CTL_GET_PID, \ - 0, &CQF_crt_ctl_get_pid, \ - crt_hdlr_ctl_get_pid, NULL) \ +#define CRT_INTERNAL_RPCS_LIST \ + X(CRT_OPC_URI_LOOKUP, 0, &CQF_crt_uri_lookup, crt_hdlr_uri_lookup, NULL) \ + X(CRT_OPC_PROTO_QUERY, 0, &CQF_crt_proto_query, crt_hdlr_proto_query, NULL) \ + X(CRT_OPC_CTL_LS, 0, &CQF_crt_ctl_ep_ls, crt_hdlr_ctl_ls, NULL) + +#define CRT_FI_RPCS_LIST \ + X(CRT_OPC_CTL_FI_TOGGLE, 0, &CQF_crt_ctl_fi_toggle, crt_hdlr_ctl_fi_toggle, NULL) \ + X(CRT_OPC_CTL_FI_SET_ATTR, 0, &CQF_crt_ctl_fi_attr_set, crt_hdlr_ctl_fi_attr_set, NULL) + +#define CRT_ST_RPCS_LIST \ + X(CRT_OPC_SELF_TEST_BOTH_EMPTY, 0, NULL, crt_self_test_msg_handler, NULL) \ + X(CRT_OPC_SELF_TEST_SEND_ID_REPLY_IOV, 0, &CQF_crt_st_send_id_reply_iov, \ + crt_self_test_msg_handler, NULL) \ + X(CRT_OPC_SELF_TEST_SEND_IOV_REPLY_EMPTY, 0, &CQF_crt_st_send_iov_reply_empty, \ + crt_self_test_msg_handler, NULL) \ + X(CRT_OPC_SELF_TEST_BOTH_IOV, 0, &CQF_crt_st_both_iov, crt_self_test_msg_handler, NULL) \ + X(CRT_OPC_SELF_TEST_SEND_BULK_REPLY_IOV, 0, &CQF_crt_st_send_bulk_reply_iov, \ + crt_self_test_msg_handler, NULL) \ + X(CRT_OPC_SELF_TEST_SEND_IOV_REPLY_BULK, 0, &CQF_crt_st_send_iov_reply_bulk, \ + crt_self_test_msg_handler, NULL) \ + X(CRT_OPC_SELF_TEST_BOTH_BULK, 0, &CQF_crt_st_both_bulk, crt_self_test_msg_handler, NULL) \ + X(CRT_OPC_SELF_TEST_OPEN_SESSION, 0, &CQF_crt_st_open_session, \ + crt_self_test_open_session_handler, NULL) \ + 
X(CRT_OPC_SELF_TEST_CLOSE_SESSION, 0, &CQF_crt_st_close_session, \ + crt_self_test_close_session_handler, NULL) \ + X(CRT_OPC_SELF_TEST_START, 0, &CQF_crt_st_start, crt_self_test_start_handler, NULL) \ + X(CRT_OPC_SELF_TEST_STATUS_REQ, 0, &CQF_crt_st_status_req, \ + crt_self_test_status_req_handler, NULL) + +#define CRT_CTL_RPCS_LIST \ + X(CRT_OPC_CTL_LOG_SET, 0, &CQF_crt_ctl_log_set, crt_hdlr_ctl_log_set, NULL) \ + X(CRT_OPC_CTL_LOG_ADD_MSG, 0, &CQF_crt_ctl_log_add_msg, crt_hdlr_ctl_log_add_msg, NULL) \ + X(CRT_OPC_CTL_GET_URI_CACHE, 0, &CQF_crt_ctl_get_uri_cache, crt_hdlr_ctl_get_uri_cache, \ + NULL) \ + X(CRT_OPC_CTL_GET_HOSTNAME, 0, &CQF_crt_ctl_get_host, crt_hdlr_ctl_get_hostname, NULL) \ + X(CRT_OPC_CTL_GET_PID, 0, &CQF_crt_ctl_get_pid, crt_hdlr_ctl_get_pid, NULL) \ + X(CRT_OPC_CTL_DUMP_COUNTERS, 0, &CQF_crt_ctl_dump_counters, crt_hdlr_ctl_dump_counters, \ + NULL) #define CRT_IV_RPCS_LIST \ X(CRT_OPC_IV_FETCH, \ @@ -580,6 +548,10 @@ CRT_RPC_DECLARE(crt_ctl_get_uri_cache, CRT_ISEQ_CTL, CRT_OSEQ_CTL_GET_URI_CACHE) CRT_RPC_DECLARE(crt_ctl_get_host, CRT_ISEQ_CTL, CRT_OSEQ_CTL_GET_HOST) +#define CRT_OSEQ_CTL_DUMP_COUNTERS /* output fields */ ((uint32_t)(rc)CRT_VAR) + +CRT_RPC_DECLARE(crt_ctl_dump_counters, CRT_ISEQ_CTL, CRT_OSEQ_CTL_DUMP_COUNTERS) + #define CRT_OSEQ_CTL_GET_PID /* output fields */ \ ((int32_t) (cgp_pid) CRT_VAR) \ ((int32_t) (cgp_rc) CRT_VAR) diff --git a/src/utils/ctl/cart_ctl.c b/src/utils/ctl/cart_ctl.c index 3bdf65b2bbb..e135fe365f1 100644 --- a/src/utils/ctl/cart_ctl.c +++ b/src/utils/ctl/cart_ctl.c @@ -1,5 +1,6 @@ /* * (C) Copyright 2018-2024 Intel Corporation. 
+ * (C) Copyright 2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -54,6 +55,7 @@ enum cmd_t { CMD_SET_FI_ATTR, CMD_LOG_SET, CMD_LOG_ADD_MSG, + CMD_DUMP_COUNTERS, }; struct cmd_info { @@ -78,6 +80,7 @@ struct cmd_info cmds[] = { DEF_CMD(CMD_SET_FI_ATTR, CRT_OPC_CTL_FI_SET_ATTR), DEF_CMD(CMD_LOG_SET, CRT_OPC_CTL_LOG_SET), DEF_CMD(CMD_LOG_ADD_MSG, CRT_OPC_CTL_LOG_ADD_MSG), + DEF_CMD(CMD_DUMP_COUNTERS, CRT_OPC_CTL_DUMP_COUNTERS), }; static char * @@ -270,7 +273,7 @@ print_usage_msg(const char *msg) msg("Usage: cart_ctl --group-name name --rank " "start-end,start-end,rank,rank\n"); msg("\ncmds: get_uri_cache, list_ctx, get_hostname, get_pid, "); - msg("set_log, set_fi_attr, add_log_msg\n"); + msg("set_log, set_fi_attr, add_log_msg, dump_counters\n"); msg("\nset_log:\n"); msg("\tSet log to mask passed via -l argument\n"); msg("\nget_uri_cache:\n"); @@ -281,6 +284,8 @@ print_usage_msg(const char *msg) msg("\tPrint hostnames of specified ranks\n"); msg("\nget_pid:\n"); msg("\tReturn pids of the specified ranks\n"); + msg("\ndump_counters:\n"); + msg("\tDump mercury counters into the server log\n"); msg("\nset_fi_attr\n"); msg("\tset fault injection attributes for a fault ID. 
This command\n" "\tmust be accompanied by the option\n" @@ -337,6 +342,8 @@ parse_args(int argc, char **argv) ctl_gdata.cg_cmd_code = CMD_LOG_ADD_MSG; else if (strcmp(argv[1], "use_daos_agent_env") == 0) ctl_gdata.cg_use_daos_agent_env = true; + else if (strcmp(argv[1], "dump_counters") == 0) + ctl_gdata.cg_cmd_code = CMD_DUMP_COUNTERS; else { print_usage_msg("Invalid command\n"); D_GOTO(out, rc = -DER_INVAL); @@ -523,6 +530,10 @@ ctl_cli_cb(const struct crt_cb_info *cb_info) msg("pid: %d\n", out->cgp_pid); } break; + case CMD_DUMP_COUNTERS: { + msg("counters dumped into a server log\n"); + } break; + default: break; } From 1634f7955a53b4d1336bf7f02224db7283a00a51 Mon Sep 17 00:00:00 2001 From: Ken Cain Date: Fri, 13 Feb 2026 07:45:02 -0500 Subject: [PATCH 206/253] DAOS-18425 rebuild: NAK certain rebuild stop commands (#17421) * DAOS-18425 rebuild: NAK certain rebuild stop commands When a dmg pool rebuild stop (or system rebuild stop) command is run, the PS leader should refuse to stop a currently-running rebuild if there are more scheduled rebuilds for the pool in the rg_queue_list. In this case, -DER_NO_PERM is returned to the dmg command. Also, for usability of the feature, the handling of the stop command will return errors when: - there is no currently-running rebuild (-DER_NONEXIST) - the rebuild has effectively finished, and is simply cleaning up (i.e., it is in op:Reclaim now) (-DER_BUSY) Rework daos_test functions for interactive rebuild testing to address test timing problems. - Remove reliance on pre-command sleep in test functions that perform pool rebuild stop commands. - Change rebuild stop functions to check for -DER_NONEXIST NAK and loop until that condition disappears (prevent stop commands too early). - Add test_rebuild_wait_to_start_lower() for tests to monitor for transition from op:Rebuild to op:Fail_reclaim. - Add test_rebuild_wait_to_start_next() for tests to wait for the Fail_reclaim->Rebuild (retry) transition.
- Copy pool query results into test arg pool info when an interactive rebuild test invokes the various test functions to (loop and) wait for certain rebuild conditions to occur. Remove manual pool query and map/rs_version monitoring from test case code, replacing it with simpler test function calls. Signed-off-by: Kenneth Cain --- src/rebuild/srv.c | 81 ++++++++++++--- src/tests/suite/daos_rebuild_common.c | 43 ++++---- src/tests/suite/daos_rebuild_interactive.c | 112 +++++++++++---------- src/tests/suite/daos_test.h | 4 +- src/tests/suite/daos_test_common.c | 82 ++++++++++++--- 5 files changed, 217 insertions(+), 105 deletions(-) diff --git a/src/rebuild/srv.c b/src/rebuild/srv.c index aeb8b2daa87..91185bb5cc3 100644 --- a/src/rebuild/srv.c +++ b/src/rebuild/srv.c @@ -891,14 +891,62 @@ enum { }; static bool -rebuild_is_stoppable(struct rebuild_global_pool_tracker *rgt, bool force) +rebuild_is_stoppable(struct rebuild_global_pool_tracker *rgt, bool force, int *rcp) { - if ((rgt->rgt_opc == RB_OP_REBUILD) || (rgt->rgt_opc == RB_OP_UPGRADE)) + /* NAK if nothing is rebuilding */ + if (rgt == NULL) { + *rcp = -DER_NONEXIST; + return false; + } + + /* NAK if another rebuild is queued for the same pool (it would run after this one stopped) + */ + if (!d_list_empty(&rebuild_gst.rg_queue_list)) { + struct rebuild_task *task; + + d_list_for_each_entry(task, &rebuild_gst.rg_queue_list, dst_list) { + if (uuid_compare(task->dst_pool_uuid, rgt->rgt_pool_uuid) == 0) { + *rcp = -DER_NO_PERM; + return false; + } + } + } + + if ((rgt->rgt_opc == RB_OP_REBUILD) || (rgt->rgt_opc == RB_OP_UPGRADE)) { + *rcp = 0; return true; + } - if ((rgt->rgt_opc == RB_OP_FAIL_RECLAIM) && force && (rgt->rgt_num_op_freclaim_fail > 0)) + /* Defer stop for many Fail_reclaim cases (until after it finishes). Do not return errors. 
+ * Only allow force-stop of repeating failures in Fail_reclaim + */ + if (rgt->rgt_opc == RB_OP_FAIL_RECLAIM && force) { + if (rgt->rgt_num_op_freclaim_fail == 0) { + D_INFO(DF_RB + ": cannot force-stop op:Fail_reclaim with 0 failures - defer stop " + "until after it finishes\n", + DP_RB_RGT(rgt)); + *rcp = 0; + return false; + } + D_INFO(DF_RB ": force-stop in op:Fail_reclaim after %u failures\n", DP_RB_RGT(rgt), + rgt->rgt_num_op_freclaim_fail); + *rcp = 0; return true; + } else if (rgt->rgt_opc == RB_OP_FAIL_RECLAIM) { + D_INFO(DF_RB ": defer stop until after op:Fail_reclaim finishes\n", DP_RB_RGT(rgt)); + *rcp = 0; + return false; + } + + /* NAK if this rebuild is Reclaim (i.e., it's effectively done) */ + if (rgt->rgt_opc == RB_OP_RECLAIM) { + *rcp = -DER_BUSY; + return false; + } + /* Not expected */ + *rcp = -DER_MISC; return false; } @@ -907,34 +955,35 @@ int ds_rebuild_admin_stop(struct ds_pool *pool, uint32_t force) { struct rebuild_global_pool_tracker *rgt; + int rc = 0; /* look up the running rebuild and mark it as aborted (and by the administrator) */ rgt = rebuild_global_pool_tracker_lookup(pool->sp_uuid, -1 /* ver */, -1 /* gen */); - if (rgt == NULL) { - /* nothing running, make it a no-op */ - D_INFO(DF_UUID ": received request to stop rebuild - but nothing found to stop\n", - DP_UUID(pool->sp_uuid)); - return 0; - } - /* admin stop command does not terminate reclaim/fail_reclaim jobs (unless forced) */ - if (rebuild_is_stoppable(rgt, force)) { + /* admin stop command only for specific cases (and force option for failing op:Fail_reclaim) + */ + if (rebuild_is_stoppable(rgt, force, &rc)) { D_INFO(DF_RB ": stopping rebuild force=%u opc %u(%s)\n", DP_RB_RGT(rgt), force, rgt->rgt_opc, RB_OP_STR(rgt->rgt_opc)); rgt->rgt_abort = 1; rgt->rgt_status.rs_errno = -DER_OP_CANCELED; } else { - D_INFO(DF_RB ": NOT stopping rebuild during opc %u(%s)\n", DP_RB_RGT(rgt), - rgt->rgt_opc, RB_OP_STR(rgt->rgt_opc)); + if (rgt) { + D_INFO(DF_RB ": NOT stopping rebuild 
force=%u opc %u(%s), rc=%d\n", + DP_RB_RGT(rgt), force, rgt->rgt_opc, RB_OP_STR(rgt->rgt_opc), rc); + } else { + DL_INFO(rc, DF_UUID ": nothing found to stop", DP_UUID(pool->sp_uuid)); + return rc; + } } - /* admin stop command does not terminate op:Fail_reclaim, but it is remembered to avoid - * retrying the original op:Rebuild. + /* admin stop command does not usually terminate op:Fail_reclaim, but it is always + * remembered to avoid retrying the original op:Rebuild. */ if (rgt->rgt_abort || (rgt->rgt_opc == RB_OP_FAIL_RECLAIM)) rgt->rgt_stop_admin = 1; rgt_put(rgt); - return 0; + return rc; } /* diff --git a/src/tests/suite/daos_rebuild_common.c b/src/tests/suite/daos_rebuild_common.c index 8407d4af5b0..b0817efdb15 100644 --- a/src/tests/suite/daos_rebuild_common.c +++ b/src/tests/suite/daos_rebuild_common.c @@ -1,6 +1,6 @@ /** * (C) Copyright 2016-2024 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -1246,8 +1246,7 @@ rebuild_stop_with_dmg_internal(const char *cfg, const uuid_t uuid, const char *g rc = dmg_pool_rebuild_stop(cfg, uuid, grp, force); print_message("dmg pool rebuild stop " DF_UUID ", force=%d, rc=%d\n", DP_UUID(uuid), force, rc); - assert_rc_equal(rc, 0); - return 0; + return rc; } /* stop an in-progress rebuild with dmg pool rebuild stop command */ @@ -1255,14 +1254,18 @@ int rebuild_stop_with_dmg(void *data) { test_arg_t *arg = data; + int rc; - print_message("(before stopping) wait for rebuild to start for pool " DF_UUID "\n", - DP_UUID(arg->pool.pool_uuid)); - test_rebuild_wait_to_start(&arg, 1); - sleep(4); - - return rebuild_stop_with_dmg_internal(arg->dmg_config, arg->pool.pool_uuid, arg->group, - false); + /* Rebuild might be only queued (not yet launched) */ + while (true) { + rc = rebuild_stop_with_dmg_internal(arg->dmg_config, arg->pool.pool_uuid, + arg->group, false); + if (rc 
!= -DER_NONEXIST) + break; + print_message("waiting for stop command to run during active rebuild ...\n"); + sleep(1); + } + return rc; } /* stop an in-progress rebuild with dmg pool rebuild stop command (force stop option) */ @@ -1270,14 +1273,18 @@ int rebuild_force_stop_with_dmg(void *data) { test_arg_t *arg = data; + int rc; - print_message("(before stopping) wait for rebuild to start for pool " DF_UUID "\n", - DP_UUID(arg->pool.pool_uuid)); - test_rebuild_wait_to_start(&arg, 1); - sleep(5); - - return rebuild_stop_with_dmg_internal(arg->dmg_config, arg->pool.pool_uuid, arg->group, - true); + /* Rebuild might be only queued (not yet launched) */ + while (true) { + rc = rebuild_stop_with_dmg_internal(arg->dmg_config, arg->pool.pool_uuid, + arg->group, true); + if (rc != -DER_NONEXIST) + break; + print_message("waiting for force-stop command to run during active rebuild ...\n"); + sleep(1); + } + return rc; } /* start/reesume a stopped rebuild with dmg pool rebuild start command */ @@ -1323,7 +1330,7 @@ rebuild_resume_wait_to_start(void *data) rc = rebuild_start_with_dmg(data); assert_rc_equal(rc, 0); - /* Verify that the rebuild is no longer stopped (has been restarted). */ + /* Verify that the current rebuild is no longer stopped (has been restarted). 
*/ test_rebuild_wait_to_start(&arg, 1); return 0; diff --git a/src/tests/suite/daos_rebuild_interactive.c b/src/tests/suite/daos_rebuild_interactive.c index ea4dc200ffb..dd2607eb583 100644 --- a/src/tests/suite/daos_rebuild_interactive.c +++ b/src/tests/suite/daos_rebuild_interactive.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -101,7 +101,6 @@ int_rebuild_snap_update_recs(void **state) ioreq_fini(&req); /* insert rebuild stop|start into the exclude rebuild execution */ - arg->interactive_rebuild = 1; arg->rebuild_cb = rebuild_stop_with_dmg; arg->rebuild_post_cb = rebuild_resume_wait; rebuild_single_pool_target(arg, ranks_to_kill[0], tgt, false); @@ -190,30 +189,28 @@ rebuild_wait_error_reset_fail_cb(void *data) test_arg_t *arg = data; int rc; - print_message("wait until rebuild errors (and starts Fail_reclaim)\n"); + print_message("wait until rebuild starts erroring\n"); test_rebuild_wait_to_error(&arg, 1); - print_message("check rebuild errored, rs_errno=%d (expecting -DER_IO=%d)\n", + print_message("rebuild version %u erroring, check rs_errno=%d (expecting -DER_IO=%d)\n", + arg->pool.pool_info.pi_rebuild_st.rs_version, arg->pool.pool_info.pi_rebuild_st.rs_errno, -DER_IO); assert_int_equal(arg->pool.pool_info.pi_rebuild_st.rs_errno, -DER_IO); - print_message("rebuild error code check passed\n"); print_message("clearing fault injection on all engines\n"); daos_debug_set_params(arg->group, -1, DMG_KEY_FAIL_LOC, 0, 0, NULL); daos_debug_set_params(arg->group, -1, DMG_KEY_FAIL_VALUE, 0, 0, NULL); daos_debug_set_params(arg->group, -1, DMG_KEY_FAIL_NUM, 0, 0, NULL); - /* Give time for transition from op:Rebuild into op:Fail_reclaim */ - sleep(2); + print_message("wait until Fail_reclaim starts\n"); + test_rebuild_wait_to_start_lower(&arg, 1); print_message( "send rebuild stop --force request during 
first/only Fail_reclaim operation\n"); rc = rebuild_force_stop_with_dmg(data); - if (rc != 0) - print_message("rebuild_force_stop_with_dmg failed, rc=%d\n", rc); + assert_rc_equal(rc, 0); - print_message("wait for rebuild to be stopped\n"); - test_rebuild_wait(&arg, 1); - /* Verifying rs_state/rs_errno will happen in post_cb rebuild_resume_wait() */ + /* Wait for stop, verify rs_state/rs_errno happens in rebuild_post_cb rebuild_resume_wait() + */ return rc; } @@ -231,7 +228,6 @@ int_rebuild_many_objects_with_failure(void **state) return; T_BEGIN(); - arg->interactive_rebuild = 1; D_ALLOC_ARRAY(oids, NUM_OBJS); for (i = 0; i < NUM_OBJS; i++) { char buffer[256]; @@ -257,11 +253,9 @@ int_rebuild_many_objects_with_failure(void **state) } /* For interactive rebuild, we need: - * 1. trigger rebuild (which will fail), query pool reubild state until op:Rebuild fails - * and op:Fail_reclaim begins. See test_rebuild_wait_to_error(). - * 2. Then, while rebuild is in op:Fail_reclaim, issue dmg system stop to test that you - * can't stop during Fail_reclaim (though the command will take effect by not retrying - * rebuild). + * 1. trigger rebuild (which will fail), wait until op:Fail_reclaim begins. + * 2. During op:Fail_reclaim, issue dmg system stop (test that stop does not interrupt + * reclaim, but takes effect by not retrying the rebuild. 
*/ arg->rebuild_cb = rebuild_wait_error_reset_fail_cb; arg->rebuild_post_cb = rebuild_resume_wait; @@ -370,24 +364,30 @@ int_drain_fail_and_retry_objects(void **state) arg->no_rebuild = 1; drain_single_pool_rank(arg, ranks_to_kill[0], false); + arg->no_rebuild = 0; print_message("wait drain to fail and exit\n"); /* NB: could be better to wait (in drain_single_pool_rank or test_rebuild_wait), but that * requires new logic in rebuild_task_complete_schedule() to update state after * Fail_reclaim */ - print_message("wait for op:Reclaim to get -DER_IO\n"); + print_message("wait for drain rebuild to get -DER_IO\n"); test_rebuild_wait_to_error(&arg, 1); - print_message("sleep for op:Fail_reclaim to run\n"); - sleep(30); - arg->no_rebuild = 0; + print_message("wait for op:Fail_reclaim to start\n"); + test_rebuild_wait_to_start_lower(&arg, 1); + print_message("clear fault injection on all engines and wait for retry rebuild\n"); daos_debug_set_params(arg->group, -1, DMG_KEY_FAIL_LOC, 0, 0, NULL); + test_rebuild_wait_to_start_next(&arg, 1); + print_message("drain rebuild retry started, version=%u\n", + arg->pool.pool_info.pi_rebuild_st.rs_version); rebuild_io_validate(arg, oids, OBJ_NR); arg->interactive_rebuild = 1; arg->rebuild_cb = reintegrate_inflight_io; arg->rebuild_cb_arg = &oids[OBJ_NR - 1]; + print_message("inflight IO during drain (that will be stopped/restarted)\n"); drain_single_pool_rank(arg, ranks_to_kill[0], false); + print_message("final data verification\n"); rebuild_io_validate(arg, oids, OBJ_NR); reintegrate_inflight_io_verify(arg); T_END(); @@ -421,8 +421,10 @@ int_extend_drain_cb_internal(void *arg) test_arg->interactive_rebuild ? 
"stop rebuild before " : "", opc, extend_drain_opstrs[opc]); - if (test_arg->interactive_rebuild) - rebuild_stop_with_dmg(arg); + if (test_arg->interactive_rebuild) { + rc = rebuild_stop_with_dmg(arg); + assert_rc_equal(rc, 0); + } /* Kill another rank during extend */ switch (opc) { @@ -526,19 +528,18 @@ int_extend_cb_internal(void *arg) daos_anchor_t anchor = {0}; bool do_stop = (!cb_arg->kill && test_arg->interactive_rebuild); const char *pre_op = (cb_arg->kill ? "kill" : "extend"); - daos_pool_info_t pinfo = {0}; int rc; int i; - /* get rebuild version for first extend, so we can wait for second rebuild to start - * (by waiting for an in-progress rebuild with version > pinfo.pi_rebuild_st.rs_version) + /* wait for first extend, and (as post-effect) get rebuild version so we can wait for + * the second rebuild to start (by waiting for a rebuild with version > first rs_version) */ - pinfo.pi_bits = DPI_REBUILD_STATUS; - rc = test_pool_get_info(test_arg, &pinfo, NULL /* engine_ranks */); - assert_rc_equal(rc, 0); - + print_message("before waiting for rebuild to start, pmap_ver=%u, rs_version=%u\n", + test_arg->pool.pool_info.pi_map_ver, + test_arg->pool.pool_info.pi_rebuild_st.rs_version); + test_rebuild_wait_to_start_next(&test_arg, 1); print_message("Extending (rs_version=%u), sleep 10, %s rank %u, %sand start op %d (%s)\n", - pinfo.pi_rebuild_st.rs_version, pre_op, cb_arg->rank, + test_arg->pool.pool_info.pi_rebuild_st.rs_version, pre_op, cb_arg->rank, do_stop ? 
"stop rebuild, " : "", opc, extend_opstrs[opc]); sleep(10); @@ -558,10 +559,14 @@ int_extend_cb_internal(void *arg) if (do_stop) { daos_debug_set_params(test_arg->group, -1, DMG_KEY_FAIL_LOC, 0, 0, NULL); - test_rebuild_wait_to_start_after_ver( - &test_arg, 1, - pinfo.pi_rebuild_st.rs_version /* original extend rebuild version */); - rebuild_stop_with_dmg(arg); /* then stop the new rebuild */ + print_message("before waiting for rebuild to start, pmap_ver=%u, rs_version=%u\n", + test_arg->pool.pool_info.pi_map_ver, + test_arg->pool.pool_info.pi_rebuild_st.rs_version); + test_rebuild_wait_to_start_next(&test_arg, 1); + print_message("rebuild version=%u running\n", + test_arg->pool.pool_info.pi_rebuild_st.rs_version); + rc = rebuild_stop_with_dmg(arg); + assert_rc_equal(rc, 0); test_rebuild_wait_to_error(&test_arg, 1); } @@ -639,11 +644,8 @@ static void int_rebuild_dkeys_stop_failing(void **state) { test_arg_t *arg = *state; - daos_pool_info_t pinfo = {0}; d_rank_t kill_rank = 0; int kill_rank_nr; - uint32_t excl_rebuild_ver; - uint32_t reclaim_rebuild_ver; daos_obj_id_t oid; struct ioreq req; int i; @@ -677,6 +679,11 @@ int_rebuild_dkeys_stop_failing(void **state) insert_recxs(key, "a_key_1M", 1, DAOS_TX_NONE, &recx, 1, data, DATA_SIZE, &req); } + /* Quick check that rebuild stop will return -DER_NONEXIST if nothing is rebuilding */ + rc = dmg_pool_rebuild_stop(arg->dmg_config, arg->pool.pool_uuid, arg->group, + false /* force */); + assert_int_equal(rc, -DER_NONEXIST); + get_killing_rank_by_oid(arg, oid, 1, 0, &kill_rank, &kill_rank_nr); ioreq_fini(&req); @@ -687,32 +694,28 @@ int_rebuild_dkeys_stop_failing(void **state) DAOS_REBUILD_OBJ_FAIL | DAOS_FAIL_ALWAYS, 0, NULL); } - /* Trigger exclude and rebuild, fail twice, force-stop it during the second Fail_reclaim */ + /* Trigger exclude and rebuild, fail twice, force-stop command during second Fail_reclaim + * NB: stop will be deferred until after Fail_reclaim (since it did not fail). 
+ */ arg->no_rebuild = 1; rebuild_single_pool_target(arg, kill_rank, -1, false); arg->no_rebuild = 0; + print_message("before waiting for rebuild to start, pmap_ver=%u, rs_version=%u\n", + arg->pool.pool_info.pi_map_ver, arg->pool.pool_info.pi_rebuild_st.rs_version); test_rebuild_wait_to_start(&arg, 1); - pinfo.pi_bits = DPI_REBUILD_STATUS; - rc = test_pool_get_info(arg, &pinfo, NULL /* engine_ranks */); - assert_rc_equal(rc, 0); - excl_rebuild_ver = pinfo.pi_rebuild_st.rs_version; print_message("Wait for exclude rebuild ver %u to fail (and start Fail_reclaim)\n", - excl_rebuild_ver); - test_rebuild_wait_to_start_before_ver(&arg, 1, excl_rebuild_ver); - rc = test_pool_get_info(arg, &pinfo, NULL /* engine_ranks */); - assert_rc_equal(rc, 0); - reclaim_rebuild_ver = pinfo.pi_rebuild_st.rs_version; - + arg->pool.pool_info.pi_rebuild_st.rs_version); + test_rebuild_wait_to_start_lower(&arg, 1); print_message("Wait for Fail_reclaim to finish (and start retry of exclude rebuild)\n"); - test_rebuild_wait_to_start_after_ver(&arg, 1, reclaim_rebuild_ver); + test_rebuild_wait_to_start_next(&arg, 1); print_message("Wait for second exclude rebuild to fail (and start Fail_reclaim)\n"); - test_rebuild_wait_to_start_before_ver(&arg, 1, excl_rebuild_ver); - sleep(2); + test_rebuild_wait_to_start_lower(&arg, 1); print_message("Force-stop runaway failing exclude rebuild retries\n"); rc = rebuild_force_stop_with_dmg(arg); assert_rc_equal(rc, 0); + print_message("Waiting for exclude rebuild to stop\n"); test_rebuild_wait(&arg, 1); assert_int_equal(arg->pool.pool_info.pi_rebuild_st.rs_state, DRS_NOT_STARTED); assert_int_equal(arg->pool.pool_info.pi_rebuild_st.rs_errno, -DER_OP_CANCELED); @@ -723,8 +726,7 @@ int_rebuild_dkeys_stop_failing(void **state) /* Do not restart the rebuild ; instead, go directly to reintegrate the rank */ reintegrate_with_inflight_io(arg, &oid, kill_rank, -1); rc = daos_obj_verify(arg->coh, oid, DAOS_EPOCH_MAX); - if (rc != 0) - assert_rc_equal(rc, -DER_NOSYS); 
+ assert_rc_equal(rc, 0); T_END(); } diff --git a/src/tests/suite/daos_test.h b/src/tests/suite/daos_test.h index 33353a3cb2f..501a943c942 100644 --- a/src/tests/suite/daos_test.h +++ b/src/tests/suite/daos_test.h @@ -421,9 +421,9 @@ void test_rebuild_wait(test_arg_t **args, int args_cnt); void test_rebuild_wait_to_start(test_arg_t **args, int args_cnt); void -test_rebuild_wait_to_start_after_ver(test_arg_t **args, int args_cnt, uint32_t rs_version); +test_rebuild_wait_to_start_next(test_arg_t **args, int args_cnt); void -test_rebuild_wait_to_start_before_ver(test_arg_t **args, int args_cnt, uint32_t rs_version); +test_rebuild_wait_to_start_lower(test_arg_t **args, int args_cnt); void test_rebuild_wait_to_error(test_arg_t **args, int args_cnt); int daos_pool_set_prop(const uuid_t pool_uuid, const char *name, diff --git a/src/tests/suite/daos_test_common.c b/src/tests/suite/daos_test_common.c index 437a3e13ab0..4716fb646d8 100644 --- a/src/tests/suite/daos_test_common.c +++ b/src/tests/suite/daos_test_common.c @@ -867,7 +867,13 @@ rebuild_pool_started_after_ver(test_arg_t *arg, uint32_t rs_version) "(waiting for > %d)\n", DP_UUID(arg->pool.pool_uuid), in_progress ? "" : "not yet ", rst->rs_version, rs_version); - return in_progress && (rst->rs_version > rs_version); + if (in_progress && (rst->rs_version > rs_version)) { + /* save final pool query info to be able to inspect rebuild status */ + memcpy(&arg->pool.pool_info, &pinfo, sizeof(pinfo)); + + return true; + } + return false; } } @@ -889,11 +895,17 @@ rebuild_pool_started_before_ver(test_arg_t *arg, uint32_t rs_version) return false; } else { bool in_progress = (rst->rs_state == DRS_IN_PROGRESS); + print_message("rebuild for pool " DF_UUIDF "has %sstarted, rs_version=%u " "(waiting for < %d)\n", DP_UUID(arg->pool.pool_uuid), in_progress ? 
"" : "not yet ", rst->rs_version, rs_version); - return in_progress && (rst->rs_version < rs_version); + if (in_progress && (rst->rs_version < rs_version)) { + /* save final pool query info to be able to inspect rebuild status */ + memcpy(&arg->pool.pool_info, &pinfo, sizeof(pinfo)); + return true; + } + return false; } } @@ -999,8 +1011,8 @@ test_get_last_svr_rank(test_arg_t *arg) return arg->srv_nnodes - disable_nodes - 1; } -bool -test_rebuild_started_after_ver(test_arg_t **args, int args_cnt, uint32_t rs_version) +static bool +test_rebuild_started_before(test_arg_t **args, int args_cnt, uint32_t *cur_versions) { bool all_started = true; int i; @@ -1009,7 +1021,7 @@ test_rebuild_started_after_ver(test_arg_t **args, int args_cnt, uint32_t rs_vers bool started = true; if (!args[i]->pool.destroyed) - started = rebuild_pool_started_after_ver(args[i], rs_version); + started = rebuild_pool_started_before_ver(args[i], cur_versions[i]); if (!started) all_started = false; @@ -1017,8 +1029,8 @@ test_rebuild_started_after_ver(test_arg_t **args, int args_cnt, uint32_t rs_vers return all_started; } -bool -test_rebuild_started_before_ver(test_arg_t **args, int args_cnt, uint32_t rs_version) +static bool +test_rebuild_started_after(test_arg_t **args, int args_cnt, uint32_t *cur_versions) { bool all_started = true; int i; @@ -1027,7 +1039,7 @@ test_rebuild_started_before_ver(test_arg_t **args, int args_cnt, uint32_t rs_ver bool started = true; if (!args[i]->pool.destroyed) - started = rebuild_pool_started_before_ver(args[i], rs_version); + started = rebuild_pool_started_after_ver(args[i], cur_versions[i]); if (!started) all_started = false; @@ -1035,25 +1047,67 @@ test_rebuild_started_before_ver(test_arg_t **args, int args_cnt, uint32_t rs_ver return all_started; } +/* wait until pools start rebuilds with rs_version < current (e.g., 
expecting op:Fail_reclaim) */ void -test_rebuild_wait_to_start(test_arg_t **args, int args_cnt) +test_rebuild_wait_to_start_lower(test_arg_t **args, int args_cnt) { - while (!test_rebuild_started_after_ver(args, args_cnt, 0 /* don't care rs_version */)) + uint32_t *cur_versions; + int i; + + D_ALLOC_ARRAY(cur_versions, args_cnt); + assert_true(cur_versions != NULL); + for (i = 0; i < args_cnt; i++) + cur_versions[i] = args[i]->pool.pool_info.pi_rebuild_st.rs_version; + + while (!test_rebuild_started_before(args, args_cnt, cur_versions)) sleep(2); + + /* NB: when control reaches here, each pool's current rs_version has been updated + * (for subsequent calls that will rely on it as a baseline) + */ + D_FREE(cur_versions); } +/* wait until pools start rebuilds with rs_version > current (e.g., expecting op:Rebuild) */ void -test_rebuild_wait_to_start_after_ver(test_arg_t **args, int args_cnt, uint32_t rs_version) +test_rebuild_wait_to_start_next(test_arg_t **args, int args_cnt) { - while (!test_rebuild_started_after_ver(args, args_cnt, rs_version)) + uint32_t *cur_versions; + int i; + + D_ALLOC_ARRAY(cur_versions, args_cnt); + assert_true(cur_versions != NULL); + for (i = 0; i < args_cnt; i++) + cur_versions[i] = args[i]->pool.pool_info.pi_rebuild_st.rs_version; + + while (!test_rebuild_started_after(args, args_cnt, cur_versions)) sleep(2); + + /* NB: when control reaches here, each pool's current rs_version has been updated + * (for subsequent calls that will rely on it as a baseline) + */ + D_FREE(cur_versions); } +/* wait until pools start rebuilds with any rs_version > 0 (whatever is current) */ void -test_rebuild_wait_to_start_before_ver(test_arg_t **args, int args_cnt, uint32_t rs_version) +test_rebuild_wait_to_start(test_arg_t **args, int args_cnt) { - while (!test_rebuild_started_before_ver(args, args_cnt, rs_version)) + uint32_t *cur_versions; + int i; + + D_ALLOC_ARRAY(cur_versions, args_cnt); + assert_true(cur_versions != NULL); + for (i = 0; i < args_cnt; 
i++) + cur_versions[i] = 0; + + while (!test_rebuild_started_after(args, args_cnt, cur_versions)) sleep(2); + + /* NB: when control reaches here, each pool's current rs_version has been updated + * (for subsequent calls that will rely on it as a baseline) + */ + D_FREE(cur_versions); } bool From 1b03543334017aae7a1127d482b62913855d1178 Mon Sep 17 00:00:00 2001 From: Phil Henderson Date: Fri, 13 Feb 2026 18:16:07 -0500 Subject: [PATCH 207/253] DAOS-18576 test: Fix test pool properties. (#17553) Additional tests that need to use the old default pool properties updated by commit 291b503. Signed-off-by: Phil Henderson --- src/tests/ftest/erasurecode/cell_size_property.py | 9 +++++---- src/tests/ftest/nvme/io.yaml | 2 +- src/tests/ftest/nvme/pool_extend.py | 3 ++- src/tests/ftest/server/metadata.py | 4 ++-- src/tests/ftest/server/metadata.yaml | 2 +- 5 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/tests/ftest/erasurecode/cell_size_property.py b/src/tests/ftest/erasurecode/cell_size_property.py index e8e23608cd7..dcfceababb1 100644 --- a/src/tests/ftest/erasurecode/cell_size_property.py +++ b/src/tests/ftest/erasurecode/cell_size_property.py @@ -1,6 +1,6 @@ ''' (C) Copyright 2020-2023 Intel Corporation. 
- (C) Copyright 2025 Hewlett Packard Enterprise Development LP + (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent ''' @@ -57,10 +57,10 @@ def test_ec_pool_property(self): for pool_cell_size in pool_cell_sizes: # Create the pool - self.pool = self.get_pool(properties=f"ec_cell_sz:{pool_cell_size}") + self.pool = self.get_pool(properties=f"rd_fac:0,space_rb:0,ec_cell_sz:{pool_cell_size}") # Verify pool EC cell size - pool_prop_expected = int(self.pool.properties.value.split(":")[1]) + pool_prop_expected = int(self.pool.properties.value.split(",")[-1].split(":")[1]) self.assertEqual( pool_prop_expected, self.pool.get_property("ec_cell_sz"), "pool get-prop ec_cell_sz does not match set property") @@ -72,7 +72,8 @@ def test_ec_pool_property(self): # Use the default pool property for container and do not update if cont_cell != pool_prop_expected: - self.container.properties.update(f"ec_cell_sz:{cont_cell}") + self.container.properties.update( + f"cksum:off,srv_cksum:off,ec_cell_sz:{cont_cell}") # Create the container and open handle self.container.create() diff --git a/src/tests/ftest/nvme/io.yaml b/src/tests/ftest/nvme/io.yaml index d671c4f84d9..972149d633b 100644 --- a/src/tests/ftest/nvme/io.yaml +++ b/src/tests/ftest/nvme/io.yaml @@ -13,7 +13,7 @@ server_config: storage: auto pool: - properties: reclaim:disabled + properties: rd_fac:0,space_rb:0,reclaim:disabled container: control_method: daos diff --git a/src/tests/ftest/nvme/pool_extend.py b/src/tests/ftest/nvme/pool_extend.py index 876050a8561..485c6b3ee88 100644 --- a/src/tests/ftest/nvme/pool_extend.py +++ b/src/tests/ftest/nvme/pool_extend.py @@ -1,5 +1,6 @@ """ (C) Copyright 2020-2023 Intel Corporation. 
+ (C) Copyright 2026 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -54,7 +55,7 @@ def run_nvme_pool_extend(self, num_pool, oclass=None): pools = [] for _ in range(0, num_pool): pools.append(self.get_pool(namespace="/run/pool_qty_{}/*".format(num_pool), - properties="reclaim:disabled")) + properties="rd_fac:0,space_rb:0,reclaim:disabled")) # On each pool (max 3), extend the ranks # eg: ranks : 4,5 ; 6,7; 8,9. diff --git a/src/tests/ftest/server/metadata.py b/src/tests/ftest/server/metadata.py index 6d86273c5ce..958686c2f12 100644 --- a/src/tests/ftest/server/metadata.py +++ b/src/tests/ftest/server/metadata.py @@ -1,6 +1,6 @@ """ (C) Copyright 2019-2024 Intel Corporation. - (C) Copyright 2025 Hewlett Packard Enterprise Development LP + (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -81,7 +81,7 @@ def create_pool(self, svc_ops_enabled=True): self.add_pool() else: params = {} - params['properties'] = "svc_ops_enabled:0" + params['properties'] = "rd_fac:0,space_rb:0,svc_ops_enabled:0" self.add_pool(**params) self.log.info("Created %s: svc ranks:", str(self.pool)) for index, rank in enumerate(self.pool.svc_ranks): diff --git a/src/tests/ftest/server/metadata.yaml b/src/tests/ftest/server/metadata.yaml index 93ca27ab35b..b9ce987770e 100644 --- a/src/tests/ftest/server/metadata.yaml +++ b/src/tests/ftest/server/metadata.yaml @@ -45,7 +45,7 @@ server_config: pool: svcn: 5 scm_size: 1G - properties: svc_ops_entry_age:60 + properties: rd_fac:0,space_rb:0,svc_ops_entry_age:60 # Uncomment the following for manual test with different svc_ops_entry_age value # properties: svc_ops_entry_age:150 # properties: svc_ops_entry_age:300 From b8236d09bf92d3210826b64ab099b23b731a9f09 Mon Sep 17 00:00:00 2001 From: Tom Nabarro Date: Tue, 17 Feb 2026 16:10:35 +0000 Subject: [PATCH 208/253] DAOS-18567 bio: Set SPDK/DPDK engine log level via env (#17497) Allow administrator 
to configure the engine process log levels for SPDK and DPDK via environment variables DAOS_[SD]PDK_LOG_LEVEL. SPDK accepts 0-4 (ERR,WARN,NOTICE,INFO,DEBUG) and DPDK accepts 1-8. Signed-off-by: Tom Nabarro --- ci/test_files_to_stash.txt | 1 + src/bio/bio_xstream.c | 35 ++++++- src/common/control.c | 104 +++++++++++++------- src/common/tests/SConscript | 5 + src/common/tests/dpdk_cli_opts_tests.c | 126 +++++++++++++++++++++++++ src/control/lib/spdk/spdk_default.go | 4 +- src/include/daos_srv/control.h | 40 ++++++-- utils/utest.yaml | 3 +- 8 files changed, 271 insertions(+), 47 deletions(-) create mode 100644 src/common/tests/dpdk_cli_opts_tests.c diff --git a/ci/test_files_to_stash.txt b/ci/test_files_to_stash.txt index a36ef0a13c4..7214ed71499 100755 --- a/ci/test_files_to_stash.txt +++ b/ci/test_files_to_stash.txt @@ -11,6 +11,7 @@ build/*/*/src/common/tests/btree, build/*/*/src/common/tests/common_test, build/*/*/src/common/tests/sched, build/*/*/src/common/tests/drpc_tests, +build/*/*/src/common/tests/control_tests, build/*/*/src/common/tests/acl_api_tests, build/*/*/src/common/tests/acl_valid_tests, build/*/*/src/common/tests/acl_util_tests, diff --git a/src/bio/bio_xstream.c b/src/bio/bio_xstream.c index 44ec22b12f8..3fe1dbdb319 100644 --- a/src/bio/bio_xstream.c +++ b/src/bio/bio_xstream.c @@ -151,15 +151,44 @@ static int bio_spdk_env_init(void) { struct spdk_env_opts opts; + const char *dpdk_opts; + unsigned int spdk_level = DAOS_SPDK_LOG_DEFAULT; + unsigned int dpdk_level = DAOS_DPDK_LOG_DEFAULT; int rc; - /* Only print error and more severe to stderr. 
*/ - spdk_log_set_print_level(SPDK_LOG_ERROR); + /* Check for SPDK log level from environment */ + d_getenv_uint("DAOS_SPDK_LOG_LEVEL", &spdk_level); + if (spdk_level > DAOS_SPDK_LOG_MAX) { + D_WARN("Invalid DAOS_SPDK_LOG_LEVEL=%u, using default (%u)\n", spdk_level, + DAOS_SPDK_LOG_DEFAULT); + spdk_level = DAOS_SPDK_LOG_DEFAULT; + } + + /* Check for DPDK log level from environment */ + d_getenv_uint("DAOS_DPDK_LOG_LEVEL", &dpdk_level); + if (dpdk_level < DAOS_DPDK_LOG_MIN || dpdk_level > DAOS_DPDK_LOG_MAX) { + D_WARN("Invalid DAOS_DPDK_LOG_LEVEL=%u, using default (%u)\n", dpdk_level, + DAOS_DPDK_LOG_DEFAULT); + dpdk_level = DAOS_DPDK_LOG_DEFAULT; + } + + D_INFO("SPDK log level: %u, DPDK log level: %u\n", spdk_level, dpdk_level); + + /* Set SPDK log print level to configured value */ + spdk_log_set_print_level(spdk_level); + + /* Build DPDK options with specified log level for all DPDK log facilities */ + dpdk_opts = dpdk_cli_build_opts(dpdk_level, dpdk_level); + if (dpdk_opts == NULL) { + D_ERROR("Failed to build DPDK options\n"); + rc = -DER_NOMEM; + goto out; + } opts.opts_size = sizeof(opts); spdk_env_opts_init(&opts); opts.name = "daos_engine"; - opts.env_context = (char *)dpdk_cli_override_opts; + opts.env_context = (char *)dpdk_opts; /** * TODO: Set opts.mem_size to nvme_glb.bd_mem_size diff --git a/src/common/control.c b/src/common/control.c index 8977667bc9c..75f5ee7db0d 100644 --- a/src/common/control.c +++ b/src/common/control.c @@ -1,50 +1,21 @@ /** * (C) Copyright 2020-2021 Intel Corporation. + * (C) Copyright 2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ /** * This file implements functions shared with the control-plane. */ + #include #include #include +#include +#include -/* - * Disable DPDK telemetry to avoid socket file clashes and quiet DPDK - * logging by setting specific facility masks. 
- */ -const char * -dpdk_cli_override_opts = "--log-level=lib.eal:4 " - "--log-level=lib.malloc:4 " - "--log-level=lib.ring:4 " - "--log-level=lib.mempool:4 " - "--log-level=lib.timer:4 " - "--log-level=pmd:4 " - "--log-level=lib.hash:4 " - "--log-level=lib.lpm:4 " - "--log-level=lib.kni:4 " - "--log-level=lib.acl:4 " - "--log-level=lib.power:4 " - "--log-level=lib.meter:4 " - "--log-level=lib.sched:4 " - "--log-level=lib.port:4 " - "--log-level=lib.table:4 " - "--log-level=lib.pipeline:4 " - "--log-level=lib.mbuf:4 " - "--log-level=lib.cryptodev:4 " - "--log-level=lib.efd:4 " - "--log-level=lib.eventdev:4 " - "--log-level=lib.gso:4 " - "--log-level=user1:4 " - "--log-level=user2:4 " - "--log-level=user3:4 " - "--log-level=user4:4 " - "--log-level=user5:4 " - "--log-level=user6:4 " - "--log-level=user7:4 " - "--log-level=user8:4 " - "--no-telemetry"; +/* Buffer to hold dynamically generated DPDK CLI options */ +static char dpdk_cli_opts_buffer[2048]; int copy_ascii(char *dst, size_t dst_sz, const void *src, size_t src_sz) @@ -72,3 +43,66 @@ copy_ascii(char *dst, size_t dst_sz, const void *src, size_t src_sz) return 0; } + +/** + * Build DPDK CLI options string with per-facility log levels. Function is not thread safe. + * + * \param eal_level Log level for Environment Abstraction Layer facility (1-8) + * \param default_level Default log level for other facilities (1-8) + * + * \return Pointer to static buffer containing DPDK CLI options string, + * or NULL on error. 
+ */ +const char * +dpdk_cli_build_opts(int eal_level, int default_level) +{ + int ret; + + /* Validate log levels */ + if (eal_level < 1 || eal_level > 8 || default_level < 1 || default_level > 8) + return NULL; + + /* Build with custom EAL level, others at default */ + ret = snprintf(dpdk_cli_opts_buffer, sizeof(dpdk_cli_opts_buffer), + "--log-level=lib.eal:%d " + "--log-level=lib.malloc:%d " + "--log-level=lib.ring:%d " + "--log-level=lib.mempool:%d " + "--log-level=lib.timer:%d " + "--log-level=pmd:%d " + "--log-level=lib.hash:%d " + "--log-level=lib.lpm:%d " + "--log-level=lib.kni:%d " + "--log-level=lib.acl:%d " + "--log-level=lib.power:%d " + "--log-level=lib.meter:%d " + "--log-level=lib.sched:%d " + "--log-level=lib.port:%d " + "--log-level=lib.table:%d " + "--log-level=lib.pipeline:%d " + "--log-level=lib.mbuf:%d " + "--log-level=lib.cryptodev:%d " + "--log-level=lib.efd:%d " + "--log-level=lib.eventdev:%d " + "--log-level=lib.gso:%d " + "--log-level=user1:%d " + "--log-level=user2:%d " + "--log-level=user3:%d " + "--log-level=user4:%d " + "--log-level=user5:%d " + "--log-level=user6:%d " + "--log-level=user7:%d " + "--log-level=user8:%d " + "--no-telemetry", + eal_level, default_level, default_level, default_level, default_level, + default_level, default_level, default_level, default_level, default_level, + default_level, default_level, default_level, default_level, default_level, + default_level, default_level, default_level, default_level, default_level, + default_level, default_level, default_level, default_level, default_level, + default_level, default_level, default_level, default_level); + + if (ret < 0 || ret >= sizeof(dpdk_cli_opts_buffer)) + return NULL; + + return dpdk_cli_opts_buffer; +} diff --git a/src/common/tests/SConscript b/src/common/tests/SConscript index fedea1b9915..49a15f9deac 100644 --- a/src/common/tests/SConscript +++ b/src/common/tests/SConscript @@ -77,6 +77,11 @@ def scons(): ['drpc_tests.c', '../drpc.c', '../drpc.pb-c.c', 
mock_test_utils], LIBS=['protobuf-c', 'daos_common', 'gurt', 'cmocka']) + Depends('control_tests', common_mock_ld_script) + unit_env.d_test_program('control_tests', + ['dpdk_cli_opts_tests.c', '../control.c', mock_test_utils], + LIBS=['daos_common', 'gurt', 'cmocka']) + if __name__ == "SCons.Script": scons() diff --git a/src/common/tests/dpdk_cli_opts_tests.c b/src/common/tests/dpdk_cli_opts_tests.c new file mode 100644 index 00000000000..503623ea729 --- /dev/null +++ b/src/common/tests/dpdk_cli_opts_tests.c @@ -0,0 +1,126 @@ +/** + * (C) Copyright 2026 Hewlett Packard Enterprise Development LP + * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ + +#include +#include +#include +#include +#include + +#include + +/* Test dpdk_cli_build_opts with valid log levels */ +static void +test_dpdk_cli_build_opts_valid(void **state) +{ + const char *opts; + int log_level; + + /* Test each valid log level */ + for (log_level = 1; log_level <= 8; log_level++) { + opts = dpdk_cli_build_opts(log_level, log_level); + assert_non_null(opts); + + /* Verify the string contains the correct log level */ + char expected[64]; + snprintf(expected, sizeof(expected), "--log-level=lib.eal:%d ", log_level); + assert_non_null(strstr(opts, expected)); + + /* Verify it contains --no-telemetry */ + assert_non_null(strstr(opts, "--no-telemetry")); + } +} + +/* Test dpdk_cli_build_opts with invalid log levels */ +static void +test_dpdk_cli_build_opts_invalid(void **state) +{ + const char *opts; + + /* Test below minimum */ + opts = dpdk_cli_build_opts(0, 1); + assert_null(opts); + + /* Test above maximum */ + opts = dpdk_cli_build_opts(9, 1); + assert_null(opts); + + /* Test negative */ + opts = dpdk_cli_build_opts(-1, 1); + + /* Test the same for the second input */ + + opts = dpdk_cli_build_opts(1, 0); + assert_null(opts); + + opts = dpdk_cli_build_opts(1, 9); + assert_null(opts); + + opts = dpdk_cli_build_opts(1, -1); + assert_null(opts); +} + +/* Test dpdk_cli_build_opts_selective */ 
+static void +test_dpdk_cli_build_opts_selective(void **state) +{ + const char *opts; + + /* Test EAL at DEBUG, others at ERROR */ + opts = dpdk_cli_build_opts(8, 4); + assert_non_null(opts); + + /* Verify EAL is at level 8 */ + assert_non_null(strstr(opts, "--log-level=lib.eal:8 ")); + + /* Verify malloc is at level 4 */ + assert_non_null(strstr(opts, "--log-level=lib.malloc:4 ")); +} + +/* Test that different log levels produce different strings */ +static void +test_dpdk_cli_build_opts_different_levels(void **state) +{ + const char *tmp; + char opts4[2048]; + const char *opts8; + + /** + * Returned will be the single string buffer and it will be overridden on each call to the + * function so copy to a local buffer before comparison. + */ + tmp = dpdk_cli_build_opts(4, 4); + strcpy(opts4, tmp); + opts8 = dpdk_cli_build_opts(8, 8); + + assert_non_null(opts4); + assert_non_null(opts8); + assert_non_null(tmp); + + /* Should be different strings */ + assert_string_not_equal(opts4, opts8); + + /* opts4 should have ":4 " */ + assert_non_null(strstr(opts4, ":4 ")); + assert_null(strstr(opts4, ":8 ")); + + /* opts8 should have ":8 " */ + assert_non_null(strstr(opts8, ":8 ")); + assert_null(strstr(opts8, ":4 ")); +} + +int +main(void) +{ + const struct CMUnitTest tests[] = { + cmocka_unit_test(test_dpdk_cli_build_opts_valid), + cmocka_unit_test(test_dpdk_cli_build_opts_invalid), + cmocka_unit_test(test_dpdk_cli_build_opts_selective), + cmocka_unit_test(test_dpdk_cli_build_opts_different_levels), + }; + + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/src/control/lib/spdk/spdk_default.go b/src/control/lib/spdk/spdk_default.go index 77f382f0268..4f755d4d568 100644 --- a/src/control/lib/spdk/spdk_default.go +++ b/src/control/lib/spdk/spdk_default.go @@ -1,5 +1,6 @@ // // (C) Copyright 2022 Intel Corporation. 
+// (C) Copyright 2026 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -98,7 +99,8 @@ func (ei *EnvImpl) InitSPDKEnv(log logging.Logger, opts *EnvOptions) error { C.setArrayString(cAllowList, C.CString(s), C.int(i)) } - envCtx := C.dpdk_cli_override_opts + // Use default logging level for all DPDK facilities. + envCtx := C.dpdk_cli_build_opts(C.DAOS_DPDK_LOG_DEFAULT, C.DAOS_DPDK_LOG_DEFAULT) retPtr := C.daos_spdk_init(0, envCtx, C.ulong(opts.PCIAllowList.Len()), cAllowList) diff --git a/src/include/daos_srv/control.h b/src/include/daos_srv/control.h index fa0d64cb623..b977fb69f7f 100644 --- a/src/include/daos_srv/control.h +++ b/src/include/daos_srv/control.h @@ -1,5 +1,6 @@ /** * (C) Copyright 2020-2024 Intel Corporation. + * (C) Copyright 2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -17,13 +18,6 @@ #include #include -/** - * Space separated string of CLI options to pass to DPDK when started during - * spdk_env_init(). These options will override the DPDK defaults. - */ -extern const char * -dpdk_cli_override_opts; - #define NVME_PCI_DEV_TYPE_VMD "vmd" #define NVME_DETAIL_BUFLEN 1024 /** @@ -74,6 +68,17 @@ dpdk_cli_override_opts; #define NVME_ROLE_ALL (NVME_ROLE_DATA | NVME_ROLE_META | NVME_ROLE_WAL) +/* Default SPDK log level (one of ERROR,WARN,NOTICE,INFO,DEBUG) */ +#define DAOS_SPDK_LOG_DEFAULT SPDK_LOG_ERROR +/* Max SPDK log level */ +#define DAOS_SPDK_LOG_MAX SPDK_LOG_DEBUG +/* Default DPDK log level: RTE_LOG_ERR (dpdk/lib/eal/include/rte_log.h) */ +#define DAOS_DPDK_LOG_DEFAULT 4 +/* Min DPDK log level: RTE_LOG_EMERG */ +#define DAOS_DPDK_LOG_MIN 1 +/* Max DPDK log level: RTE_LOG_MAX */ +#define DAOS_DPDK_LOG_MAX 8 + /** * Current device health state (health statistics). Periodically updated in * bio_bs_monitor(). Used to determine faulty device status. 
@@ -169,4 +174,25 @@ struct nvme_ns_t { * \return Zero on success, negative value on error */ int copy_ascii(char *dst, size_t dst_sz, const void *src, size_t src_sz); + +/** + * Build DPDK CLI options string with per-facility log levels. + * Useful for debugging specific facilities while keeping others quiet. + * + * DPDK log level (1-8): 1=EMERG, 2=ALERT, 3=CRIT, 4=ERR, 5=WARNING, + * 6=NOTICE, 7=INFO, 8=DEBUG + * + * \param eal_level Log level for Environment Abstraction Layer facility (1-8) + * \param default_level Default log level for other facilities (1-8) + * + * \return Pointer to static buffer containing DPDK CLI options string, + * or NULL if log levels are out of range. + * + * Example: + * // DEBUG for EAL, ERROR for rest + * const char *opts = dpdk_cli_build_opts(8, 4); + */ +const char * +dpdk_cli_build_opts(int eal_level, int default_level); + #endif /** __CONTROL_H_ */ diff --git a/utils/utest.yaml b/utils/utest.yaml index 84ad7513d8f..e5077e57202 100644 --- a/utils/utest.yaml +++ b/utils/utest.yaml @@ -1,5 +1,5 @@ # (C) Copyright 2023-2024 Intel Corporation. -# (C) Copyright 2025 Hewlett Packard Enterprise Development LP. +# (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP. # # SPDX-License-Identifier: BSD-2-Clause-Patent - name: common @@ -14,6 +14,7 @@ - cmd: ["src/common/tests/acl_real_tests"] - cmd: ["src/common/tests/prop_tests"] - cmd: ["src/common/tests/fault_domain_tests"] + - cmd: ["src/common/tests/control_tests"] - name: common_md_on_ssd base: "BUILD_DIR" required_src: ["src/common/tests/ad_mem_tests.c"] From 0ca4740fdb2f7251bd7ff25046cf4938c39dfe44 Mon Sep 17 00:00:00 2001 From: Michael MacDonald Date: Tue, 17 Feb 2026 12:44:21 -0500 Subject: [PATCH 209/253] DAOS-16311 control: Sort check reports in the API (#17554) The test helper was modified to sort the reports, but the correct place to add sorting is in the API method so that all callers may benefit from stable report order. 
Signed-off-by: Michael MacDonald --- src/common/tests_dmg_helpers.c | 20 -------------- src/control/lib/control/check.go | 12 ++++++++- src/control/lib/control/check_test.go | 39 ++++++++++++++++++++++++++- 3 files changed, 49 insertions(+), 22 deletions(-) diff --git a/src/common/tests_dmg_helpers.c b/src/common/tests_dmg_helpers.c index 851119e8f88..c77bc880820 100644 --- a/src/common/tests_dmg_helpers.c +++ b/src/common/tests_dmg_helpers.c @@ -1974,21 +1974,6 @@ dmg_check_stop(const char *dmg_config_file, uint32_t pool_nr, uuid_t uuids[]) return rc; } -static int -check_query_reports_cmp(const void *p1, const void *p2) -{ - const struct daos_check_report_info *dcri1 = p1; - const struct daos_check_report_info *dcri2 = p2; - - if (dcri1->dcri_class > dcri2->dcri_class) - return 1; - - if (dcri1->dcri_class < dcri2->dcri_class) - return -1; - - return 0; -} - static int parse_check_query_pool(struct json_object *obj, uuid_t uuid, struct daos_check_info *dci) { @@ -2147,11 +2132,6 @@ parse_check_query_info(struct json_object *query_output, uint32_t pool_nr, uuid_ return rc; } - /* Sort the inconsistency reports for easy verification. */ - if (dci->dci_report_nr > 1) - qsort(dci->dci_reports, dci->dci_report_nr, sizeof(dci->dci_reports[0]), - check_query_reports_cmp); - return 0; } diff --git a/src/control/lib/control/check.go b/src/control/lib/control/check.go index c7297e05989..dec901099a4 100644 --- a/src/control/lib/control/check.go +++ b/src/control/lib/control/check.go @@ -1,6 +1,6 @@ // // (C) Copyright 2022-2023 Intel Corporation. 
-// (C) Copyright 2025 Hewlett Packard Enterprise Development LP +// (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -11,6 +11,7 @@ import ( "context" "encoding/json" "fmt" + "sort" "strings" "time" @@ -602,6 +603,15 @@ func SystemCheckQuery(ctx context.Context, rpcClient UnaryInvoker, req *SystemCh proto.Merge(rpt, pbReport) resp.Reports = append(resp.Reports, rpt) } + + // Sort reports by class, then sequence for consistent ordering. + sort.Slice(resp.Reports, func(i, j int) bool { + if resp.Reports[i].Class != resp.Reports[j].Class { + return resp.Reports[i].Class < resp.Reports[j].Class + } + return resp.Reports[i].Seq < resp.Reports[j].Seq + }) + return resp, nil } diff --git a/src/control/lib/control/check_test.go b/src/control/lib/control/check_test.go index a25f02c9217..b4f65b4978a 100644 --- a/src/control/lib/control/check_test.go +++ b/src/control/lib/control/check_test.go @@ -1,6 +1,6 @@ // // (C) Copyright 2023 Intel Corporation. -// (C) Copyright 2025 Hewlett Packard Enterprise Development LP +// (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -11,8 +11,10 @@ import ( "testing" "github.com/google/go-cmp/cmp" + "google.golang.org/protobuf/testing/protocmp" chkpb "github.com/daos-stack/daos/src/control/common/proto/chk" + mgmtpb "github.com/daos-stack/daos/src/control/common/proto/mgmt" "github.com/daos-stack/daos/src/control/common/test" "github.com/daos-stack/daos/src/control/lib/daos" ) @@ -180,3 +182,38 @@ func TestControl_SystemCheckReport_IsStale(t *testing.T) { }) } } + +func TestControl_SystemCheckQuery_ReportsSorted(t *testing.T) { + // Reports are returned in scrambled order to verify that + // SystemCheckQuery sorts them by class, then by sequence. 
+ mockResp := &mgmtpb.CheckQueryResp{ + Reports: []*chkpb.CheckReport{ + {Seq: 3, Class: chkpb.CheckInconsistClass_CIC_POOL_BAD_LABEL}, + {Seq: 1, Class: chkpb.CheckInconsistClass_CIC_POOL_BAD_LABEL}, + {Seq: 5, Class: chkpb.CheckInconsistClass_CIC_POOL_NONEXIST_ON_MS}, + {Seq: 4, Class: chkpb.CheckInconsistClass_CIC_CONT_NONEXIST_ON_PS}, + {Seq: 2, Class: chkpb.CheckInconsistClass_CIC_POOL_MORE_SVC}, + }, + } + + mi := NewMockInvoker(nil, &MockInvokerConfig{ + UnaryResponse: MockMSResponse("", nil, mockResp), + }) + + resp, err := SystemCheckQuery(test.Context(t), mi, &SystemCheckQueryReq{}) + if err != nil { + t.Fatal(err) + } + + expReports := []*SystemCheckReport{ + {chkpb.CheckReport{Seq: 2, Class: chkpb.CheckInconsistClass_CIC_POOL_MORE_SVC}}, + {chkpb.CheckReport{Seq: 5, Class: chkpb.CheckInconsistClass_CIC_POOL_NONEXIST_ON_MS}}, + {chkpb.CheckReport{Seq: 1, Class: chkpb.CheckInconsistClass_CIC_POOL_BAD_LABEL}}, + {chkpb.CheckReport{Seq: 3, Class: chkpb.CheckInconsistClass_CIC_POOL_BAD_LABEL}}, + {chkpb.CheckReport{Seq: 4, Class: chkpb.CheckInconsistClass_CIC_CONT_NONEXIST_ON_PS}}, + } + + if diff := cmp.Diff(expReports, resp.Reports, protocmp.Transform()); diff != "" { + t.Fatalf("reports not sorted (-want +got):\n%s", diff) + } +} From a53f7bfab44fccef7b38f720e232a609206a0d79 Mon Sep 17 00:00:00 2001 From: Tomasz Gromadzki Date: Tue, 17 Feb 2026 18:46:12 +0100 Subject: [PATCH 210/253] SRE-3591 ci: pip install to not showing progress (#17522) The pip install command produces a lot of unnecessary lines in the logs showing the progress of download. This makes analysis of the logs very difficult as you have to scroll many screens to skip the installation command. 
Signed-off-by: Tomasz Gromadzki --- ci/rpm/test_daos_node.sh | 8 ++++++++ ci/unit/test_main_node.sh | 14 +++++++++----- ci/unit/test_nlt_node.sh | 12 ++++++++---- src/tests/ftest/scripts/main.sh | 8 ++++++++ utils/scripts/helpers/repo-helper-el8.sh | 3 +++ utils/scripts/helpers/repo-helper-el9.sh | 3 +++ utils/scripts/helpers/repo-helper-leap15.sh | 3 +++ utils/scripts/helpers/repo-helper-ubuntu.sh | 3 +++ 8 files changed, 45 insertions(+), 9 deletions(-) diff --git a/ci/rpm/test_daos_node.sh b/ci/rpm/test_daos_node.sh index 9968d1ec49d..36bd44fd4a3 100755 --- a/ci/rpm/test_daos_node.sh +++ b/ci/rpm/test_daos_node.sh @@ -105,6 +105,14 @@ FTEST=/usr/lib/daos/TESTING/ftest python3 -m venv venv # shellcheck disable=SC1091 source venv/bin/activate + +cat < venv/pip.conf +[global] + progress_bar = off + no_color = true + quiet = 1 +EOF + pip install --upgrade pip pip install -r $FTEST/requirements-ftest.txt diff --git a/ci/unit/test_main_node.sh b/ci/unit/test_main_node.sh index 0afbf26fea6..ad03978ee84 100755 --- a/ci/unit/test_main_node.sh +++ b/ci/unit/test_main_node.sh @@ -1,7 +1,7 @@ #!/bin/bash # # Copyright 2020-2023 Intel Corporation. 
-# Copyright 2025 Hewlett Packard Enterprise Development LP +# Copyright 2025-2026 Hewlett Packard Enterprise Development LP # # SPDX-License-Identifier: BSD-2-Clause-Patent # @@ -84,13 +84,17 @@ rm -rf "$test_log_dir" python3 -m venv venv # shellcheck disable=SC1091 source venv/bin/activate -# touch venv/pip.conf -# pip config set global.progress_bar off -# pip config set global.no_color true + +cat < venv/pip.conf +[global] + progress_bar = off + no_color = true + quiet = 1 +EOF pip install --upgrade pip -pip install --requirement requirements-utest.txt +pip install --requirement requirements-utest.txt pip install /opt/daos/lib/daos/python/ HTTPS_PROXY="${DAOS_HTTPS_PROXY:-}" utils/run_utest.py $RUN_TEST_VALGRIND \ diff --git a/ci/unit/test_nlt_node.sh b/ci/unit/test_nlt_node.sh index fa422586ad9..730dea30423 100755 --- a/ci/unit/test_nlt_node.sh +++ b/ci/unit/test_nlt_node.sh @@ -28,13 +28,17 @@ sudo bash -c ". ./utils/sl/setup_local.sh; ./utils/setup_daos_server_helper.sh" python3.11 -m venv venv # shellcheck disable=SC1091 source venv/bin/activate -touch venv/pip.conf -pip config set global.progress_bar off -pip config set global.no_color true + +cat < venv/pip.conf +[global] + progress_bar = off + no_color = true + quiet = 1 +EOF pip install --upgrade pip -pip install --requirement requirements-utest.txt +pip install --requirement requirements-utest.txt pip install /opt/daos/lib/daos/python/ # set high open file limit in the shell to avoid extra warning diff --git a/src/tests/ftest/scripts/main.sh b/src/tests/ftest/scripts/main.sh index f6b4ebb4500..3f3e2c7cb69 100755 --- a/src/tests/ftest/scripts/main.sh +++ b/src/tests/ftest/scripts/main.sh @@ -27,7 +27,15 @@ python3 -m venv venv # shellcheck disable=SC1091 source venv/bin/activate +cat < venv/pip.conf +[global] + progress_bar = off + no_color = true + quiet = 1 +EOF + pip install --upgrade pip + pip install -r "$PREFIX"/lib/daos/TESTING/ftest/requirements-ftest.txt if $TEST_RPMS; then diff --git 
a/utils/scripts/helpers/repo-helper-el8.sh b/utils/scripts/helpers/repo-helper-el8.sh index 3572699d634..28b2e7f01a3 100755 --- a/utils/scripts/helpers/repo-helper-el8.sh +++ b/utils/scripts/helpers/repo-helper-el8.sh @@ -134,5 +134,8 @@ if [ -n "$REPO_FILE_URL" ]; then [global] trusted-host = ${trusted_host} index-url = https://${trusted_host}/artifactory/api/pypi/pypi-proxy/simple + progress_bar = off + no_color = true + quiet = 1 EOF fi diff --git a/utils/scripts/helpers/repo-helper-el9.sh b/utils/scripts/helpers/repo-helper-el9.sh index b595d5d5c15..ece81cf786f 100644 --- a/utils/scripts/helpers/repo-helper-el9.sh +++ b/utils/scripts/helpers/repo-helper-el9.sh @@ -130,5 +130,8 @@ if [ -n "$REPO_FILE_URL" ]; then [global] trusted-host = ${trusted_host} index-url = https://${trusted_host}/artifactory/api/pypi/pypi-proxy/simple + progress_bar = off + no_color = true + quiet = 1 EOF fi diff --git a/utils/scripts/helpers/repo-helper-leap15.sh b/utils/scripts/helpers/repo-helper-leap15.sh index 98ced4e82ea..bacae8b2698 100755 --- a/utils/scripts/helpers/repo-helper-leap15.sh +++ b/utils/scripts/helpers/repo-helper-leap15.sh @@ -178,5 +178,8 @@ if [ -n "$REPO_FILE_URL" ]; then [global] trusted-host = ${trusted_host} index-url = https://${trusted_host}/artifactory/api/pypi/pypi-proxy/simple + progress_bar = off + no_color = true + quiet = 1 EOF fi diff --git a/utils/scripts/helpers/repo-helper-ubuntu.sh b/utils/scripts/helpers/repo-helper-ubuntu.sh index c7738a982de..54fd74b669c 100644 --- a/utils/scripts/helpers/repo-helper-ubuntu.sh +++ b/utils/scripts/helpers/repo-helper-ubuntu.sh @@ -115,5 +115,8 @@ if [ -n "$REPO_FILE_URL" ]; then [global] trusted-host = ${trusted_host} index-url = https://${trusted_host}/artifactory/api/pypi/pypi-proxy/simple + progress_bar = off + no_color = true + quiet = 1 EOF fi From 7ed838b59c1367eb63b4eeb776bb2a49a59075bc Mon Sep 17 00:00:00 2001 From: Kris Jacque Date: Tue, 17 Feb 2026 11:19:21 -0700 Subject: [PATCH 211/253] DAOS-18570 
control: Allow net-test with non-default system (#17507) - Call GetAttachInfo if system name isn't supplied when calling daos health net-test. - Fix a crash that occurred displaying net-test results if all RPCs failed. Signed-off-by: Kris Jacque --- src/control/cmd/daos/health.go | 27 +++++++--- src/control/cmd/daos/pretty/selftest.go | 7 ++- src/control/cmd/daos/pretty/selftest_test.go | 53 +++++++++++++++++++- 3 files changed, 78 insertions(+), 9 deletions(-) diff --git a/src/control/cmd/daos/health.go b/src/control/cmd/daos/health.go index a91916ffe72..90f7c9adcad 100644 --- a/src/control/cmd/daos/health.go +++ b/src/control/cmd/daos/health.go @@ -1,5 +1,6 @@ // // (C) Copyright 2024 Intel Corporation. +// (C) Copyright 2026 Hewlett Packard Enterprise Development LP // (C) Copyright 2025 Google LLC // // SPDX-License-Identifier: BSD-2-Clause-Patent @@ -11,10 +12,10 @@ import ( "strings" "github.com/google/uuid" + "github.com/pkg/errors" "github.com/daos-stack/daos/src/control/build" "github.com/daos-stack/daos/src/control/cmd/daos/pretty" - "github.com/daos-stack/daos/src/control/common/cmdutil" "github.com/daos-stack/daos/src/control/lib/daos" "github.com/daos-stack/daos/src/control/lib/daos/api" "github.com/daos-stack/daos/src/control/lib/ranklist" @@ -82,7 +83,7 @@ func (cmd *healthCheckCmd) Execute([]string) error { sysInfo, err := cmd.apiProvider.GetSystemInfo(ctx) if err != nil { - cmd.Errorf("failed to query system information: %v", err) + return errors.Wrapf(err, "failed to query system information") } systemHealth.SystemInfo = sysInfo @@ -154,9 +155,7 @@ func (cmd *healthCheckCmd) Execute([]string) error { } type netTestCmd struct { - cmdutil.JSONOutputCmd - cmdutil.LogCmd - sysCmd + daosCmd Ranks ui.RankSetFlag `short:"r" long:"ranks" description:"Use the specified ranks as test endpoints (default: all)"` Tags ui.RankSetFlag `short:"t" long:"tags" description:"Use the specified tags on ranks" default:"0"` XferSize ui.ByteSizeFlag `short:"s" long:"size" 
description:"Per-RPC transfer size (send/reply)"` @@ -167,8 +166,22 @@ type netTestCmd struct { } func (cmd *netTestCmd) Execute(_ []string) error { + ctx := cmd.MustLogCtx() + + sys := cmd.SysName + if sys == "" { + sysInfo, err := cmd.apiProvider.GetSystemInfo(ctx) + if err != nil { + return errors.Wrapf(err, "failed to query system information") + } + sys = sysInfo.Name + } + // Cart self-test requires the ability to initialize as server, so we have to clean up our + // client initialization. + cmd.apiProvider.Cleanup() + cfg := &daos.SelfTestConfig{ - GroupName: cmd.SysName, + GroupName: sys, EndpointRanks: cmd.Ranks.Ranks(), EndpointTags: ranklist.RanksToUint32(cmd.Tags.Ranks()), MaxInflightRPCs: cmd.MaxInflight, @@ -192,7 +205,7 @@ func (cmd *netTestCmd) Execute(_ []string) error { cmd.Info("Starting non-destructive network test (duration depends on performance)...\n\n") } - res, err := RunSelfTest(cmd.MustLogCtx(), cfg) + res, err := RunSelfTest(ctx, cfg) if err != nil { return err } diff --git a/src/control/cmd/daos/pretty/selftest.go b/src/control/cmd/daos/pretty/selftest.go index bf880dcd0b5..79ec47a8471 100644 --- a/src/control/cmd/daos/pretty/selftest.go +++ b/src/control/cmd/daos/pretty/selftest.go @@ -1,5 +1,6 @@ // // (C) Copyright 2024 Intel Corporation. 
+// (C) Copyright 2026 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -148,7 +149,11 @@ func PrintSelfTestResult(out io.Writer, result *daos.SelfTestResult, verbose, sh for _, pct := range pcts { pctTitles[pct] = fmt.Sprintf("%d%%", pct) - row[pctTitles[pct]] = printLatencyVal(buckets[pct].UpperBound, dispUnit) + val := "N/A" + if b, found := buckets[pct]; found { + val = printLatencyVal(b.UpperBound, dispUnit) + } + row[pctTitles[pct]] = val } table = append(table, row) diff --git a/src/control/cmd/daos/pretty/selftest_test.go b/src/control/cmd/daos/pretty/selftest_test.go index 3bfaacf0dd0..d8fb8b79c51 100644 --- a/src/control/cmd/daos/pretty/selftest_test.go +++ b/src/control/cmd/daos/pretty/selftest_test.go @@ -1,5 +1,6 @@ // // (C) Copyright 2024 Intel Corporation. +// (C) Copyright 2026 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -96,6 +97,7 @@ Client/Server Network Test Parameters }, "custom - verbose": { cfg: genCfg(func(cfg *daos.SelfTestConfig) { + cfg.GroupName = "daos_server_test" cfg.EndpointRanks = []ranklist.Rank{0, 1, 2} cfg.EndpointTags = []uint32{0, 1, 2} cfg.SendSizes = []uint64{1024, 1024 * 1024} @@ -109,7 +111,7 @@ Client/Server Network Test Parameters Send RPC Sizes : [1.00 KiB 1.00 MiB] Reply RPC Sizes : [2.00 MiB 2.00 GiB] RPCs Per Server : 10000 - System Name : daos_server + System Name : daos_server_test Tags : [0-2] Max In-Flight RPCs: 16 @@ -292,6 +294,55 @@ Per-Target Latency Results 0:0 0.00ms 15.00ms 22.50ms 27.00ms 28.50ms 29.70ms 30.00ms 15.00ms 8.66ms 20.0% 1:0 0.00ms 15.00ms 22.50ms 27.00ms 28.50ms 29.70ms 30.00ms 15.00ms 8.66ms 20.0% 2:0 0.00ms 15.00ms 22.50ms 27.00ms 28.50ms 29.70ms 30.00ms 15.00ms 8.66ms 20.0% +`, + }, + "verbose with all failures": { + result: func() *daos.SelfTestResult { + cfg := &daos.SelfTestConfig{} + cfg.SetDefaults() + r := &daos.SelfTestResult{ + MasterEndpoint: daos.SelfTestEndpoint{Rank: 3, Tag: 
0}, + TargetEndpoints: []daos.SelfTestEndpoint{ + {Rank: 0, Tag: 0}, + {Rank: 1, Tag: 0}, + {Rank: 2, Tag: 0}, + }, + Repetitions: cfg.Repetitions * 3, + SendSize: cfg.SendSizes[0], + ReplySize: cfg.ReplySizes[0], + BufferAlignment: cfg.BufferAlignment, + Duration: 8500 * time.Millisecond, + MasterLatency: &daos.EndpointLatency{ + TotalRPCs: uint64(cfg.Repetitions), + }, + } + for i := int64(1); i <= int64(r.Repetitions); i++ { + r.MasterLatency.AddValue(-1) + r.AddTargetLatency(ranklist.Rank(i%3), 0, -1) + } + return r + }(), + verbose: true, + expStr: ` +Client/Server Network Test Summary +---------------------------------- + Server Endpoints: [0-2]:0 + RPC Throughput : 1176.47 RPC/s + RPC Bandwidth : 19.28 Mbps + Average Latency : 0.00ms + Client Endpoint : 3:0 + Duration : 8.5s + Repetitions : 30000 + Send Size : 1.00 KiB + Reply Size : 1.00 KiB + Failed RPCs : 30000 (100.0%) + +Per-Target Latency Results + Target Min 50% 75% 90% 95% 99% Max Average StdDev Failed + ------ --- --- --- --- --- --- --- ------- ------ ------ + 0:0 0.00ms N/A N/A N/A N/A N/A 0.00ms 0.00ms 0.00ms 100.0% + 1:0 0.00ms N/A N/A N/A N/A N/A 0.00ms 0.00ms 0.00ms 100.0% + 2:0 0.00ms N/A N/A N/A N/A N/A 0.00ms 0.00ms 0.00ms 100.0% `, }, } { From b7f602dd8ff27fda56b1c8255e771ae01a364ccc Mon Sep 17 00:00:00 2001 From: Michael MacDonald Date: Tue, 17 Feb 2026 14:24:26 -0500 Subject: [PATCH 212/253] DAOS-623 control: Several unit test fixes (#17366) TestSystemLinux_GetfsType Remove low-value test case that assumes /dev will always have the nosuid mount option. TestStorage_ProviderUpgradeBdevConfig Allow setting a mock topology getter for tests to avoid environmental dependencies. TestSystemLinux_GetDeviceLabel Exclude problematic block devices (e.g. /dev/loopN) that don't have labels set. 
Signed-off-by: Michael MacDonald --- src/control/provider/system/system_linux_test.go | 13 ++++--------- src/control/server/storage/mocks.go | 3 ++- src/control/server/storage/provider.go | 14 ++++++++++++-- src/control/server/storage/provider_test.go | 2 ++ 4 files changed, 20 insertions(+), 12 deletions(-) diff --git a/src/control/provider/system/system_linux_test.go b/src/control/provider/system/system_linux_test.go index a8ec337b6ac..3cdcb2652cf 100644 --- a/src/control/provider/system/system_linux_test.go +++ b/src/control/provider/system/system_linux_test.go @@ -184,13 +184,6 @@ func TestSystemLinux_GetfsType(t *testing.T) { path: "notreal", expErr: syscall.ENOENT, }, - "temp dir": { - path: "/dev", - expResult: &FsType{ - Name: "tmpfs", - NoSUID: true, - }, - }, } { t.Run(name, func(t *testing.T) { result, err := DefaultProvider().GetfsType(tc.path) @@ -206,8 +199,10 @@ func TestSystemLinux_GetfsType(t *testing.T) { func validDev(t *testing.T) string { t.Helper() - // Only want numbered partitions, not whole disks + // Only want numbered partitions, not whole disks. + // Exclude loop/nbd devices which may not be attached. re := regexp.MustCompile(`^[a-zA-Z]+[0-9]+$`) + exclude := regexp.MustCompile(`^(loop|nbd|zram)`) sysRoot := "/sys/class/block/" entries, err := os.ReadDir(sysRoot) @@ -216,7 +211,7 @@ func validDev(t *testing.T) string { } for _, entry := range entries { - if !re.MatchString(entry.Name()) { + if !re.MatchString(entry.Name()) || exclude.MatchString(entry.Name()) { continue } diff --git a/src/control/server/storage/mocks.go b/src/control/server/storage/mocks.go index 3d1647ad20c..f795c29dc29 100644 --- a/src/control/server/storage/mocks.go +++ b/src/control/server/storage/mocks.go @@ -1,6 +1,6 @@ // // (C) Copyright 2019-2024 Intel Corporation. 
-// (C) Copyright 2025 Hewlett Packard Enterprise Development LP +// (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP // (C) Copyright 2025 Google LLC // // SPDX-License-Identifier: BSD-2-Clause-Patent @@ -244,6 +244,7 @@ func MockProvider(log logging.Logger, idx int, engineStorage *Config, sys System p.scm = scm p.bdev = bdev p.metadata = meta + p.getTopology = MockGetTopology return p } diff --git a/src/control/server/storage/provider.go b/src/control/server/storage/provider.go index 7696ececacb..459cc31349a 100644 --- a/src/control/server/storage/provider.go +++ b/src/control/server/storage/provider.go @@ -45,6 +45,7 @@ type Provider struct { scm ScmProvider bdev BdevProvider vmdEnabled bool + getTopology topologyGetter } // DefaultProvider returns a provider populated with default parameters. @@ -52,8 +53,10 @@ func DefaultProvider(log logging.Logger, idx int, engineStorage *Config) *Provid if engineStorage == nil { engineStorage = new(Config) } - return NewProvider(log, idx, engineStorage, system.DefaultProvider(), + p := NewProvider(log, idx, engineStorage, system.DefaultProvider(), NewScmForwarder(log), NewBdevForwarder(log), NewMetadataForwarder(log)) + p.getTopology = hwloc.NewProvider(log).GetTopology + return p } // FormatControlMetadata formats the storage used for control metadata. @@ -616,10 +619,11 @@ func (p *Provider) WriteNvmeConfig(ctx context.Context, log logging.Logger, ctrl vmdEnabled := p.vmdEnabled engineIndex := p.engineIndex engineStorage := p.engineStorage + getTopology := p.getTopology p.RUnlock() req, err := BdevWriteConfigRequestFromConfig(ctx, log, engineStorage, - vmdEnabled, hwloc.NewProvider(log).GetTopology) + vmdEnabled, getTopology) if err != nil { return errors.Wrap(err, "creating write config request") } @@ -729,3 +733,9 @@ func NewProvider(log logging.Logger, idx int, engineStorage *Config, sys SystemP metadata: meta, } } + +// setTopologyGetter sets the topology getter function for the provider. 
This is +// used in tests to inject a mock topology. +func (p *Provider) setTopologyGetter(fn topologyGetter) { + p.getTopology = fn +} diff --git a/src/control/server/storage/provider_test.go b/src/control/server/storage/provider_test.go index 043d5ca6a0d..e60e6a1441f 100644 --- a/src/control/server/storage/provider_test.go +++ b/src/control/server/storage/provider_test.go @@ -1,5 +1,6 @@ // // (C) Copyright 2021-2023 Intel Corporation. +// (C) Copyright 2026 Hewlett Packard Enterprise Development LP // (C) Copyright 2025 Google LLC // // SPDX-License-Identifier: BSD-2-Clause-Patent @@ -342,6 +343,7 @@ func TestStorage_ProviderUpgradeBdevConfig(t *testing.T) { ctx := test.MustLogContext(t, test.Context(t)) p := NewProvider(logging.FromContext(ctx), 0, tc.cfg, nil, nil, tc.bdevProv, nil) + p.setTopologyGetter(MockGetTopology) gotErr := p.UpgradeBdevConfig(ctx, tc.ctrlrs) test.CmpErr(t, tc.expErr, gotErr) if tc.expErr != nil { From 4f6622c3893d32cea5de6c855abfd4897d82252d Mon Sep 17 00:00:00 2001 From: Jerome Soumagne Date: Wed, 18 Feb 2026 10:48:03 -0600 Subject: [PATCH 213/253] DAOS-16935 cart: add D_MEM_DEVICE and cio_mem_device init option (#15937) This enables the detection and use of memory devices for RMA transfers Clean up cart init info print Consolidate parsing of cart init options Signed-off-by: Jerome Soumagne --- src/cart/README.env | 5 + src/cart/crt_hg.c | 5 +- src/cart/crt_init.c | 532 +++++++++++++++------------------- src/cart/crt_internal_types.h | 8 +- src/include/cart/types.h | 5 +- 5 files changed, 258 insertions(+), 297 deletions(-) diff --git a/src/cart/README.env b/src/cart/README.env index 93e1801c65e..befc0fd8f4f 100644 --- a/src/cart/README.env +++ b/src/cart/README.env @@ -226,3 +226,8 @@ This file lists the environment variables used in CaRT. D_PROGRESS_BUSY Force busy polling when progressing, preventing from sleeping when waiting for new messages. 
+ + D_MEM_DEVICE + Enable detection and use of memory devices (GPU, etc) to perform RMA transfers to/from. + Be wary of potential performance impacts if this variable is set and memory devices + are not used. diff --git a/src/cart/crt_hg.c b/src/cart/crt_hg.c index 19d97dd0523..082e1de7bd2 100644 --- a/src/cart/crt_hg.c +++ b/src/cart/crt_hg.c @@ -1,7 +1,7 @@ /* * (C) Copyright 2016-2024 Intel Corporation. * (C) Copyright 2025 Google LLC - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -846,7 +846,7 @@ crt_hg_class_init(crt_provider_t provider, int ctx_idx, bool primary, int iface_ init_info.na_init_info.auth_key = prov_data->cpg_na_config.noc_auth_key; - if (crt_provider_is_block_mode(provider) && !prov_data->cpg_progress_busy) + if (crt_provider_is_block_mode(provider) && !crt_gdata.cg_progress_busy) init_info.na_init_info.progress_mode = 0; else init_info.na_init_info.progress_mode = NA_NO_BLOCK; @@ -872,6 +872,7 @@ crt_hg_class_init(crt_provider_t provider, int ctx_idx, bool primary, int iface_ init_info.traffic_class = (enum na_traffic_class)crt_gdata.cg_swim_tc; if (thread_mode_single) init_info.na_init_info.thread_mode = NA_THREAD_MODE_SINGLE; + init_info.na_init_info.request_mem_device = crt_gdata.cg_mem_device; retry: hg_class = HG_Init_opt2(info_string, crt_is_service(), HG_VERSION(2, 4), &init_info); if (hg_class == NULL) { diff --git a/src/cart/crt_init.c b/src/cart/crt_init.c index e4556b8693d..cf4c17db79c 100644 --- a/src/cart/crt_init.c +++ b/src/cart/crt_init.c @@ -24,6 +24,19 @@ static bool g_prov_settings_applied[CRT_PROV_COUNT]; static const char *const crt_tc_name[] = {CRT_TRAFFIC_CLASSES}; #undef X +#define CRT_ENV_OPT_GET(opt, x, env) \ + do { \ + if (opt != NULL && opt->cio_##x) \ + x = opt->cio_##x; \ + else \ + crt_env_get(env, &x); \ + } while (0) + +static int +crt_init_prov(crt_provider_t provider, 
bool primary, struct crt_prov_gdata *prov_gdata, + const char *interface, const char *domain, const char *port, const char *auth_key, + bool port_auto_adjust, crt_init_options_t *opt); + static void crt_lib_init(void) __attribute__((__constructor__)); @@ -77,31 +90,26 @@ dump_opt(crt_init_options_t *opt) D_INFO("options:\n"); D_INFO("crt_timeout = %d\n", opt->cio_crt_timeout); D_INFO("max_ctx_num = %d\n", opt->cio_ctx_max_num); - D_INFO("swim_idx = %d\n", opt->cio_swim_crt_idx); - D_INFO("provider = %s\n", opt->cio_provider); - D_INFO("interface = %s\n", opt->cio_interface); - D_INFO("domain = %s\n", opt->cio_domain); - D_INFO("port = %s\n", opt->cio_port); - D_INFO("Flags: fi: %d, use_credits: %d, use_sensors: %d\n", opt->cio_fault_inject, - opt->cio_use_credits, opt->cio_use_sensors); + D_INFO("swim_idx = %d\n", opt->cio_swim_crt_idx); + D_INFO("provider = %s\n", opt->cio_provider); + D_INFO("interface = %s\n", opt->cio_interface); + D_INFO("domain = %s\n", opt->cio_domain); + D_INFO("port = %s\n", opt->cio_port); + D_INFO("auth_key = %s\n", opt->cio_auth_key); + D_INFO("Flags: fault_inject = %d, use_credits = %d, use_sensors = %d, " + "thread_mode_single = %d, progress_busy = %d, mem_device = %d\n", + opt->cio_fault_inject, opt->cio_use_credits, opt->cio_use_sensors, + opt->cio_thread_mode_single, opt->cio_progress_busy, opt->cio_mem_device); if (opt->cio_use_expected_size) D_INFO("max_expected_size = %d\n", opt->cio_max_expected_size); if (opt->cio_use_unexpected_size) D_INFO("max_unexpect_size = %d\n", opt->cio_max_unexpected_size); - - /* Handle similar to D_PROVIDER_AUTH_KEY */ - if (opt->cio_auth_key) - D_INFO("auth_key is set\n"); - if (opt->cio_thread_mode_single) - D_INFO("thread mode single is set\n"); - if (opt->cio_progress_busy) - D_INFO("progress busy mode is set\n"); } static int -crt_na_config_init(bool primary, crt_provider_t provider, char *interface, char *domain, char *port, - char *auth_key, bool port_auto_adjust); +crt_na_config_init(bool 
primary, crt_provider_t provider, const char *interface, const char *domain, + const char *port, const char *auth_key, bool port_auto_adjust); /* Workaround for CART-890 */ static void @@ -202,14 +210,6 @@ prov_data_init(struct crt_prov_gdata *prov_data, crt_provider_t provider, bool p prov_data->cpg_max_unexp_size = max_unexpect_size; prov_data->cpg_primary = primary; - if (opt && opt->cio_progress_busy) { - prov_data->cpg_progress_busy = opt->cio_progress_busy; - } else { - bool progress_busy = false; - crt_env_get(D_PROGRESS_BUSY, &progress_busy); - prov_data->cpg_progress_busy = progress_busy; - } - for (i = 0; i < CRT_SRV_CONTEXT_NUM; i++) prov_data->cpg_used_idx[i] = false; @@ -427,38 +427,6 @@ crt_plugin_fini(void) D_MUTEX_DESTROY(&crt_plugin_gdata.cpg_mutex); } -static int -__split_arg(char *s_arg_to_split, const char *delim, char **first_arg, char **second_arg) -{ - char *save_ptr = NULL; - char *arg_to_split; - - D_ASSERT(first_arg != NULL); - D_ASSERT(second_arg != NULL); - - /* no-op, not an error case */ - if (s_arg_to_split == NULL) { - *first_arg = NULL; - *second_arg = NULL; - return DER_SUCCESS; - } - - D_STRNDUP(arg_to_split, s_arg_to_split, CRT_ENV_STR_MAX_SIZE); - if (!arg_to_split) { - *first_arg = NULL; - *second_arg = NULL; - return -DER_NOMEM; - } - - *first_arg = 0; - *second_arg = 0; - - *first_arg = strtok_r(arg_to_split, delim, &save_ptr); - *second_arg = save_ptr; - - return DER_SUCCESS; -} - crt_provider_t crt_str_to_provider(const char *str_provider) { @@ -630,33 +598,16 @@ crt_init_opt(crt_group_id_t grpid, uint32_t flags, crt_init_options_t *opt) { bool server = flags & CRT_FLAG_BIT_SERVER; int rc = 0; - crt_provider_t primary_provider; - crt_provider_t secondary_provider; - crt_provider_t tmp_prov; - char *provider = NULL; - char *provider_env = NULL; - char *interface = NULL; - char *interface_env = NULL; - char *domain = NULL; - char *domain_env = NULL; - char *auth_key = NULL; - char *auth_key_env = NULL; - char *path = NULL; - 
char *provider_str0 = NULL; - char *provider_str1 = NULL; - char *port = NULL; - char *port_env = NULL; - char *port0 = NULL; - char *port1 = NULL; - char *iface0 = NULL; - char *iface1 = NULL; - char *domain0 = NULL; - char *domain1 = NULL; - char *auth_key0 = NULL; - char *auth_key1 = NULL; - int num_secondaries = 0; - bool port_auto_adjust = false; - int i; + crt_provider_t prov; + char *provider = NULL, *interface = NULL, *domain = NULL, *port = NULL, *auth_key = NULL; + char *path = NULL; + char *provider_str = NULL, *interface_str = NULL, *domain_str = NULL, *port_str = NULL, + *auth_key_str = NULL; + char *save_provider_str = NULL, *save_interface_str = NULL, *save_domain_str = NULL, + *save_port_str = NULL, *save_auth_key_str = NULL; + bool port_auto_adjust = false, thread_mode_single = false, progress_busy = false, + mem_device = false; + int i; d_signal_register(); @@ -672,7 +623,7 @@ crt_init_opt(crt_group_id_t grpid, uint32_t flags, crt_init_options_t *opt) D_INFO("libcart (%s) v%s initializing\n", server ? "server" : "client", CART_VERSION); crt_env_init(); - if (opt) + if (opt != NULL) dump_opt(opt); /* d_fault_inject_init() is reference counted */ @@ -697,246 +648,222 @@ crt_init_opt(crt_group_id_t grpid, uint32_t flags, crt_init_options_t *opt) D_ASSERT(gdata_init_flag == 1); D_RWLOCK_WRLOCK(&crt_gdata.cg_rwlock); - if (crt_gdata.cg_inited == 0) { - crt_gdata.cg_server = server; - crt_gdata.cg_auto_swim_disable = (flags & CRT_FLAG_BIT_AUTO_SWIM_DISABLE) ? 
1 : 0; - - crt_env_get(CRT_ATTACH_INFO_PATH, &path); - if (path != NULL && strlen(path) > 0) { - rc = crt_group_config_path_set(path); - if (rc != 0) - D_ERROR("Got %s from ENV CRT_ATTACH_INFO_PATH, " - "but crt_group_config_path_set failed " - "rc: %d, ignore the ENV.\n", - path, rc); - else - D_DEBUG(DB_ALL, "set group_config_path as %s.\n", path); + if (crt_gdata.cg_inited) { + if (!crt_gdata.cg_server && server) { + D_ERROR("CRT initialized as client, cannot set as server again.\n"); + D_GOTO(unlock, rc = -DER_INVAL); } + crt_gdata.cg_refcount++; + D_GOTO(unlock, rc); + } - if (opt && opt->cio_thread_mode_single) { - crt_gdata.cg_thread_mode_single = opt->cio_thread_mode_single; - } else { - bool thread_mode_single = false; - crt_env_get(D_THREAD_MODE_SINGLE, &thread_mode_single); - crt_gdata.cg_thread_mode_single = thread_mode_single; - } + crt_gdata.cg_server = server; + crt_gdata.cg_auto_swim_disable = (flags & CRT_FLAG_BIT_AUTO_SWIM_DISABLE) ? 1 : 0; - if (opt && opt->cio_auth_key) - auth_key = opt->cio_auth_key; - else { - crt_env_get(D_PROVIDER_AUTH_KEY, &auth_key_env); - auth_key = auth_key_env; - } + crt_env_get(CRT_ATTACH_INFO_PATH, &path); + if (path != NULL && strlen(path) > 0) { + rc = crt_group_config_path_set(path); + if (rc != 0) + D_ERROR("Got %s from ENV CRT_ATTACH_INFO_PATH, " + "but crt_group_config_path_set failed " + "rc: %d, ignore the ENV.\n", + path, rc); + else + D_DEBUG(DB_ALL, "set group_config_path as %s.\n", path); + } + + CRT_ENV_OPT_GET(opt, provider, D_PROVIDER); + CRT_ENV_OPT_GET(opt, interface, D_INTERFACE); + CRT_ENV_OPT_GET(opt, domain, D_DOMAIN); + CRT_ENV_OPT_GET(opt, port, D_PORT); + CRT_ENV_OPT_GET(opt, auth_key, D_PROVIDER_AUTH_KEY); - if (opt && opt->cio_provider) - provider = opt->cio_provider; - else { - crt_env_get(D_PROVIDER, &provider_env); - provider = provider_env; + crt_env_get(D_PORT_AUTO_ADJUST, &port_auto_adjust); + + /* TODO kept as unique globals but may want to distinguish for multi-provider case */ + 
CRT_ENV_OPT_GET(opt, thread_mode_single, D_THREAD_MODE_SINGLE); + crt_gdata.cg_thread_mode_single = thread_mode_single; + + CRT_ENV_OPT_GET(opt, progress_busy, D_PROGRESS_BUSY); + crt_gdata.cg_progress_busy = progress_busy; + + CRT_ENV_OPT_GET(opt, mem_device, D_MEM_DEVICE); + crt_gdata.cg_mem_device = mem_device; + + if (provider == NULL) { + D_ERROR("No provider specified\n"); + D_GOTO(unlock, rc = -DER_INVAL); + } + /* + * A coma-separated list of arguments for interfaces, domains, ports, keys is + * interpreted differently, depending whether it is on a client or on a server side. + * + * On a client, a coma-separated list means multi-interface selection, while on a + * server it means a multi-provider selection. + */ + if (!crt_is_service()) { + if (strchr(provider, ',') != NULL) { + D_ERROR("Multiple providers specified in provider string, but secondary " + "provider only supported on server side\n"); + D_GOTO(unlock, rc = -DER_INVAL); + } + } else if (strchr(provider, ',') != NULL) { + D_STRNDUP(provider_str, provider, CRT_ENV_STR_MAX_SIZE); + if (provider_str == NULL) + D_GOTO(unlock, rc = -DER_NOMEM); + provider = strtok_r(provider_str, ",", &save_provider_str); + + if (interface != NULL) { + D_STRNDUP(interface_str, interface, CRT_ENV_STR_MAX_SIZE); + if (interface_str == NULL) + D_GOTO(unlock, rc = -DER_NOMEM); + interface = strtok_r(interface_str, ",", &save_interface_str); } - if (opt && opt->cio_interface) - interface = opt->cio_interface; - else { - crt_env_get(D_INTERFACE, &interface_env); - interface = interface_env; + if (domain != NULL) { + D_STRNDUP(domain_str, domain, CRT_ENV_STR_MAX_SIZE); + if (domain_str == NULL) + D_GOTO(unlock, rc = -DER_NOMEM); + domain = strtok_r(domain_str, ",", &save_domain_str); } - if (opt && opt->cio_domain) - domain = opt->cio_domain; - else { - crt_env_get(D_DOMAIN, &domain_env); - domain = domain_env; + if (port != NULL) { + D_STRNDUP(port_str, port, CRT_ENV_STR_MAX_SIZE); + if (port_str == NULL) + D_GOTO(unlock, 
rc = -DER_NOMEM); + port = strtok_r(port_str, ",", &save_port_str); } - if (opt && opt->cio_port) - port = opt->cio_port; - else { - crt_env_get(D_PORT, &port_env); - port = port_env; + if (auth_key != NULL) { + D_STRNDUP(auth_key_str, auth_key, CRT_ENV_STR_MAX_SIZE); + if (auth_key_str == NULL) + D_GOTO(unlock, rc = -DER_NOMEM); + auth_key = strtok_r(auth_key_str, ",", &save_auth_key_str); } + } - crt_env_get(D_PORT_AUTO_ADJUST, &port_auto_adjust); - rc = __split_arg(provider, ",", &provider_str0, &provider_str1); - if (rc != 0) - D_GOTO(unlock, rc); + prov = crt_str_to_provider(provider); + if (prov == CRT_PROV_UNKNOWN) { + D_ERROR("Requested provider %s not found\n", provider); + D_GOTO(unlock, rc = -DER_NONEXIST); + } + + /* CXI doesn't use interface value, instead uses domain */ + if (interface == NULL && prov != CRT_PROV_OFI_CXI) + D_WARN("No interface specified\n"); + + crt_gdata.cg_primary_prov = prov; + /* + * Note: If on the client the 'interface' contains a + * coma-separated list then it will be later parsed out + * and processed in crt_na_config_init(). + */ + rc = crt_init_prov(prov, true, &crt_gdata.cg_prov_gdata_primary, interface, domain, port, + auth_key, port_auto_adjust, opt); + if (rc != 0) + D_GOTO(unlock, rc); - primary_provider = crt_str_to_provider(provider_str0); - secondary_provider = crt_str_to_provider(provider_str1); + if (provider_str != NULL) { /* multi-provider case */ + int num_secondaries = 1; + const char *provider_ptr = save_provider_str; - if (primary_provider == CRT_PROV_UNKNOWN) { - D_ERROR("Requested provider %s not found\n", provider); - D_GOTO(unlock, rc = -DER_NONEXIST); + while (provider_ptr = strchr(provider_ptr, ','), provider_ptr != NULL) { + num_secondaries++; + provider_ptr++; } + crt_gdata.cg_num_secondary_provs = num_secondaries; - /* - * A coma-separated list of arguments for interfaces, domains, ports, keys is - * interpreted differently, depending whether it is on a client or on a server side. 
- * - * On a client, a coma-separated list means multi-interface selection, while on a - * server it means a multi-provider selection. - */ - if (crt_is_service()) { - rc = __split_arg(interface, ",", &iface0, &iface1); - if (rc != 0) - D_GOTO(unlock, rc); - rc = __split_arg(domain, ",", &domain0, &domain1); - if (rc != 0) - D_GOTO(unlock, rc); - rc = __split_arg(port, ",", &port0, &port1); - if (rc != 0) - D_GOTO(unlock, rc); - rc = __split_arg(auth_key, ",", &auth_key0, &auth_key1); - if (rc != 0) - D_GOTO(unlock, rc); - } else { - /* - * Note: If on the client the 'interface' contains a - * coma-separated list then it will be later parsed out - * and processed in crt_na_config_init(). - */ - if (interface) { - D_STRNDUP(iface0, interface, CRT_ENV_STR_MAX_SIZE); - if (!iface0) - D_GOTO(unlock, rc = -DER_NOMEM); - } + D_ALLOC_ARRAY(crt_gdata.cg_secondary_provs, num_secondaries); + if (crt_gdata.cg_secondary_provs == NULL) + D_GOTO(cleanup, rc = -DER_NOMEM); - if (domain) { - D_STRNDUP(domain0, domain, CRT_ENV_STR_MAX_SIZE); - if (!domain0) - D_GOTO(unlock, rc = -DER_NOMEM); - } + D_ALLOC_ARRAY(crt_gdata.cg_prov_gdata_secondary, num_secondaries); + if (crt_gdata.cg_prov_gdata_secondary == NULL) + D_GOTO(cleanup, rc = -DER_NOMEM); - if (port) { - D_STRNDUP(port0, port, CRT_ENV_STR_MAX_SIZE); - if (!port0) - D_GOTO(unlock, rc = -DER_NOMEM); + for (i = 0; i < num_secondaries; i++) { + provider = strtok_r(NULL, ",", &save_provider_str); + if (provider == NULL) { + D_ERROR("Failed to parse secondary provider\n"); + D_GOTO(cleanup, rc = -DER_INVAL); } - if (auth_key) { - D_STRNDUP(auth_key0, auth_key, CRT_ENV_STR_MAX_SIZE); - if (!auth_key0) - D_GOTO(unlock, rc = -DER_NOMEM); + prov = crt_str_to_provider(provider); + if (prov == CRT_PROV_UNKNOWN) { + D_ERROR("Requested secondary provider %s not found\n", provider); + D_GOTO(cleanup, rc = -DER_NONEXIST); } - } + crt_gdata.cg_secondary_provs[i] = prov; - /* Secondary provider is specified */ - if (secondary_provider != 
CRT_PROV_UNKNOWN) { - /* Multi provider mode only supported on the server side */ - if (!crt_is_service()) { - D_ERROR("Secondary provider only supported on the server side\n"); - D_GOTO(unlock, rc = -DER_INVAL); - } + if (interface != NULL) + interface = strtok_r(NULL, ",", &save_interface_str); + if (domain != NULL) + domain = strtok_r(NULL, ",", &save_domain_str); + if (port != NULL) + port = strtok_r(NULL, ",", &save_port_str); + if (auth_key != NULL) + auth_key = strtok_r(NULL, ",", &save_auth_key_str); /* Secondary provider needs its own interface or domain */ - if (iface1 == NULL && domain1 == NULL) { + if (interface == NULL && domain == NULL) { D_ERROR( "Either a secondary domain or interface must be specified\n"); D_GOTO(unlock, rc = -DER_INVAL); } - /* Note: secondary ports and auth keys are optional */ - } - - /* CXI doesn't use interface value, instead uses domain */ - if (iface0 == NULL && primary_provider != CRT_PROV_OFI_CXI) - D_WARN("No interface specified\n"); - - rc = prov_data_init(&crt_gdata.cg_prov_gdata_primary, primary_provider, true, opt); - if (rc != 0) - D_GOTO(unlock, rc); - - prov_settings_apply(true, primary_provider, opt); - crt_gdata.cg_primary_prov = primary_provider; - - rc = crt_na_config_init(true, primary_provider, iface0, domain0, port0, auth_key0, - port_auto_adjust); - if (rc != 0) { - D_ERROR("crt_na_config_init() failed, " DF_RC "\n", DP_RC(rc)); - D_GOTO(unlock, rc); - } - - if (secondary_provider != CRT_PROV_UNKNOWN) { - num_secondaries = 1; - crt_gdata.cg_num_secondary_provs = num_secondaries; - - if (port1 == NULL || port1[0] == '\0') { - port1 = port0; - } - - D_ALLOC_ARRAY(crt_gdata.cg_secondary_provs, num_secondaries); - if (crt_gdata.cg_secondary_provs == NULL) - D_GOTO(cleanup, rc = -DER_NOMEM); - - D_ALLOC_ARRAY(crt_gdata.cg_prov_gdata_secondary, num_secondaries); - if (crt_gdata.cg_prov_gdata_secondary == NULL) - D_GOTO(cleanup, rc = -DER_NOMEM); - - crt_gdata.cg_secondary_provs[0] = secondary_provider; - } - - 
for (i = 0; i < num_secondaries; i++) { - tmp_prov = crt_gdata.cg_secondary_provs[i]; - - rc = prov_data_init(&crt_gdata.cg_prov_gdata_secondary[i], tmp_prov, false, - opt); - if (rc != 0) - D_GOTO(cleanup, rc); - - prov_settings_apply(false, tmp_prov, opt); + if (port == NULL || port[0] == '\0') + D_WARN("No port specified for secondary provider\n"); - rc = crt_na_config_init(false, tmp_prov, iface1, domain1, port1, auth_key1, - port_auto_adjust); + rc = crt_init_prov(crt_gdata.cg_secondary_provs[i], false, + &crt_gdata.cg_prov_gdata_secondary[i], interface, domain, + port, auth_key, port_auto_adjust, opt); if (rc != 0) { - D_ERROR("crt_na_config_init() failed, " DF_RC "\n", DP_RC(rc)); + D_ERROR("crt_init_prov() failed for secondary provider, " DF_RC + "\n", + DP_RC(rc)); D_GOTO(cleanup, rc); } } + } - rc = crt_hg_init(); - if (rc != 0) { - D_ERROR("crt_hg_init() failed, " DF_RC "\n", DP_RC(rc)); - D_GOTO(cleanup, rc); - } + rc = crt_hg_init(); + if (rc != 0) { + D_ERROR("crt_hg_init() failed, " DF_RC "\n", DP_RC(rc)); + D_GOTO(cleanup, rc); + } - rc = crt_grp_init(grpid); + rc = crt_grp_init(grpid); + if (rc != 0) { + D_ERROR("crt_grp_init() failed, " DF_RC "\n", DP_RC(rc)); + D_GOTO(cleanup, rc); + } + + if (crt_plugin_gdata.cpg_inited == 0) { + rc = crt_plugin_init(); if (rc != 0) { - D_ERROR("crt_grp_init() failed, " DF_RC "\n", DP_RC(rc)); + D_ERROR("crt_plugin_init() failed, " DF_RC "\n", DP_RC(rc)); D_GOTO(cleanup, rc); } + } - if (crt_plugin_gdata.cpg_inited == 0) { - rc = crt_plugin_init(); - if (rc != 0) { - D_ERROR("crt_plugin_init() failed, " DF_RC "\n", DP_RC(rc)); - D_GOTO(cleanup, rc); - } - } - - crt_self_test_init(); + crt_self_test_init(); - crt_iv_init(opt); - rc = crt_opc_map_create(); - if (rc != 0) { - D_ERROR("crt_opc_map_create() failed, " DF_RC "\n", DP_RC(rc)); - D_GOTO(self_test, rc); - } + crt_iv_init(opt); + rc = crt_opc_map_create(); + if (rc != 0) { + D_ERROR("crt_opc_map_create() failed, " DF_RC "\n", DP_RC(rc)); + 
D_GOTO(self_test, rc); + } - rc = crt_internal_rpc_register(server); - if (rc != 0) { - D_ERROR("crt_internal_rpc_register() failed, " DF_RC "\n", DP_RC(rc)); - D_GOTO(self_test, rc); - } + rc = crt_internal_rpc_register(server); + if (rc != 0) { + D_ERROR("crt_internal_rpc_register() failed, " DF_RC "\n", DP_RC(rc)); + D_GOTO(self_test, rc); + } - D_ASSERT(crt_gdata.cg_opc_map != NULL); + D_ASSERT(crt_gdata.cg_opc_map != NULL); - crt_gdata.cg_inited = 1; - } else { - if (crt_gdata.cg_server == false && server == true) { - D_ERROR("CRT initialized as client, cannot set as server again.\n"); - D_GOTO(unlock, rc = -DER_INVAL); - } - } + crt_gdata.cg_inited = 1; crt_gdata.cg_refcount++; @@ -963,15 +890,11 @@ crt_init_opt(crt_group_id_t grpid, uint32_t flags, crt_init_options_t *opt) D_RWLOCK_UNLOCK(&crt_gdata.cg_rwlock); out: - /* - * We don't need to free port1, iface1 and domain1 as - * they occupy the same original string as port0, iface0 and domain0 - */ - D_FREE(port0); - D_FREE(iface0); - D_FREE(domain0); - D_FREE(provider_str0); - D_FREE(auth_key0); + D_FREE(provider_str); + D_FREE(interface_str); + D_FREE(domain_str); + D_FREE(port_str); + D_FREE(auth_key_str); if (rc != 0) { D_ERROR("failed, " DF_RC "\n", DP_RC(rc)); @@ -981,6 +904,29 @@ crt_init_opt(crt_group_id_t grpid, uint32_t flags, crt_init_options_t *opt) return rc; } +static int +crt_init_prov(crt_provider_t provider, bool primary, struct crt_prov_gdata *prov_gdata, + const char *interface, const char *domain, const char *port, const char *auth_key, + bool port_auto_adjust, crt_init_options_t *opt) +{ + int rc; + + rc = prov_data_init(prov_gdata, provider, primary, opt); + if (rc != 0) + return rc; + + prov_settings_apply(primary, provider, opt); + + rc = crt_na_config_init(primary, provider, interface, domain, port, auth_key, + port_auto_adjust); + if (rc != 0) { + D_ERROR("crt_na_config_init() failed, " DF_RC "\n", DP_RC(rc)); + return rc; + } + + return 0; +} + bool crt_initialized() { @@ -1079,7 
+1025,7 @@ crt_finalize(void) } static inline bool -is_integer_str(char *str) +is_integer_str(const char *str) { const char *p; @@ -1173,8 +1119,8 @@ crt_port_range_verify(int port) } static int -crt_na_config_init(bool primary, crt_provider_t provider, char *interface, char *domain, - char *port_str, char *auth_key, bool port_auto_adjust) +crt_na_config_init(bool primary, crt_provider_t provider, const char *interface, const char *domain, + const char *port_str, const char *auth_key, bool port_auto_adjust) { struct crt_na_config *na_cfg; int rc = 0; diff --git a/src/cart/crt_internal_types.h b/src/cart/crt_internal_types.h index 9690213737b..0d532244c55 100644 --- a/src/cart/crt_internal_types.h +++ b/src/cart/crt_internal_types.h @@ -86,7 +86,6 @@ struct crt_prov_gdata { bool cpg_primary; bool cpg_contig_ports; bool cpg_inited; - bool cpg_progress_busy; /** Mutext to protect fields above */ pthread_mutex_t cpg_mutex; @@ -153,6 +152,12 @@ struct crt_gdata { /** use single thread to access context */ bool cg_thread_mode_single; + /** use busy polling for progress */ + bool cg_progress_busy; + + /** use memory device */ + bool cg_mem_device; + ATOMIC uint64_t cg_rpcid; /* rpc id */ /* protects crt_gdata (see the lock order comment on crp_mutex) */ @@ -232,6 +237,7 @@ struct crt_event_cb_priv { ENV(D_PORT_AUTO_ADJUST) \ ENV(D_THREAD_MODE_SINGLE) \ ENV(D_PROGRESS_BUSY) \ + ENV(D_MEM_DEVICE) \ ENV(D_POST_INCR) \ ENV(D_POST_INIT) \ ENV(D_MRECV_BUF) \ diff --git a/src/include/cart/types.h b/src/include/cart/types.h index 81cfb2ef0b9..c3119883a96 100644 --- a/src/include/cart/types.h +++ b/src/include/cart/types.h @@ -1,6 +1,6 @@ /* * (C) Copyright 2016-2024 Intel Corporation. 
- * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -99,6 +99,9 @@ typedef struct crt_init_options { /** force busy wait (testing only, not in production) */ bool cio_progress_busy; + + /** use memory device */ + bool cio_mem_device; } crt_init_options_t; typedef int crt_status_t; From 154dbdc958bc9bea996046eb58d58cab508c48a9 Mon Sep 17 00:00:00 2001 From: Alexander Oganezov Date: Wed, 18 Feb 2026 13:54:29 -0800 Subject: [PATCH 214/253] DAOS-18589 cart: test cleanup (#17541) - few issues fixed in multisend: - bulk wasnt freed properly in some test modes - not specifying sync or async modes resulted before in a mixture - dua_iface_server no longer overwrites D_LOG_MASK if it was already set - self_test no longer sets self rank when running as a controller app, avoiding error printout when running with --master-endpoint argument Signed-off-by: Alexander A Oganezov --- src/tests/ftest/cart/dual_iface_server.c | 4 ++-- src/tests/ftest/cart/test_multisend_client.c | 15 +++++++++------ src/tests/ftest/cart/test_multisend_common.h | 7 +++++++ src/utils/self_test/self_test_lib.c | 13 ++++++++----- 4 files changed, 26 insertions(+), 13 deletions(-) diff --git a/src/tests/ftest/cart/dual_iface_server.c b/src/tests/ftest/cart/dual_iface_server.c index 21993da7c4b..44f41a95c24 100644 --- a/src/tests/ftest/cart/dual_iface_server.c +++ b/src/tests/ftest/cart/dual_iface_server.c @@ -1,5 +1,6 @@ /* * (C) Copyright 2018-2022 Intel Corporation. 
+ * (C) Copyright 2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -242,8 +243,7 @@ server_main(d_rank_t my_rank, const char *str_port, const char *str_interface, struct stat st; crt_init_options_t init_opts = {0}; - d_setenv("FI_UNIVERSE_SIZE", "1024", 1); - d_setenv("D_LOG_MASK", "ERR", 1); + d_setenv("D_LOG_MASK", "ERR", 0); d_setenv("D_PORT_AUTO_ADJUST", "1", 1); /* rank, num_attach_retries, is_server, assert_on_error */ diff --git a/src/tests/ftest/cart/test_multisend_client.c b/src/tests/ftest/cart/test_multisend_client.c index 42b5364fab8..eda15df973a 100644 --- a/src/tests/ftest/cart/test_multisend_client.c +++ b/src/tests/ftest/cart/test_multisend_client.c @@ -1,6 +1,6 @@ /* * (C) Copyright 2018-2022 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -14,15 +14,15 @@ static void rpc_cb_common(const struct crt_cb_info *info) { - crt_bulk_t *p_blk; + crt_bulk_t blk; int rc; - p_blk = (crt_bulk_t *)info->cci_arg; + blk = (crt_bulk_t)info->cci_arg; D_ASSERTF(info->cci_rc == 0, "rpc response failed. 
rc: %d\n", info->cci_rc); - if (p_blk && *p_blk) { - rc = crt_bulk_free(*p_blk); + if (blk != CRT_BULK_NULL) { + rc = crt_bulk_free(blk); if (rc) D_ERROR("bulk free failed with %d\n", rc); } @@ -151,6 +151,7 @@ test_run() /* TODO: for now rdma is disabled when forcing all rpcs to the same rank */ if (test.tg_force_rank == -1) { rc = d_sgl_init(&sgl, 1); + D_ASSERTF(rc == 0, "d_sgl_init() failed; rc: %d\n", rc); sgl.sg_iovs[0].iov_buf = dma_buff + (chunk_size * chunk_index); @@ -165,14 +166,16 @@ test_run() input->chunk_size = chunk_size; input->chunk_index = chunk_index; input->do_put = test.tg_do_put; + } else { + D_WARN("Disabling rdma transfer for forced rank for now\n"); input->chunk_size = 0; input->bulk_hdl = CRT_BULK_NULL; input->chunk_index = 0; input->do_put = false; } - rc = crt_req_send(rpc_req, rpc_cb_common, &bulk_hdl[chunk_index]); + rc = crt_req_send(rpc_req, rpc_cb_common, input->bulk_hdl); D_ASSERTF(rc == 0, "crt_req_send() failed. rc: %d\n", rc); if (test.tg_test_mode == TEST_MODE_SYNC) diff --git a/src/tests/ftest/cart/test_multisend_common.h b/src/tests/ftest/cart/test_multisend_common.h index 8caf06445ce..58ac1c9545b 100644 --- a/src/tests/ftest/cart/test_multisend_common.h +++ b/src/tests/ftest/cart/test_multisend_common.h @@ -1,5 +1,6 @@ /* * (C) Copyright 2018-2022 Intel Corporation. 
+ * (C) Copyright 2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -186,6 +187,12 @@ test_parse_args(int argc, char **argv) break; case 'm': test.tg_test_mode = atoi(optarg); + if ((test.tg_test_mode != TEST_MODE_ASYNC) && + (test.tg_test_mode != TEST_MODE_SYNC)) { + printf("Unknown test_mode=%d specified, defaulting to sync", + test.tg_test_mode); + test.tg_test_mode = TEST_MODE_SYNC; + } break; case 'n': test.tg_num_iterations = atoi(optarg); diff --git a/src/utils/self_test/self_test_lib.c b/src/utils/self_test/self_test_lib.c index 85eb435c6f3..0c39f7af5e1 100644 --- a/src/utils/self_test/self_test_lib.c +++ b/src/utils/self_test/self_test_lib.c @@ -1,6 +1,6 @@ /* * (C) Copyright 2016-2024 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -176,10 +176,13 @@ self_test_init(char *dest_name, crt_context_t *crt_ctx, crt_group_t **srv_grp, p d_rank_list_free(rank_list); - ret = crt_rank_self_set(max_rank + 1, 1 /* group_version_min */); - if (ret != 0) { - D_ERROR("crt_rank_self_set failed; ret = %d\n", ret); - return ret; + /* when running as a server set the rank to next highest one unused */ + if (listen) { + ret = crt_rank_self_set(max_rank + 1, 1 /* group_version_min */); + if (ret != 0) { + D_ERROR("crt_rank_self_set failed; ret = %d\n", ret); + return ret; + } } return 0; From 91cd4c4fbd08b21ce86fe5d47f7fc1dba3783ddf Mon Sep 17 00:00:00 2001 From: Michael MacDonald Date: Wed, 18 Feb 2026 20:37:58 -0500 Subject: [PATCH 215/253] DAOS-3985 control: Add ControlInterface to server config (#17367) By default, the control plane server binds to 0.0.0.0, which means that it is listening to all addresses on all interfaces. In some cases, the admin may prefer to specify a single interface to be used for control plane traffic. 
When control_iface is set in daos_server.yml, the server will use the lowest IPv4 address on that interface as both the listen address and the address recorded in the management database. If the prometheus listener is configured, it will also use the same address found for the control interface. Signed-off-by: Michael MacDonald --- src/control/fault/code/codes.go | 4 +- src/control/lib/telemetry/promexp/httpd.go | 14 +- src/control/server/config/faults.go | 21 ++- src/control/server/config/server.go | 9 +- src/control/server/config/server_test.go | 1 + src/control/server/server.go | 24 ++- src/control/server/server_utils.go | 69 +++++++- src/control/server/server_utils_test.go | 158 +++++++++++++++++- src/control/server/telemetry.go | 9 +- src/tests/ftest/server/daos_server_config.py | 12 +- .../ftest/server/daos_server_config.yaml | 20 +++ src/tests/ftest/util/environment_utils.py | 66 +++++++- src/tests/ftest/util/server_utils_params.py | 3 +- utils/config/daos_server.yml | 14 ++ 14 files changed, 399 insertions(+), 25 deletions(-) diff --git a/src/control/fault/code/codes.go b/src/control/fault/code/codes.go index 56282d003d2..1f11b8637a5 100644 --- a/src/control/fault/code/codes.go +++ b/src/control/fault/code/codes.go @@ -1,6 +1,6 @@ // // (C) Copyright 2018-2024 Intel Corporation. 
-// (C) Copyright 2025 Hewlett Packard Enterprise Development LP +// (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP // (C) Copyright 2025 Google LLC // // SPDX-License-Identifier: BSD-2-Clause-Patent @@ -205,6 +205,8 @@ const ( ServerConfigBdevExcludeClash ServerConfigHugepagesDisabledWithNrSet ServerConfigScmHugeEnabled + ServerConfigBadControlInterface + ServerConfigControlInterfaceMismatch ) // SPDK library bindings codes diff --git a/src/control/lib/telemetry/promexp/httpd.go b/src/control/lib/telemetry/promexp/httpd.go index 2f4c86d485d..238e4d69fe9 100644 --- a/src/control/lib/telemetry/promexp/httpd.go +++ b/src/control/lib/telemetry/promexp/httpd.go @@ -1,5 +1,6 @@ // // (C) Copyright 2021-2024 Intel Corporation. +// (C) Copyright 2026 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -29,9 +30,10 @@ type ( // ExporterConfig defines the configuration for the Prometheus exporter. ExporterConfig struct { - Port int - Title string - Register RegMonFn + Port int + BindAddress string // optional: IP address to bind to (default: 0.0.0.0) + Title string + Register RegMonFn } ) @@ -60,7 +62,11 @@ func StartExporter(ctx context.Context, log logging.Logger, cfg *ExporterConfig) return nil, errors.Wrap(err, "failed to register client monitor") } - listenAddress := fmt.Sprintf("0.0.0.0:%d", cfg.Port) + bindAddr := cfg.BindAddress + if bindAddr == "" { + bindAddr = "0.0.0.0" + } + listenAddress := fmt.Sprintf("%s:%d", bindAddr, cfg.Port) srv := http.Server{Addr: listenAddress} http.Handle("/metrics", promhttp.HandlerFor( diff --git a/src/control/server/config/faults.go b/src/control/server/config/faults.go index 5bc36a3c85d..f295b413254 100644 --- a/src/control/server/config/faults.go +++ b/src/control/server/config/faults.go @@ -1,6 +1,6 @@ // // (C) Copyright 2020-2024 Intel Corporation. 
-// (C) Copyright 2025 Hewlett Packard Enterprise Development LP +// (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -283,6 +283,25 @@ func FaultConfigEngineNUMAImbalance(nodeMap map[int]int) *fault.Fault { ) } +// FaultConfigBadControlInterface creates a fault for an invalid control plane network interface. +func FaultConfigBadControlInterface(iface string, err error) *fault.Fault { + return serverConfigFault( + code.ServerConfigBadControlInterface, + fmt.Sprintf("control_iface %q is invalid: %s", iface, err), + "update the 'control_iface' parameter with a valid network interface and restart", + ) +} + +// FaultConfigControlInterfaceMismatch creates a fault when the control interface address +// doesn't match the configured MS replica address. +func FaultConfigControlInterfaceMismatch(ifaceAddr, replicaAddr string) *fault.Fault { + return serverConfigFault( + code.ServerConfigControlInterfaceMismatch, + fmt.Sprintf("control_iface address %s doesn't match configured MS replica address %s", ifaceAddr, replicaAddr), + "ensure 'control_iface' specifies an interface with an address matching this server's entry in 'mgmt_svc_replicas'", + ) +} + func serverConfigFault(code code.Code, desc, res string) *fault.Fault { return &fault.Fault{ Domain: "serverconfig", diff --git a/src/control/server/config/server.go b/src/control/server/config/server.go index 3ea27442a6f..7f889b0b39a 100644 --- a/src/control/server/config/server.go +++ b/src/control/server/config/server.go @@ -1,6 +1,6 @@ // // (C) Copyright 2020-2024 Intel Corporation. 
-// (C) Copyright 2025 Hewlett Packard Enterprise Development LP +// (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -58,6 +58,7 @@ type deprecatedParams struct { type Server struct { // control-specific ControlPort int `yaml:"port"` + ControlInterface string `yaml:"control_iface,omitempty"` TransportConfig *security.TransportConfig `yaml:"transport_config"` Engines []*engine.Config `yaml:"engines"` BdevExclude []string `yaml:"bdev_exclude,omitempty"` @@ -231,6 +232,12 @@ func (cfg *Server) WithControlPort(port int) *Server { return cfg } +// WithControlInterface sets the network interface for the control plane listener. +func (cfg *Server) WithControlInterface(iface string) *Server { + cfg.ControlInterface = iface + return cfg +} + // WithTransportConfig sets the gRPC transport configuration. func (cfg *Server) WithTransportConfig(cfgTransport *security.TransportConfig) *Server { cfg.TransportConfig = cfgTransport diff --git a/src/control/server/config/server_test.go b/src/control/server/config/server_test.go index 97462d60ea6..119f9a503ac 100644 --- a/src/control/server/config/server_test.go +++ b/src/control/server/config/server_test.go @@ -240,6 +240,7 @@ func TestServerConfig_Constructed(t *testing.T) { // possible to construct an identical configuration with the helpers. constructed := DefaultServer(). WithControlPort(10001). + WithControlInterface("eth0"). WithControlMetadata(storage.ControlMetadata{ Path: "/home/daos_server/control_meta", DevicePath: "/dev/sdb1", diff --git a/src/control/server/server.go b/src/control/server/server.go index 14155682c76..4ea72603f3d 100644 --- a/src/control/server/server.go +++ b/src/control/server/server.go @@ -1,6 +1,6 @@ // // (C) Copyright 2018-2024 Intel Corporation. 
-// (C) Copyright 2025 Hewlett Packard Enterprise Development LP +// (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -308,21 +308,37 @@ func (srv *server) setCoreDumpFilter() error { func (srv *server) initNetwork() error { defer srv.logDuration(track("time to init network")) - ctlAddr, err := getControlAddr(ctlAddrParams{ + params := ctlAddrParams{ port: srv.cfg.ControlPort, replicaAddrSrc: srv.sysdb, lookupHost: net.LookupIP, - }) + } + + // If a control interface is configured, look it up and pass it to getControlAddr. + // Also track whether we should bind to a specific IP (only when control_iface is set). + bindToCtlAddr := false + if srv.cfg.ControlInterface != "" { + iface, err := net.InterfaceByName(srv.cfg.ControlInterface) + if err != nil { + return config.FaultConfigBadControlInterface(srv.cfg.ControlInterface, err) + } + params.ctlIface = iface + bindToCtlAddr = true + srv.log.Debugf("using control interface %s for listener", srv.cfg.ControlInterface) + } + + ctlAddr, err := getControlAddr(params) if err != nil { return err } - listener, err := createListener(ctlAddr, net.Listen) + listener, err := createListener(ctlAddr, net.Listen, bindToCtlAddr) if err != nil { return err } srv.ctlAddr = ctlAddr srv.listener = listener + srv.log.Debugf("control plane listener bound to %s", ctlAddr) return nil } diff --git a/src/control/server/server_utils.go b/src/control/server/server_utils.go index 7d3ed9f6c44..b9b57481f50 100644 --- a/src/control/server/server_utils.go +++ b/src/control/server/server_utils.go @@ -1,6 +1,6 @@ // // (C) Copyright 2021-2024 Intel Corporation. 
-// (C) Copyright 2025 Hewlett Packard Enterprise Development LP +// (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -150,11 +150,30 @@ type ctlAddrParams struct { port int replicaAddrSrc replicaAddrGetter lookupHost ipLookupFn + ctlIface netInterface // optional: if set, use this interface for bind address } func getControlAddr(params ctlAddrParams) (*net.TCPAddr, error) { - ipStr := "0.0.0.0" + // If a control interface is configured, use its first IPv4 address. + if params.ctlIface != nil { + ip, err := getFirstIPv4Addr(params.ctlIface) + if err != nil { + return nil, errors.Wrap(err, "getting control interface address") + } + + // If this node is a replica, verify the control interface address matches + // the configured replica address. A mismatch would break raft connectivity. + if repAddr, err := params.replicaAddrSrc.ReplicaAddr(); err == nil { + if !repAddr.IP.Equal(ip) { + return nil, config.FaultConfigControlInterfaceMismatch(ip.String(), repAddr.IP.String()) + } + } + return &net.TCPAddr{IP: ip, Port: params.port}, nil + } + + // Fall back to legacy behavior: use replica address if available, otherwise 0.0.0.0. + ipStr := "0.0.0.0" if repAddr, err := params.replicaAddrSrc.ReplicaAddr(); err == nil { ipStr = repAddr.IP.String() } @@ -167,11 +186,17 @@ func getControlAddr(params ctlAddrParams) (*net.TCPAddr, error) { return ctlAddr, nil } -func createListener(ctlAddr *net.TCPAddr, listen netListenFn) (net.Listener, error) { +func createListener(ctlAddr *net.TCPAddr, listen netListenFn, bindToCtlAddr bool) (net.Listener, error) { // Create and start listener on management network. - lis, err := listen("tcp4", fmt.Sprintf("0.0.0.0:%d", ctlAddr.Port)) + // Only bind to ctlAddr.IP if explicitly requested (i.e., control_iface is set), + // otherwise bind to all interfaces (0.0.0.0) for backwards compatibility. 
+ bindAddr := fmt.Sprintf("0.0.0.0:%d", ctlAddr.Port) + if bindToCtlAddr { + bindAddr = ctlAddr.String() + } + lis, err := listen("tcp4", bindAddr) if err != nil { - return nil, errors.Wrap(err, "unable to listen on management interface") + return nil, errors.Wrapf(err, "unable to listen on %s", bindAddr) } return lis, nil @@ -730,9 +755,12 @@ func registerTelemetryCallbacks(ctx context.Context, srv *server) { return } + // Use the same bind address as the control plane listener. + bindAddr := srv.ctlAddr.IP.String() + srv.OnEnginesStarted(func(ctxIn context.Context) error { srv.log.Debug("starting Prometheus exporter") - cleanup, err := startPrometheusExporter(ctxIn, srv.log, telemPort, srv.harness.Instances()) + cleanup, err := startPrometheusExporter(ctxIn, srv.log, telemPort, bindAddr, srv.harness.Instances()) if err != nil { return err } @@ -875,6 +903,35 @@ type netInterface interface { Addrs() ([]net.Addr, error) } +// getFirstIPv4Addr returns the first (lowest) IPv4 address from the interface. +// If multiple IPv4 addresses exist, the lowest one is returned for determinism. +func getFirstIPv4Addr(iface netInterface) (net.IP, error) { + addrs, err := iface.Addrs() + if err != nil { + return nil, errors.Wrap(err, "failed to get interface addresses") + } + + var ipv4s []net.IP + for _, a := range addrs { + if ipNet, ok := a.(*net.IPNet); ok && ipNet.IP != nil { + if v4 := ipNet.IP.To4(); v4 != nil { + ipv4s = append(ipv4s, v4) + } + } + } + + if len(ipv4s) == 0 { + return nil, errors.New("no IPv4 addresses on interface") + } + + // Sort for deterministic selection (lowest address first). 
+ sort.Slice(ipv4s, func(i, j int) bool { + return bytes.Compare(ipv4s[i], ipv4s[j]) < 0 + }) + + return ipv4s[0], nil +} + func getSrxSetting(cfg *config.Server) (int32, error) { if len(cfg.Engines) == 0 { return -1, nil diff --git a/src/control/server/server_utils_test.go b/src/control/server/server_utils_test.go index 69852dd9e93..9536c20d8aa 100644 --- a/src/control/server/server_utils_test.go +++ b/src/control/server/server_utils_test.go @@ -1,6 +1,6 @@ // // (C) Copyright 2021-2024 Intel Corporation. -// (C) Copyright 2025 Hewlett Packard Enterprise Development LP +// (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -173,6 +173,96 @@ func TestServer_checkFabricInterface(t *testing.T) { } } +func TestServer_getFirstIPv4Addr(t *testing.T) { + for name, tc := range map[string]struct { + iface netInterface + expIP net.IP + expErr error + }{ + "Addrs fails": { + iface: &mockInterface{ + err: errors.New("mock Addrs error"), + }, + expErr: errors.New("mock Addrs error"), + }, + "no addresses": { + iface: &mockInterface{ + addrs: []net.Addr{}, + }, + expErr: errors.New("no IPv4 addresses"), + }, + "only IPv6 addresses": { + iface: &mockInterface{ + addrs: []net.Addr{ + &net.IPNet{IP: net.ParseIP("::1")}, + &net.IPNet{IP: net.ParseIP("fe80::1")}, + }, + }, + expErr: errors.New("no IPv4 addresses"), + }, + "single IPv4 address": { + iface: &mockInterface{ + addrs: []net.Addr{ + &net.IPNet{IP: net.ParseIP("192.168.1.100")}, + }, + }, + expIP: net.ParseIP("192.168.1.100").To4(), + }, + "multiple IPv4 addresses - returns lowest": { + iface: &mockInterface{ + addrs: []net.Addr{ + &net.IPNet{IP: net.ParseIP("192.168.1.100")}, + &net.IPNet{IP: net.ParseIP("10.0.0.5")}, + &net.IPNet{IP: net.ParseIP("172.16.0.1")}, + }, + }, + expIP: net.ParseIP("10.0.0.5").To4(), + }, + "mixed IPv4 and IPv6 - returns lowest IPv4": { + iface: &mockInterface{ + addrs: []net.Addr{ + &net.IPNet{IP: net.ParseIP("::1")}, + 
&net.IPNet{IP: net.ParseIP("192.168.1.100")}, + &net.IPNet{IP: net.ParseIP("fe80::1")}, + &net.IPNet{IP: net.ParseIP("10.0.0.5")}, + }, + }, + expIP: net.ParseIP("10.0.0.5").To4(), + }, + "non-IPNet addresses ignored": { + iface: &mockInterface{ + addrs: []net.Addr{ + &mockAddr{}, // not a *net.IPNet + &net.IPNet{IP: net.ParseIP("192.168.1.100")}, + }, + }, + expIP: net.ParseIP("192.168.1.100").To4(), + }, + "nil IP in IPNet ignored": { + iface: &mockInterface{ + addrs: []net.Addr{ + &net.IPNet{IP: nil}, + &net.IPNet{IP: net.ParseIP("192.168.1.100")}, + }, + }, + expIP: net.ParseIP("192.168.1.100").To4(), + }, + } { + t.Run(name, func(t *testing.T) { + ip, err := getFirstIPv4Addr(tc.iface) + + test.CmpErr(t, tc.expErr, err) + if tc.expErr != nil { + return + } + + if !tc.expIP.Equal(ip) { + t.Fatalf("expected IP %v, got %v", tc.expIP, ip) + } + }) + } +} + func TestServer_getSrxSetting(t *testing.T) { defCfg := config.DefaultServer() @@ -1725,6 +1815,69 @@ func TestServerUtils_getControlAddr(t *testing.T) { }, expErr: errors.New("mock resolve"), }, + "with control interface": { + params: ctlAddrParams{ + port: testTCPAddr.Port, + ctlIface: &mockInterface{ + addrs: []net.Addr{ + &net.IPNet{IP: net.ParseIP("192.168.1.100")}, + }, + }, + }, + expAddr: &net.TCPAddr{IP: net.ParseIP("192.168.1.100").To4(), Port: 1234}, + }, + "control interface matches replica address": { + params: ctlAddrParams{ + port: testTCPAddr.Port, + ctlIface: &mockInterface{ + addrs: []net.Addr{ + &net.IPNet{IP: net.ParseIP("127.0.0.1")}, + }, + }, + replicaAddrSrc: &mockReplicaAddrSrc{ + replicaAddrResult: testTCPAddr, + }, + lookupHost: func(addr string) ([]net.IP, error) { + t.Fatal("lookupHost should not be called when ctlIface is set") + return nil, nil + }, + }, + expAddr: testTCPAddr, + }, + "control interface mismatches replica address": { + params: ctlAddrParams{ + port: testTCPAddr.Port, + ctlIface: &mockInterface{ + addrs: []net.Addr{ + &net.IPNet{IP: net.ParseIP("10.0.0.50")}, + }, + 
}, + replicaAddrSrc: &mockReplicaAddrSrc{ + replicaAddrResult: testTCPAddr, + }, + }, + expErr: config.FaultConfigControlInterfaceMismatch("10.0.0.50", "127.0.0.1"), + }, + "control interface fails to get address": { + params: ctlAddrParams{ + port: testTCPAddr.Port, + ctlIface: &mockInterface{ + err: errors.New("mock interface error"), + }, + }, + expErr: errors.New("mock interface error"), + }, + "control interface has no IPv4 addresses": { + params: ctlAddrParams{ + port: testTCPAddr.Port, + ctlIface: &mockInterface{ + addrs: []net.Addr{ + &net.IPNet{IP: net.ParseIP("::1")}, + }, + }, + }, + expErr: errors.New("no IPv4 addresses"), + }, } { t.Run(name, func(t *testing.T) { if tc.params.lookupHost == nil { @@ -1742,6 +1895,9 @@ func TestServerUtils_getControlAddr(t *testing.T) { addr, err := getControlAddr(tc.params) test.CmpErr(t, tc.expErr, err) + if tc.expErr != nil { + return + } test.AssertEqual(t, tc.expAddr.String(), addr.String(), "") }) } diff --git a/src/control/server/telemetry.go b/src/control/server/telemetry.go index 0a7403fc7e0..b5eb9f7cb54 100644 --- a/src/control/server/telemetry.go +++ b/src/control/server/telemetry.go @@ -1,6 +1,6 @@ // // (C) Copyright 2018-2024 Intel Corporation. 
-// (C) Copyright 2025 Hewlett Packard Enterprise Development LP +// (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -69,10 +69,11 @@ func regPromEngineSources(ctx context.Context, log logging.Logger, engines []Eng return nil } -func startPrometheusExporter(ctx context.Context, log logging.Logger, port int, engines []Engine) (func(), error) { +func startPrometheusExporter(ctx context.Context, log logging.Logger, port int, bindAddr string, engines []Engine) (func(), error) { expCfg := &promexp.ExporterConfig{ - Port: port, - Title: "DAOS Engine Telemetry", + Port: port, + BindAddress: bindAddr, + Title: "DAOS Engine Telemetry", Register: func(ctx context.Context, log logging.Logger) error { return regPromEngineSources(ctx, log, engines) }, diff --git a/src/tests/ftest/server/daos_server_config.py b/src/tests/ftest/server/daos_server_config.py index b5383793cec..db0d1d043b1 100644 --- a/src/tests/ftest/server/daos_server_config.py +++ b/src/tests/ftest/server/daos_server_config.py @@ -1,8 +1,10 @@ """ (C) Copyright 2020-2023 Intel Corporation. 
+ (C) Copyright 2026 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ +import os from apricot import TestWithServers from server_utils import ServerFailed @@ -44,7 +46,15 @@ def test_daos_server_config_basic(self): self.hostfile_servers_slots) # Get the input to verify - c_val = self.params.get("config_val", "/run/server_config_val/*/") + c_val = list(self.params.get("config_val", "/run/server_config_val/*/")) + + # Handle "auto" value for control_iface - use DAOS_TEST_CONTROL_IFACE + if c_val[0] == "control_iface" and c_val[1] == "auto": + control_iface = os.environ.get("DAOS_TEST_CONTROL_IFACE") + if not control_iface: + self.skipTest("DAOS_TEST_CONTROL_IFACE not set; cannot test control_iface") + c_val[1] = control_iface + self.log.info("Resolved control_iface 'auto' to '%s'", control_iface) if c_val[0] == "name": # Set the dmg system name to match the server in order to avoid diff --git a/src/tests/ftest/server/daos_server_config.yaml b/src/tests/ftest/server/daos_server_config.yaml index 889417aa74d..b11fe6fee03 100644 --- a/src/tests/ftest/server/daos_server_config.yaml +++ b/src/tests/ftest/server/daos_server_config.yaml @@ -171,3 +171,23 @@ server_config_val: !mux - "targets" - -1 - "FAIL" + control_iface_invalid: + config_val: + - "control_iface" + - "nonexistent_interface_12345" + - "FAIL" + # Loopback interface is valid but its address (127.0.0.1) won't match the + # MS replica address derived from the test node's hostname, causing startup + # to fail with an address mismatch error. + control_iface_loopback_mismatch: + config_val: + - "control_iface" + - "lo" + - "FAIL" + # Use the auto-detected control interface (from DAOS_TEST_CONTROL_IFACE) which + # has an IP matching the hostname, so it should work with the MS replica address. 
+ control_iface_valid: + config_val: + - "control_iface" + - "auto" + - "PASS" diff --git a/src/tests/ftest/util/environment_utils.py b/src/tests/ftest/util/environment_utils.py index 8835fd90eb6..30c00204b2a 100644 --- a/src/tests/ftest/util/environment_utils.py +++ b/src/tests/ftest/util/environment_utils.py @@ -1,6 +1,6 @@ """ (C) Copyright 2018-2024 Intel Corporation. - (C) Copyright 2025 Hewlett Packard Enterprise Development LP + (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -103,6 +103,7 @@ class TestEnvironment(): 'shared_dir': 'DAOS_TEST_SHARED_DIR', 'user_dir': 'DAOS_TEST_USER_DIR', 'interface': 'DAOS_TEST_FABRIC_IFACE', + 'control_interface': 'DAOS_TEST_CONTROL_IFACE', 'provider': 'D_PROVIDER', 'insecure_mode': 'DAOS_TEST_INSECURE_MODE', 'bullseye_src': 'DAOS_TEST_BULLSEYE_SRC', @@ -172,6 +173,8 @@ def set_defaults(self, logger, servers=None, clients=None, provider=None, insecu self.user_dir = os.path.join(self.log_dir, "user") if self.interface is None: self.interface = self._default_interface(logger, servers) + if self.control_interface is None: + self.control_interface = self._default_control_interface(logger, servers) if self.provider is None: self.provider = self._default_provider(logger, servers) if self.insecure_mode is None: @@ -339,6 +342,67 @@ def _default_interface(self, logger, hosts): logger.debug(" Found interface(s): %s", ",".join(interfaces)) return ",".join(interfaces) + @property + def control_interface(self): + """Get the control plane interface device. + + Returns: + str: the control plane interface device + """ + return os.environ.get(self.__ENV_VAR_MAP['control_interface']) + + @control_interface.setter + def control_interface(self, value): + """Set the control plane interface device. 
+ + Args: + value (str): the control plane interface device + """ + self.__set_value('control_interface', value) + + def _default_control_interface(self, logger, hosts): + """Get the default control plane interface. + + Finds the network interface whose IP address matches the hostname resolution. + This is the interface that should be used for control plane traffic. + + Args: + logger (Logger): logger for the messages produced by this method + hosts (NodeSet): hosts on which to find the control interface + + Returns: + str: the default control interface; can be None + """ + if not hosts or not logger: + return None + + # Get the first host to query + first_host = NodeSet(str(list(hosts)[0])) + + logger.debug( + "Detecting control interface on %s - %s not set", + first_host, self.__ENV_VAR_MAP['control_interface']) + + # Find the interface whose IP matches the hostname resolution + command = ( + "python3 -c \"" + "import socket, subprocess; " + "ip = socket.gethostbyname(socket.gethostname()); " + "out = subprocess.check_output(['ip', '-o', 'addr', 'show']).decode(); " + "print(next((l.split()[1] for l in out.split(chr(10)) if f'inet {ip}/' in l), ''))\"" + ) + result = run_remote(logger, first_host, command) + if result.passed and result.output: + for data in result.output: + if data.stdout: + interface = data.stdout[0].strip() + if interface: + logger.debug(" Found control interface: %s", interface) + return interface + + logger.debug(" Could not detect control interface") + return None + @property def provider(self): """Get the provider. diff --git a/src/tests/ftest/util/server_utils_params.py b/src/tests/ftest/util/server_utils_params.py index 4ce4a39bc71..36c9f5eb946 100644 --- a/src/tests/ftest/util/server_utils_params.py +++ b/src/tests/ftest/util/server_utils_params.py @@ -1,6 +1,6 @@ """ (C) Copyright 2020-2024 Intel Corporation. 
- (C) Copyright 2025 Hewlett Packard Enterprise Development LP + (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -141,6 +141,7 @@ def __init__(self, filename, common_yaml, version=None): self.control_log_file = LogParameter(log_dir, None, "daos_control.log") self.helper_log_file = LogParameter(log_dir, None, "daos_server_helper.log") self.telemetry_port = BasicParameter(None, 9191) + self.control_iface = BasicParameter(None) self.client_env_vars = BasicParameter(None) # access_points was changed to mgmt_svc_replicas in 2.7 diff --git a/utils/config/daos_server.yml b/utils/config/daos_server.yml index 4876c71db74..c7fdd8faab7 100644 --- a/utils/config/daos_server.yml +++ b/utils/config/daos_server.yml @@ -56,6 +56,20 @@ #port: 10001 # # +## Network interface for the control plane listener +# +## Bind the control plane to a specific network interface. When set, the server +## will use the first IPv4 address on this interface for binding the control +## plane listener and for reporting its address to other system components. +## This is useful in environments with multiple network interfaces where you +## want to restrict control plane traffic to a specific network. +# +## If not set, the default behavior is to bind to 0.0.0.0 (all interfaces). 
+# +## default: (not set - binds to all interfaces) +#control_iface: eth0 +# +# ## Transport credentials specifying certificates to secure communications # #transport_config: From 5e82a1062e0bb2bbd26a7ab6592c14669c30795c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 19 Feb 2026 09:01:42 -0800 Subject: [PATCH 216/253] DAOS-18599 cq: update GHA versions (#17560) Updates `github/codeql-action` from 4.32.2 to 4.32.3 Updates `aquasecurity/trivy-action` from 0.33.1 to 0.34.0 Signed-off-by: dependabot[bot] --- .github/workflows/ossf-scorecard.yml | 2 +- .github/workflows/trivy.yml | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ossf-scorecard.yml b/.github/workflows/ossf-scorecard.yml index c1d4f3c5e62..65324095cf2 100644 --- a/.github/workflows/ossf-scorecard.yml +++ b/.github/workflows/ossf-scorecard.yml @@ -71,6 +71,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard (optional). # Commenting out will disable upload of results to your repo's Code Scanning dashboard - name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@45cbd0c69e560cd9e7cd7f8c32362050c9b7ded2 # v4.32.2 + uses: github/codeql-action/upload-sarif@9e907b5e64f6b83e7804b09294d44122997950d6 # v4.32.3 with: sarif_file: results.sarif diff --git a/.github/workflows/trivy.yml b/.github/workflows/trivy.yml index fc7b733dd85..2be3d8cd8ac 100644 --- a/.github/workflows/trivy.yml +++ b/.github/workflows/trivy.yml @@ -36,7 +36,7 @@ jobs: uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Run Trivy vulnerability scanner in filesystem mode (table format) - uses: aquasecurity/trivy-action@b6643a29fecd7f34b3597bc6acb0a98b03d33ff8 # 0.33.1 + uses: aquasecurity/trivy-action@c1824fd6edce30d7ab345a9989de00bbd46ef284 # 0.34.0 with: scan-type: 'fs' scan-ref: '.' 
@@ -61,14 +61,14 @@ jobs: sed -i 's/format: template/format: sarif/g' utils/trivy/trivy.yaml - name: Run Trivy vulnerability scanner in filesystem mode (sarif format) - uses: aquasecurity/trivy-action@b6643a29fecd7f34b3597bc6acb0a98b03d33ff8 # 0.33.1 + uses: aquasecurity/trivy-action@c1824fd6edce30d7ab345a9989de00bbd46ef284 # 0.34.0 with: scan-type: 'fs' scan-ref: '.' trivy-config: 'utils/trivy/trivy.yaml' - name: Upload Trivy scan results to GitHub Security tab - uses: github/codeql-action/upload-sarif@45cbd0c69e560cd9e7cd7f8c32362050c9b7ded2 # v4.32.2 + uses: github/codeql-action/upload-sarif@9e907b5e64f6b83e7804b09294d44122997950d6 # v4.32.3 with: sarif_file: 'trivy-results.sarif' @@ -79,7 +79,7 @@ jobs: sed -i 's/exit-code: 0/exit-code: 1/g' utils/trivy/trivy.yaml - name: Run Trivy vulnerability scanner in filesystem mode (human readable format) - uses: aquasecurity/trivy-action@b6643a29fecd7f34b3597bc6acb0a98b03d33ff8 # 0.33.1 + uses: aquasecurity/trivy-action@c1824fd6edce30d7ab345a9989de00bbd46ef284 # 0.34.0 with: scan-type: 'fs' scan-ref: '.' From ccc1cd28edb00069575eb0f72fce491a01eddde6 Mon Sep 17 00:00:00 2001 From: Cedric Koch-Hofer <94527853+knard38@users.noreply.github.com> Date: Thu, 19 Feb 2026 22:54:39 +0100 Subject: [PATCH 217/253] DAOS-18292 ddb: Fix ddb debugging facilities (#17232) Fix ddb debugging facilities and add ddb man page. 
Signed-off-by: Cedric Koch-Hofer --- src/control/SConscript | 4 +- src/control/cmd/ddb/main.go | 365 +++++++++++++++++++++++++++------- src/include/daos/debug.h | 5 +- src/utils/ddb/ddb.c | 16 +- src/utils/ddb/ddb_commands.c | 16 +- src/utils/ddb/ddb_main.c | 10 +- src/utils/ddb/ddb_mgmt.c | 2 +- src/utils/ddb/ddb_parse.c | 6 +- src/utils/ddb/ddb_printer.c | 3 +- src/utils/ddb/ddb_spdk.c | 10 +- src/utils/ddb/ddb_tree_path.c | 2 + src/utils/ddb/ddb_vos.c | 7 +- 12 files changed, 344 insertions(+), 102 deletions(-) diff --git a/src/control/SConscript b/src/control/SConscript index 1029735f957..44720122a18 100644 --- a/src/control/SConscript +++ b/src/control/SConscript @@ -109,7 +109,7 @@ def install_go_bin(env, name, libs=None, install_man=False): menv = env.Clone() # This runs code from the build area so needs LD_LIBRARY_PATH set. menv.d_enable_ld_path(["cart", "gurt", "client/api", "common", "client/dfs", "utils", - "utils/self_test"]) + "utils/self_test", "vos", "common/dav_v2", "bio", "utils/ddb"]) menv.Command(build_path, target, f'{gen_bin} manpage -o {build_path}') menv.Install('$PREFIX/share/man/man8', build_path) @@ -217,7 +217,7 @@ def scons(): # Add vos and dependent libs for ddb ddb_env.AppendENVPath("CGO_LDFLAGS", " -lvos -ldav_v2 -ldaos_common_pmem -lpmem " "-labt -lgurt -luuid -lbio -lssl -lcart", sep=" ") - install_go_bin(ddb_env, "ddb", ['ddb']) + install_go_bin(ddb_env, "ddb", ['ddb'], True) if __name__ == "SCons.Script": diff --git a/src/control/cmd/ddb/main.go b/src/control/cmd/ddb/main.go index 6dee5bd3363..f61903827b8 100644 --- a/src/control/cmd/ddb/main.go +++ b/src/control/cmd/ddb/main.go @@ -9,6 +9,8 @@ package main import ( "bufio" + "fmt" + "io" "os" "path" "path/filepath" @@ -23,8 +25,10 @@ import ( "github.com/pkg/errors" "github.com/daos-stack/daos/src/control/build" + "github.com/daos-stack/daos/src/control/common" "github.com/daos-stack/daos/src/control/fault" "github.com/daos-stack/daos/src/control/logging" + 
"github.com/daos-stack/daos/src/control/server/engine" ) /* @@ -32,25 +36,26 @@ import ( */ import "C" -func exitWithError(log logging.Logger, err error) { +func exitWithError(err error) { cmdName := path.Base(os.Args[0]) - log.Errorf("%s: %v", cmdName, err) + fmt.Fprintf(os.Stderr, "ERROR: %s: %v\n", cmdName, err) if fault.HasResolution(err) { - log.Errorf("%s: %s", cmdName, fault.ShowResolutionFor(err)) + fmt.Fprintf(os.Stderr, "ERROR: %s: %s", cmdName, fault.ShowResolutionFor(err)) } os.Exit(1) } type cliOptions struct { - Debug bool `long:"debug" description:"enable debug output"` WriteMode bool `long:"write_mode" short:"w" description:"Open the vos file in write mode."` CmdFile string `long:"cmd_file" short:"f" description:"Path to a file containing a sequence of ddb commands to execute."` SysdbPath string `long:"db_path" short:"p" description:"Path to the sys db."` VosPath string `long:"vos_path" short:"s" description:"Path to the VOS file to open."` Version bool `short:"v" long:"version" description:"Show version"` + Debug string `long:"debug" description:"Logging log level (default to ERROR). More details can be found in the ddb man page."` + LogDir string `long:"log_dir" description:"Directory to write log files to. If not provided, logs will only be written to the console."` Args struct { - RunCmd string `positional-arg-name:"ddb_command"` - RunCmdArgs []string `positional-arg-name:"ddb_command_args"` + RunCmd string `positional-arg-name:"ddb_command" description:"Optional ddb command to run. If not provided, the tool will run in interactive mode."` + RunCmdArgs []string `positional-arg-name:"ddb_command_args" description:"Arguments for the ddb command to run. If not provided, the command will be run without any arguments."` } `positional-args:"yes"` } @@ -59,36 +64,28 @@ Available commands: ` -const helpVosTreePath = ` +const helpTreePath = ` Path -Many of the commands take a VOS tree path. 
The format for this path -is [cont]/[obj]/[dkey]/[akey]/[extent]. -- cont - the full container uuid. -- obj - the object id. -- keys (akey, dkey) - there are multiple types of keys - -- string keys are simply the string value. If the size of the - key is greater than strlen(key), then the size is included at - the end of the string value. Example: 'akey{5}' is the key: akey - with a null terminator at the end. - -- number keys are formatted as '{[type]: NNN}' where type is - 'uint8, uint16, uint32, or uint64'. NNN can be a decimal or - hex number. Example: '{uint32: 123456}' - -- binary keys are formatted as '{bin: 0xHHH}' where HHH is the hex - representation of the binary key. Example: '{bin: 0x1a2b}' -- extent for array values - in the format {lo-hi}. - -To make it easier to navigate the tree, indexes can be -used instead of the path part. The index is in the format [i]. Indexes -and actual path values can be used together - -Example Paths: -/3550f5df-e6b1-4415-947e-82e15cf769af/939000573846355970.0.13.1/dkey/akey/[0-1023] -[0]/[1]/[2]/[1]/[9] -/[0]/939000573846355970.0.13.1/[2]/akey{5}/[0-1023] +Many of the commands take a VOS tree path. The format for this path is +[cont]/[obj]/[dkey]/[akey]/[extent]. To make it easier to navigate the tree, indexes can be used +instead of the path part. The index is in the format [i]. Indexes and actual path values can be used +together. + +More details on the path format can be found in the ddb man page. ` +const ddbLongDescription = `The DAOS Debug Tool (ddb) allows a user to navigate through and modify +a file in the VOS format. It offers both a command line and interactive +shell mode. If neither a single command or '-f' option is provided, then +the tool will run in interactive mode. In order to modify the VOS file, +the '-w' option must be included. + +If the command requires it, the VOS file must be provided with the parameter +--vos-path. The VOS file will be opened before any commands are executed. 
See +the command‑specific help for details.` + const grumbleUnknownCmdErr = "unknown command, try 'help'" func runFileCmds(log logging.Logger, app *grumble.App, fileName string) error { @@ -116,7 +113,7 @@ func runFileCmds(log logging.Logger, app *grumble.App, fileName string) error { continue } log.Debugf("Running Command %q\n", lineStr) - err = runCmdStr(app, lineCmd[0], lineCmd[1:]...) + err = runCmdStr(app, nil, lineCmd[0], lineCmd[1:]...) if err != nil { return errors.Wrapf(err, "Failed running command %q", lineStr) } @@ -129,7 +126,7 @@ func runFileCmds(log logging.Logger, app *grumble.App, fileName string) error { // the help command from the outside of the interactive mode. // This method extracts commands and their respective help (short) messages in the simplest possible way, // put them in columns and print them using the provided log. -func printCommands(app *grumble.App, log *logging.LeveledLogger) { +func printCommands(fd io.Writer, app *grumble.App) { var output []string for _, c := range app.Commands().All() { if c.Name == "quit" { @@ -138,27 +135,27 @@ func printCommands(app *grumble.App, log *logging.LeveledLogger) { row := c.Name + columnize.DefaultConfig().Delim + c.Help output = append(output, row) } - log.Info(helpCommandsHeader + columnize.SimpleFormat(output) + "\n\n") + fmt.Fprintf(fd, helpCommandsHeader+columnize.SimpleFormat(output)+"\n\n") } -func printGeneralHelp(app *grumble.App, generalMsg string, log *logging.LeveledLogger) { - log.Info(generalMsg + "\n") // standard help from go-flags - printCommands(app, log) // list of commands - log.Info(helpVosTreePath) // extra info on VOS Tree Path syntax +func printGeneralHelp(app *grumble.App, generalMsg string) { + fmt.Println(generalMsg) // standard help from go-flags + printCommands(os.Stdout, app) // list of commands + fmt.Printf(helpTreePath) // extra info on VOS Tree Path syntax } // Ask grumble to generate a help message for the requested command. 
// Caveat: There is no known easy way of forcing grumble to use log to print the generated message // so the output goes directly to stdout. // Returns false in case the opts.Args.RunCmd is unknown. -func printCmdHelp(app *grumble.App, opts *cliOptions, log *logging.LeveledLogger) bool { - err := runCmdStr(app, string(opts.Args.RunCmd), "--help") +func printCmdHelp(app *grumble.App, opts *cliOptions) bool { + err := runCmdStr(app, nil, string(opts.Args.RunCmd), "--help") if err != nil { if err.Error() == grumbleUnknownCmdErr { - log.Errorf("unknown command '%s'", string(opts.Args.RunCmd)) - printCommands(app, log) + fmt.Fprintf(os.Stderr, "ERROR: Unknown command '%s'", string(opts.Args.RunCmd)) + printCommands(os.Stderr, app) } else { - log.Error(err.Error()) + fmt.Fprintf(os.Stderr, "ERROR: %s", err.Error()) } return false } @@ -167,38 +164,109 @@ func printCmdHelp(app *grumble.App, opts *cliOptions, log *logging.LeveledLogger // Prints either general or command-specific help message. // Returns a reasonable return code in case the caller chooses to terminate the process. -func printHelp(generalMsg string, opts *cliOptions, log *logging.LeveledLogger) int { +func printHelp(generalMsg string, opts *cliOptions) int { // ctx is not necessary since this instance of the app is not intended to run any of the commands app := createGrumbleApp(nil) if string(opts.Args.RunCmd) == "" { - printGeneralHelp(app, generalMsg, log) + printGeneralHelp(app, generalMsg) return 0 } - if printCmdHelp(app, opts, log) { + if printCmdHelp(app, opts) { return 0 } else { return 1 } } -func parseOpts(args []string, opts *cliOptions, log *logging.LeveledLogger) error { +func setenvIfNotSet(key, value string) { + if os.Getenv(key) == "" { + os.Setenv(key, value) + } +} + +// The golang cli and the C engine use separate logging systems with different log levels. +// This function maps a string log level to the closest matching levels for both systems. 
+// More details on the log levels can be found in the LOGGING section of the ddb man page. +func strToLogLevels(level string) (logging.LogLevel, engine.LogLevel, error) { + switch strings.ToUpper(level) { + case "TRACE": + return logging.LogLevelTrace, engine.LogLevelDbug, nil + case "DEBUG", "DBUG": + return logging.LogLevelDebug, engine.LogLevelDbug, nil + case "INFO": + return logging.LogLevelInfo, engine.LogLevelInfo, nil + case "NOTE", "NOTICE": + return logging.LogLevelNotice, engine.LogLevelNote, nil + case "WARN": + return logging.LogLevelNotice, engine.LogLevelWarn, nil + case "ERROR", "ERR": + return logging.LogLevelError, engine.LogLevelErr, nil + case "CRIT": + return logging.LogLevelError, engine.LogLevelCrit, nil + case "ALRT": + return logging.LogLevelError, engine.LogLevelAlrt, nil + case "FATAL", "EMRG": + return logging.LogLevelError, engine.LogLevelEmrg, nil + case "EMIT": + return logging.LogLevelError, engine.LogLevelEmit, nil + default: + return logging.LogLevelDisabled, engine.LogLevelUndefined, errors.Errorf("invalid log level %q", level) + } +} + +func newLogger(opts *cliOptions) (*logging.LeveledLogger, error) { + level := "ERR" + if opts.Debug != "" { + level = opts.Debug + } + cliLogLevel, engineLogLevel, err := strToLogLevels(level) + if err != nil { + return nil, errors.Wrap(err, "Error parsing log level") + } + + consoleLog := logging.NewCommandLineLogger() + consoleLog.WithLogLevel(cliLogLevel) + + setenvIfNotSet("D_LOG_MASK", engineLogLevel.String()) + setenvIfNotSet("DD_STDERR", "ERR") + + if opts.LogDir == "" { + return consoleLog, nil + } + + path := filepath.Clean(opts.LogDir) + fi, err := os.Stat(path) + if err != nil { + return nil, errors.Wrapf(err, "Error accessing debug directory %q", path) + } + if !fi.IsDir() { + return nil, errors.Errorf("Debug path %q is not a directory", path) + } + + setenvIfNotSet("D_LOG_FILE", filepath.Join(path, "ddb-engine.log")) + + var fd *os.File + fd, err = 
common.AppendFile(filepath.Join(path, "ddb-cli.log")) + if err != nil { + return nil, errors.Wrapf(err, "Error opening debug log file 'ddb-cli.log' in %q", path) + } + + consoleLog.WithLogLevel(logging.LogLevelError) + fileLog := logging.NewCombinedLogger("DDB", fd) + fileLog.WithLogLevel(cliLogLevel) + fileLog.WithErrorLogger(consoleLog) + + return fileLog, nil +} + +func parseOpts(args []string, opts *cliOptions) error { p := flags.NewParser(opts, flags.HelpFlag|flags.IgnoreUnknown) p.Name = "ddb" p.Usage = "[OPTIONS]" p.ShortDescription = "daos debug tool" - p.LongDescription = ` -The DAOS Debug Tool (ddb) allows a user to navigate through and modify -a file in the VOS format. It offers both a command line and interactive -shell mode. If neither a single command or '-f' option is provided, then -the tool will run in interactive mode. In order to modify the VOS file, -the '-w' option must be included. - -If the command requires it, the VOS file must be provided with the parameter ---vos-path. The VOS file will be opened before any commands are executed. See -the command‑specific help for details. -` + p.LongDescription = ddbLongDescription // Set the traceback level such that a crash results in // a coredump (when ulimit -c is set appropriately). @@ -206,28 +274,33 @@ the command‑specific help for details. 
if _, err := p.ParseArgs(args); err != nil { if fe, ok := errors.Cause(err).(*flags.Error); ok && fe.Type == flags.ErrHelp { - os.Exit(printHelp(fe.Error(), opts, log)) + os.Exit(printHelp(fe.Error(), opts)) } return err } if opts.Version { - log.Infof("ddb version %s", build.DaosVersion) - return nil + opts.Args.RunCmd = "version" + opts.Args.RunCmdArgs = []string{} + opts.CmdFile = "" } if opts.Args.RunCmd != "" && opts.CmdFile != "" { return errors.New("Cannot use both command file and a command string") } - if opts.Debug { - log.WithLogLevel(logging.LogLevelDebug) - log.Debug("debug output enabled") + log, err := newLogger(opts) + if err != nil { + return errors.Wrap(err, "Error configuring logging") } + log.Debug("Logging facilities initialized") - ctx, cleanup, err := InitDdb(log) - if err != nil { + var ( + ctx *DdbContext + cleanup func() + ) + if ctx, cleanup, err = InitDdb(log); err != nil { return errors.Wrap(err, "Error initializing the DDB Context") } defer cleanup() @@ -260,7 +333,7 @@ the command‑specific help for details. if opts.Args.RunCmd != "" || opts.CmdFile != "" { // Non-interactive mode if opts.Args.RunCmd != "" { - err := runCmdStr(app, string(opts.Args.RunCmd), opts.Args.RunCmdArgs...) + err := runCmdStr(app, p, string(opts.Args.RunCmd), opts.Args.RunCmdArgs...) if err != nil { log.Errorf("Error running command %q %s\n", string(opts.Args.RunCmd), err) } @@ -298,10 +371,9 @@ the command‑specific help for details. func main() { var opts cliOptions - log := logging.NewCommandLineLogger() - if err := parseOpts(os.Args[1:], &opts, log); err != nil { - exitWithError(log, err) + if err := parseOpts(os.Args[1:], &opts); err != nil { + exitWithError(err) } } @@ -336,7 +408,158 @@ func createGrumbleApp(ctx *DdbContext) *grumble.App { return app } +const manMacroSection = `.\" Miscellaneous Helper macros +.de Sp \" Vertical space (when we can't use .PP) +.if t .sp .5v +.if n .sp +.. +.de Vb \" Begin verbatim text +.ft CW +.nf +.ne \\$1 +.. 
+.de Ve \" End verbatim text +.ft R +.fi +.. +.\" ======================================================================== +.\"` + +const manArgsHeader = `.SH ARGUMENTS +.SS Application Arguments` + +const manCmdsHeader = `.SH COMMANDS +.SS Available Commands` + +const manPathSection = `.SH PATH +.SS VOS Tree Path +Many of the commands take a VOS tree path. The format for this path is [cont]/[obj]/[dkey]/[akey]/[extent]. +.TP +.B cont +The full container uuid. +.TP +.B obj +The object id. +.TP +.B keys (akey, dkey) +There are multiple types of keys: +.RS +.IP "*" 4 +.B string keys +are simply the string value. If the size of the key is greater than strlen(key), then +the size is included at the end of the string value. Example: 'akey{5}' is the key: akey with a null +terminator at the end. +.IP "*" 4 +.B number keys +are formatted as '{[type]: NNN}' where type is 'uint8, uint16, uint32, or uint64'. NNN +can be a decimal or hex number. Example: '{uint32: 123456}' +.IP "*" 4 +.B binary keys +are formatted as '{bin: 0xHHH}' where HHH is the hex representation of the binary key. +Example: '{bin: 0x1a2b}' +.RE +.TP +.B extent +For array values in the format {lo-hi}. +.SS Index Tree Path +.RE +To make it easier to navigate the tree, indexes can be used instead of the path part. The index is +in the format [i]. Indexes and actual path values can be used together. +.SS Path Examples +VOS tree path examples: +.Sp +.Vb 1 +\& /3550f5df-e6b1-4415-947e-82e15cf769af/939000573846355970.0.13.1/dkey/akey/[0-1023] +.Ve +.Sp +Index tree path examples: +.Sp +.Vb 1 +\& [0]/[1]/[2]/[1]/[9] +.Ve +.Sp +Mixed tree path examples: +.Sp +.Vb 1 +\& /[0]/939000573846355970.0.13.1/[2]/akey{5}/[0-1023] +.Ve +.Sp` + +const manLoggingSection = `.SH LOGGING +The golang cli and the C engine use separate logging systems with different log levels. +The \fI--debug=\fR option sets the log level for both systems to the closest matching +levels. 
The available log levels supported by this option are: \fBTRACE\fR, \fBDEBUG\fR (or +\fBDBG\fR), \fBINFO\fR, \fBNOTICE\fR (or \fBNOTE\fR), \fBWARN\fR, \fBERROR\fR (or \fBERR\fR), +\fBCRIT\fR, \fBALRT\fR, \fBFATAL\fR (or \fBEMRG\fr), and \fBEMIT\fR. The default log level is +\fBERROR\fR. + +To not pollute the console output, the logs can be redirected to a file using the +\fI--log_dir=\fR option. However, \fBERROR\fR log messages or above will still be printed to +the console regardless if the \fI--log_dir=\fR option is used or not.` + +func fprintManPage(dest io.Writer, app *grumble.App, parser *flags.Parser) { + fmt.Fprintln(dest, manMacroSection) + + parser.WriteManPage(dest) + + fmt.Fprintln(dest, manArgsHeader) + for _, arg := range parser.Args() { + fmt.Fprintf(dest, ".TP\n.B %s\n%s\n", arg.Name, arg.Description) + } + + fmt.Fprintln(dest, manCmdsHeader) + for _, cmd := range app.Commands().All() { + if cmd.Name == "manpage" { + continue + } + + var cmdHelp string + if cmd.LongHelp != "" { + cmdHelp = cmd.LongHelp + } else { + cmdHelp = cmd.Help + } + fmt.Fprintf(dest, ".TP\n.B %s\n%s\n", cmd.Name, cmdHelp) + } + + fmt.Fprintln(dest, manPathSection) + + fmt.Fprint(dest, manLoggingSection) +} + // Run the command in 'run' using the grumble app. shlex is used to parse the string into an argv/c format -func runCmdStr(app *grumble.App, cmd string, args ...string) error { +func runCmdStr(app *grumble.App, p *flags.Parser, cmd string, args ...string) error { + if p != nil { + app.AddCommand(&grumble.Command{ + Name: "manpage", + Help: "Generate an application man page in groff format.", + LongHelp: "Generate an application man page in groff format. This command is used internally to generate the man page for the application and is not intended for general use.", + HelpGroup: "", + Flags: func(a *grumble.Flags) { + a.String("o", "output", "", "Output file for the man page. 
If not provided, the man page will be printed to stdout.") + }, + Run: func(c *grumble.Context) error { + dest := os.Stdout + if c.Flags.String("output") != "" { + fd, err := os.Create(c.Flags.String("output")) + if err != nil { + return errors.Wrapf(err, "Error creating file %q", c.Flags.String("output")) + } + defer func() { + err = fd.Close() + if err != nil { + fmt.Fprintf(os.Stderr, "Error closing file %q: %s\n", c.Flags.String("output"), err) + } + }() + dest = fd + } + + fprintManPage(dest, app, p) + return nil + }, + Completer: nil, + }) + } + return app.RunCommand(append([]string{cmd}, args...)) } diff --git a/src/include/daos/debug.h b/src/include/daos/debug.h index a9feeb022cd..f9e8c685084 100644 --- a/src/include/daos/debug.h +++ b/src/include/daos/debug.h @@ -1,6 +1,6 @@ /** * (C) Copyright 2015-2024 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -55,7 +55,8 @@ ACTION(il, il, arg) \ ACTION(csum, csum, arg) \ ACTION(pipeline, pipeline, arg) \ - ACTION(stack, stack, arg) + ACTION(stack, stack, arg) \ + ACTION(ddb, ddb, arg) #define DAOS_FOREACH_DB(ACTION, arg) \ /** metadata operation */ \ diff --git a/src/utils/ddb/ddb.c b/src/utils/ddb/ddb.c index a0e975ad97d..fdedeab4132 100644 --- a/src/utils/ddb/ddb.c +++ b/src/utils/ddb/ddb.c @@ -1,20 +1,24 @@ /** * (C) Copyright 2022-2024 Intel Corporation. * (C) Copyright 2025 Vdura Inc. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP. + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ +#define D_LOGFAC DD_FAC(ddb) + #include -#include -#include -#include #include +#include +#include #include +#include + +#include +#include +#include #include "ddb.h" -#include "daos/common.h" -#include "daos_errno.h" #include "ddb_common.h" #include "ddb_parse.h" diff --git a/src/utils/ddb/ddb_commands.c b/src/utils/ddb/ddb_commands.c index ed3edef9121..04cdd4e41ce 100644 --- a/src/utils/ddb/ddb_commands.c +++ b/src/utils/ddb/ddb_commands.c @@ -5,24 +5,24 @@ * * SPDX-License-Identifier: BSD-2-Clause-Patent */ +#define D_LOGFAC DD_FAC(ddb) -#include -#include #include #include -#include "daos_errno.h" -#include "daos_srv/vos_types.h" -#include "daos_types.h" +#include +#include +#include +#include +#include +#include + #include "ddb_common.h" #include "ddb_parse.h" #include "ddb.h" #include "ddb_vos.h" #include "ddb_printer.h" -#include "daos.h" #include "ddb_tree_path.h" -#include "gurt/common.h" -#include "gurt/debug.h" #define ilog_path_required_error_message "Path to object, dkey, or akey required\n" #define error_msg_write_mode_only "Can only modify the VOS tree in 'write mode'\n" diff --git a/src/utils/ddb/ddb_main.c b/src/utils/ddb/ddb_main.c index 1db110961e4..8b66a4b17d5 100644 --- a/src/utils/ddb/ddb_main.c +++ b/src/utils/ddb/ddb_main.c @@ -1,19 +1,23 @@ /** * (C) Copyright 2022-2024 Intel Corporation. + * (C) Copyright 2026 Hewlett Packard Enterprise Development LP * (C) Copyright 2025 Vdura Inc. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ +#define D_LOGFAC DD_FAC(ddb) + +#include +#include #include #include + +#include "ddb.h" #include "ddb_main.h" #include "ddb_common.h" #include "ddb_parse.h" #include "ddb_vos.h" -#include "ddb.h" -#include -#include int ddb_init() diff --git a/src/utils/ddb/ddb_mgmt.c b/src/utils/ddb/ddb_mgmt.c index 7bcf06605fc..3941168eb48 100644 --- a/src/utils/ddb/ddb_mgmt.c +++ b/src/utils/ddb/ddb_mgmt.c @@ -4,6 +4,7 @@ * * SPDX-License-Identifier: BSD-2-Clause-Patent */ +#define D_LOGFAC DD_FAC(ddb) #include #include @@ -13,7 +14,6 @@ #include #include -#include #include #include #include diff --git a/src/utils/ddb/ddb_parse.c b/src/utils/ddb/ddb_parse.c index 29e53785911..8586427e6ad 100644 --- a/src/utils/ddb/ddb_parse.c +++ b/src/utils/ddb/ddb_parse.c @@ -4,12 +4,14 @@ * * SPDX-License-Identifier: BSD-2-Clause-Patent */ +#define D_LOGFAC DD_FAC(ddb) #include #include -#include + +#include #include -#include "daos_errno.h" + #include "ddb_common.h" #include "ddb_parse.h" diff --git a/src/utils/ddb/ddb_printer.c b/src/utils/ddb/ddb_printer.c index 99302ece91c..dd78efbb481 100644 --- a/src/utils/ddb/ddb_printer.c +++ b/src/utils/ddb/ddb_printer.c @@ -1,9 +1,10 @@ /** * (C) Copyright 2022-2024 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ +#define D_LOGFAC DD_FAC(ddb) #include "ddb_printer.h" diff --git a/src/utils/ddb/ddb_spdk.c b/src/utils/ddb/ddb_spdk.c index fe1dff9a822..aa55ce8e782 100644 --- a/src/utils/ddb/ddb_spdk.c +++ b/src/utils/ddb/ddb_spdk.c @@ -1,12 +1,12 @@ /** * (C) Copyright 2022 Intel Corporation. 
- * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ +#define D_LOGFAC DD_FAC(ddb) -#include -#include +#include #include #include #include @@ -14,7 +14,9 @@ #include #include #include -#include + +#include +#include #include "ddb_common.h" #include "ddb_spdk.h" diff --git a/src/utils/ddb/ddb_tree_path.c b/src/utils/ddb/ddb_tree_path.c index e57bff9b245..3bfc104424e 100644 --- a/src/utils/ddb/ddb_tree_path.c +++ b/src/utils/ddb/ddb_tree_path.c @@ -1,8 +1,10 @@ /** * (C) Copyright 2023-2024 Intel Corporation. + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ +#define D_LOGFAC DD_FAC(ddb) #include "ddb_tree_path.h" #include "ddb_printer.h" diff --git a/src/utils/ddb/ddb_vos.c b/src/utils/ddb/ddb_vos.c index 1fd2e9893f7..7d4409a36c2 100644 --- a/src/utils/ddb/ddb_vos.c +++ b/src/utils/ddb/ddb_vos.c @@ -5,20 +5,23 @@ * * SPDX-License-Identifier: BSD-2-Clause-Patent */ +#define D_LOGFAC DD_FAC(ddb) #include #include #include + #include -#include -#include #include +#include #include + #include "ddb_common.h" #include "ddb_parse.h" #include "ddb_mgmt.h" #include "ddb_vos.h" #include "ddb_spdk.h" + #define ddb_vos_iterate(param, iter_type, recursive, anchors, cb, args) \ vos_iterate(param, iter_type, recursive, \ anchors, cb, NULL, args, NULL) From 3722759493b880bae8df3a3769c0adaf1d69ee3b Mon Sep 17 00:00:00 2001 From: Tomasz Gromadzki Date: Fri, 20 Feb 2026 13:08:09 +0100 Subject: [PATCH 218/253] DAOS-18603 cq: suppress CVE-2025-33042 (#17581) Suppress avro:1.11.4 vulnerability inherited from hadoop as there is no new version of hadoop Signed-off-by: Tomasz Gromadzki --- utils/trivy/.trivyignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/utils/trivy/.trivyignore b/utils/trivy/.trivyignore index c3452b8f4fa..c5c2c24ccf2 100644 --- a/utils/trivy/.trivyignore 
+++ b/utils/trivy/.trivyignore @@ -10,3 +10,6 @@ CVE-2025-48924 ## CVE-2025-58057,MEDIUM,7.5,"netty-codec: netty-codec-compression: Netty's BrotliDecoder is vulnerable to DoS via zip bomb style attack","io.netty:netty-codec","4.1.100.Final","4.1.125.Final",https://avd.aquasec.com/nvd/cve-2025-58057 CVE-2025-58057 + +## CVE-2025-33042,MEDIUM,,"org.apache.avro/avro: Apache Avro Java SDK: Code injection on Java generated code","org.apache.avro:avro","1.11.4","1.12.1, 1.11.5",https://avd.aquasec.com/nvd/cve-2025-33042 +CVE-2025-33042 From e7add9eaf378f2b5a209e795b37947928b6b3132 Mon Sep 17 00:00:00 2001 From: Jan Michalski Date: Fri, 20 Feb 2026 15:22:31 +0000 Subject: [PATCH 219/253] DAOS-18582 vos: fix picking up pages for pinning (#17578) As of now, this code path is used only by DTX commit/abort. The bug caused unrelated parts of the structure to be interpreted as page indices that were about to be pinned, instead of using only the page indices already added to the array. Because this part of the code determines both which pages should be pinned and whether additional pages would get pinned, it led to two outcomes: In the best case: DTX commit/abort operations were processed in smaller "chunks" than intended, which could result in performance degradation. In the worst case: The code reasoned that certain page indices were in the array when in fact they were not. As a result, some page indices never made it into the array and were never pinned. Later code assumed that all required pages had been pinned, so a missing page would ultimately lead to a crash. Signed-off-by: Jan Michalski --- src/vos/vos_obj_cache.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/vos/vos_obj_cache.c b/src/vos/vos_obj_cache.c index ba1898e0f25..a94450cfd0d 100644 --- a/src/vos/vos_obj_cache.c +++ b/src/vos/vos_obj_cache.c @@ -1,6 +1,6 @@ /** * (C) Copyright 2016-2024 Intel Corporation.
- * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -849,7 +849,8 @@ vos_bkt_array_subset(struct vos_bkt_array *super, struct vos_bkt_array *sub) return false; for (i = 0; i < sub->vba_cnt; i++) { - idx = daos_array_find(super, super->vba_cnt, sub->vba_bkts[i], &bkt_sort_ops); + idx = daos_array_find(super->vba_bkts, super->vba_cnt, sub->vba_bkts[i], + &bkt_sort_ops); if (idx < 0) return false; } From 79c332dfe62c14e6bb9db70b7adb3bc859c114d3 Mon Sep 17 00:00:00 2001 From: Liu Xuezhao Date: Sat, 21 Feb 2026 22:12:13 +0800 Subject: [PATCH 220/253] DAOS-18487 rebuild: add VOS_OF_REBUILD for rec punch vos_obj_update (#17576) To avoid DER_VOS_PARTIAL_UPDATE() failure, all vos_obj_update() calls should carry VOS_OF_REBUILD Signed-off-by: Xuezhao Liu --- src/object/srv_obj_migrate.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/object/srv_obj_migrate.c b/src/object/srv_obj_migrate.c index ea3d78640a8..2ccda79e4c2 100644 --- a/src/object/srv_obj_migrate.c +++ b/src/object/srv_obj_migrate.c @@ -1691,11 +1691,9 @@ migrate_punch(struct migrate_pool_tls *tls, struct migrate_one *mrone, mrone->mo_oid.id_shard)) mrone_recx_daos2_vos(mrone, mrone->mo_punch_iods, mrone->mo_punch_iod_num); - rc = vos_obj_update(cont->sc_hdl, mrone->mo_oid, - mrone->mo_rec_punch_eph, - mrone->mo_version, 0, &mrone->mo_dkey, - mrone->mo_punch_iod_num, - mrone->mo_punch_iods, NULL, NULL); + rc = vos_obj_update(cont->sc_hdl, mrone->mo_oid, mrone->mo_rec_punch_eph, + mrone->mo_version, VOS_OF_REBUILD, &mrone->mo_dkey, + mrone->mo_punch_iod_num, mrone->mo_punch_iods, NULL, NULL); D_DEBUG(DB_REBUILD, DF_RB ": " DF_UOID " mrone %p punch %d eph " DF_U64 "records: " DF_RC "\n", DP_RB_MPT(tls), DP_UOID(mrone->mo_oid), mrone, mrone->mo_punch_iod_num, From f98097436787d591f8e583626a730e6600d700ee Mon Sep 17 00:00:00 2001 From: Dalton Bohning 
Date: Tue, 24 Feb 2026 08:16:26 -0800 Subject: [PATCH 221/253] DAOS-18593 test: replace sleep with retry in rebuild/interactive.py (#17559) Replace arbitrary sleep with a retry on expected DER_NONEXIST. Signed-off-by: Dalton Bohning --- src/tests/ftest/rebuild/interactive.py | 29 ++++++++++++++++++-------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/src/tests/ftest/rebuild/interactive.py b/src/tests/ftest/rebuild/interactive.py index 5dc968650a4..181a00391e6 100644 --- a/src/tests/ftest/rebuild/interactive.py +++ b/src/tests/ftest/rebuild/interactive.py @@ -1,5 +1,5 @@ """ - (C) Copyright 2025 Hewlett Packard Enterprise Development LP + (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -8,6 +8,7 @@ from apricot import TestWithServers from data_utils import assert_val_in_list +from exception_utils import CommandFailure from ior_utils import get_ior from job_manager_utils import get_job_manager @@ -74,10 +75,6 @@ def __run_rebuild_interactive(self, pool, cont_ior, ior, - 'dmg pool reintegrate' - 'dmg system reintegrate' """ - # Time to wait between rebuild start and manual stop. - # If we stop too early rebuild might not have started yet. - # Ideally, if we could poll the "actual" rebuild status this would not be necessary. - secs_between_rebuild_start_and_manual_stop = 4 ior_flags_read = self.params.get('flags_read', '/run/ior/*') ior_ppn = self.params.get('ppn', '/run/ior/*') @@ -100,8 +97,15 @@ def __run_rebuild_interactive(self, pool, cont_ior, ior, pool.wait_for_rebuild_to_start(interval=1) self.log_step(f'{exclude_method} - Manually stop rebuild') - time.sleep(secs_between_rebuild_start_and_manual_stop) - pool.rebuild_stop() + for i in range(3): + try: + pool.rebuild_stop() + break + except CommandFailure as error: + if i == 2 or 'DER_NONEXIST' not in str(error): + raise + self.log.info('Assuming rebuild is not started yet. 
Retrying in 3 seconds...') + time.sleep(3) self.log_step(f'{exclude_method} - Wait for rebuild to stop') pool.wait_for_rebuild_to_stop(interval=3) @@ -145,8 +149,15 @@ def __run_rebuild_interactive(self, pool, cont_ior, ior, pool.wait_for_rebuild_to_start(interval=1) self.log_step(f'{reint_method} - Manually stop rebuild') - time.sleep(secs_between_rebuild_start_and_manual_stop) - pool.rebuild_stop() + for i in range(3): + try: + pool.rebuild_stop() + break + except CommandFailure as error: + if i == 2 or 'DER_NONEXIST' not in str(error): + raise + self.log.info('Assuming rebuild is not started yet. Retrying in 3 seconds...') + time.sleep(3) self.log_step(f'{reint_method} - Wait for rebuild to stop') pool.wait_for_rebuild_to_stop(interval=3) From 45b7e0474f0dad6b7e9d10bd0787165cabad5b7a Mon Sep 17 00:00:00 2001 From: Dalton Bohning Date: Tue, 24 Feb 2026 08:19:08 -0800 Subject: [PATCH 222/253] DAOS-18483 test: fix OSAOfflineReintegration (#17534) Some rank exclusions were hardcoded to the wrong rank. Also fix incorrect client procs in the config. Signed-off-by: Dalton Bohning --- src/tests/ftest/osa/offline_reintegration.py | 43 +++++++++++-------- .../ftest/osa/offline_reintegration.yaml | 4 +- 2 files changed, 28 insertions(+), 19 deletions(-) diff --git a/src/tests/ftest/osa/offline_reintegration.py b/src/tests/ftest/osa/offline_reintegration.py index 825dc17702b..8d92ce8a556 100644 --- a/src/tests/ftest/osa/offline_reintegration.py +++ b/src/tests/ftest/osa/offline_reintegration.py @@ -1,6 +1,6 @@ """ (C) Copyright 2020-2023 Intel Corporation. - (C) Copyright 2025 Hewlett Packard Enterprise Development LP + (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -48,6 +48,12 @@ def run_offline_reintegration_test(self, num_pool, ranks, data=False, server_boo operations. num_ranks (int): Number of ranks to drain. Defaults to 1. """ + # Figure out an additional unique rank to stop during rebuild. 
+ # Used when self.test_during_rebuild is True + all_ranks = list(map(str, self.server_managers[0].ranks.keys())) + all_exclude_ranks = ','.join(ranks).split(',') + rank_during_rebuild = self.random.choice(list(set(all_ranks) - set(all_exclude_ranks))) + # Create 'num_pool' number of pools pools = [] if oclass is None: @@ -82,7 +88,7 @@ def run_offline_reintegration_test(self, num_pool, ranks, data=False, server_boo if self.test_during_aggregation is True: self.run_ior_thread("Write", oclass, test_seq) - self.pool = self.random.choice(pools) # nosec + self.pool = self.random.choice(pools) for loop in range(0, self.loop_test_cnt): self.log.info( "==> (Loop %s/%s) Excluding ranks %s from %s", @@ -95,8 +101,8 @@ def run_offline_reintegration_test(self, num_pool, ranks, data=False, server_boo initial_free_space = self.pool.get_total_free_space(refresh=True) if server_boot is False: if (self.test_during_rebuild is True and index == 0): - # Exclude rank 5 - output = self.pool.exclude("5") + # Exclude an additional rank + output = self.pool.exclude(rank_during_rebuild) self.print_and_assert_on_rebuild_failure(output) if self.test_during_aggregation is True: self.delete_extra_container(self.pool) @@ -115,10 +121,9 @@ def run_offline_reintegration_test(self, num_pool, ranks, data=False, server_boo output = self.dmg_command.system_stop(ranks=rank, force=True) self.print_and_assert_on_rebuild_failure(output) output = self.dmg_command.system_start(ranks=rank) - # Just try to reintegrate rank 5 + # Just try to reintegrate the additional rank if (self.test_during_rebuild is True and index == 2): - # Reintegrate rank 5 - output = self.pool.reintegrate("5") + output = self.pool.reintegrate(rank_during_rebuild) self.print_and_assert_on_rebuild_failure(output) pver_exclude = self.pool.get_version(True) @@ -127,10 +132,12 @@ def run_offline_reintegration_test(self, num_pool, ranks, data=False, server_boo # Check pool version incremented after pool exclude # pver_exclude should be 
greater than # pver_begin + 1 (1 target + exclude) - self.assertTrue(pver_exclude > (pver_begin + 1), - "Pool Version Error: After exclude") - self.assertTrue(initial_free_space > free_space_after_exclude, - "Expected free space after exclude is less than initial") + if not pver_exclude > (pver_begin + 1): + self.fail(f"Pool version after exclude: {pver_exclude} !> {pver_begin + 1}") + if not initial_free_space > free_space_after_exclude: + self.fail( + "Expected free space after exclude: " + f"{initial_free_space} !> {free_space_after_exclude}") # Reintegrate the ranks which was excluded self.log.info( @@ -154,12 +161,14 @@ def run_offline_reintegration_test(self, num_pool, ranks, data=False, server_boo free_space_after_reintegration = self.pool.get_total_free_space(refresh=True) pver_reint = self.pool.get_version(True) self.log.info("Pool Version after reintegrate %d", pver_reint) - # Check pool version incremented after pool reintegrate - self.assertTrue(pver_reint > pver_exclude, "Pool Version Error: After reintegrate") - self.assertTrue(free_space_after_reintegration > free_space_after_exclude, - "Expected free space after reintegration is less than exclude") - - display_string = "{} space at the End".format(str(self.pool)) + if not pver_reint > pver_exclude: + self.fail(f"Pool version after reintegrate: {pver_reint} !> {pver_exclude}") + if not free_space_after_reintegration > free_space_after_exclude: + self.fail( + "Expected free space after reintegrate: " + f"{free_space_after_reintegration} !> {free_space_after_exclude}") + + display_string = f"{str(self.pool)} space at the End" self.pool.display_pool_daos_space(display_string) # Finally check whether the written data can be accessed. 
diff --git a/src/tests/ftest/osa/offline_reintegration.yaml b/src/tests/ftest/osa/offline_reintegration.yaml index d4e70262606..659186f7dbc 100644 --- a/src/tests/ftest/osa/offline_reintegration.yaml +++ b/src/tests/ftest/osa/offline_reintegration.yaml @@ -58,8 +58,8 @@ container: properties: cksum:crc64,cksum_size:16384,srv_cksum:on,rd_fac:2 ior: - clientslots: - slots: 48 + client_processes: + np: 2 test_file: /testFile repetitions: 2 dfs_destroy: false From ba648f75ab74e08cf39e48e8c0b0ea95700b1 Mon Sep 17 00:00:00 2001 From: Tom Nabarro Date: Tue, 24 Feb 2026 16:22:04 +0000 Subject: [PATCH 223/253] DAOS-17433 doc: Remove nvme-add-device reference (#17572) Remove reference to the no longer used nvme-add-device subcommand and update non-VMD instructions to reference nvme-rebind. Signed-off-by: Tom Nabarro --- docs/admin/administration.md | 35 ++++++----------------------------- utils/config/daos_server.yml | 11 +++++------ 2 files changed, 11 insertions(+), 35 deletions(-) diff --git a/docs/admin/administration.md b/docs/admin/administration.md index 554866e858e..459d10dbb16 100644 --- a/docs/admin/administration.md +++ b/docs/admin/administration.md @@ -642,41 +642,18 @@ This LED activity visually indicates a fault and that the device needs to be rep longer in use by DAOS. The LED of the VMD device will remain in this state until replaced by a new device. -!!! note - Full NVMe hot plug capability will be available and supported in DAOS 2.6 release. - Use is currently intended for testing only and is not supported for production. - -- To use a newly added (hot-inserted) SSD it needs to be unbound from the kernel driver -and bound instead to a user-space driver so that the device can be used with DAOS.
- -To rebind a SSD on a single host, run the following command (replace SSD PCI address and -hostname with appropriate values): +- If VMD is not enabled, then in order to use a newly added (hot-inserted) SSD it needs to be +unbound from the kernel driver and bound instead to a user-space driver so that the device can be +used with DAOS. To rebind an SSD on a single host, run the following command (replace SSD PCI +address and hostname with appropriate values): ```bash $ dmg storage nvme-rebind -a 0000:84:00.0 -l wolf-167 Command completed successfully ``` The device will now be bound to a user-space driver (e.g. VFIO) and can be accessed by -DAOS I/O engine processes (and used in the following `dmg storage replace nvme` command -as a new device). - -- Once an engine is using a newly added (hot-inserted) SSD it can be added to the persistent -NVMe config (stored on SCM) so that on engine restart the new device will be used. - -To update the engine's persistent NVMe config with the new SSD transport address, run the -following command (replace SSD PCI address, engine index and hostname with appropriate values): -```bash -$ dmg storage nvme-add-device -a 0000:84:00.0 -e 0 -l wolf-167 -Command completed successfully -``` - -The optional [--tier-index|-t] command parameter can be used to specify which storage tier to -insert the SSD into, if specified then the server will attempt to insert the device into the tier -specified by the index, if not specified then the server will attempt to insert the device into -the bdev tier with the lowest index value (the first bdev tier). - -The device will now be registered in the engine's persistent NVMe config so that when restarted, -the newly added SSD will be used. +DAOS I/O engine processes. Now the new device can be used in the following +`dmg storage replace nvme` command. 
- Replace an excluded SSD with a New Device: ```bash diff --git a/utils/config/daos_server.yml b/utils/config/daos_server.yml index c7fdd8faab7..7e3be1483ee 100644 --- a/utils/config/daos_server.yml +++ b/utils/config/daos_server.yml @@ -212,16 +212,16 @@ #socket_dir: ./.daos/daos_server # # -## Number of hugepages to allocate for DMA buffer memory +## Number of hugepages to allocate for DMA buffer memory (total value for all engines) # -## Optional parameter that should only be set if overriding the automatically calculated value is # -## #necessary. Specifies the number (not size) of hugepages to allocate for use by NVMe through -## #SPDK. For optimum performance each target requires 1 GiB of hugepage space. The provided value +## Optional parameter that should only be set if overriding the automatically calculated value is +## necessary. Specifies the number (not size) of hugepages to allocate for use by NVMe through +## SPDK. For optimum performance each target requires 1 GiB of hugepage space. The provided value ## should be calculated by dividing the total amount of hugepages memory required for all targets ## across all engines on a host by the system hugepage size. If not set here, the value will be ## automatically calculated based on the number of targets (using the default system hugepage size). # -## Example: (2 engines * (16 targets/engine * 1GiB)) / 2MiB hugepage size = 16834 +## Example: (2 engines * (16 targets/engine * 1GiB)) / 2MiB hugepage size = 16384 # ## default: 0 #nr_hugepages: 0 @@ -242,7 +242,6 @@ #allow_numa_imbalance: true # # - ## Allow DAOS server to run with transparent hugepages (THP) enabled on the host machine. 
# ## WARNING: Transparent hugepages can conflict with how the DAOS server uses hugepages, and enabling From 0bf06e339339899b2d323ce2b0bea4777d933ed3 Mon Sep 17 00:00:00 2001 From: Jan Michalski Date: Tue, 24 Feb 2026 16:23:07 +0000 Subject: [PATCH 224/253] SRE-3578 test: suppress OPENSSL_init_crypto() leak (#17513) { Memcheck:Leak match-leak-kinds: reachable fun:malloc fun:CRYPTO_malloc fun:CRYPTO_zalloc fun:CRYPTO_THREAD_lock_new obj:/usr/lib64/libcrypto.so.3.5.1 fun:__pthread_once_slow fun:CRYPTO_THREAD_run_once fun:ENGINE_new obj:/usr/lib64/libcrypto.so.3.5.1 fun:__pthread_once_slow fun:CRYPTO_THREAD_run_once fun:OPENSSL_init_crypto obj:/usr/lib64/libcrypto.so.3.5.1 fun:__pthread_once_slow fun:CRYPTO_THREAD_run_once fun:CONF_modules_load fun:CONF_modules_load_file_ex obj:/usr/lib64/libcrypto.so.3.5.1 fun:__pthread_once_slow fun:CRYPTO_THREAD_run_once } Signed-off-by: Jan Michalski --- utils/test_memcheck.supp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/utils/test_memcheck.supp b/utils/test_memcheck.supp index df2cd5bb28b..9c3e7e2b2df 100644 --- a/utils/test_memcheck.supp +++ b/utils/test_memcheck.supp @@ -456,3 +456,11 @@ ... fun:setgrent } +{ + OPENSSL_init_crypto leak + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + ... 
+ fun:OPENSSL_init_crypto +} From ffdf3610120f7188f82e6a5b95bf0440133db493 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Oksana=20Sa=C5=82yk?= Date: Tue, 24 Feb 2026 17:25:38 +0100 Subject: [PATCH 225/253] SRE-3466 test: use image_version in Functional Hardware Medium (#17515) Use image_version el9.7 in Functional Hardware Medium Verbs stages Signed-off-by: Oksana Salyk --- Jenkinsfile | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index e36de3fb189..4a039f83955 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -1193,7 +1193,8 @@ pipeline { provider: 'ofi+verbs;ofi_rxm', run_if_pr: false, run_if_landing: false, - job_status: job_status_internal + job_status: job_status_internal, + image_version: 'el9.7' ), 'Functional Hardware Medium Verbs Provider MD on SSD': getFunctionalTestStage( name: 'Functional Hardware Medium Verbs Provider MD on SSD', @@ -1206,7 +1207,8 @@ pipeline { provider: 'ofi+verbs;ofi_rxm', run_if_pr: true, run_if_landing: false, - job_status: job_status_internal + job_status: job_status_internal, + image_version: 'el9.7' ), 'Functional Hardware Medium UCX Provider': getFunctionalTestStage( name: 'Functional Hardware Medium UCX Provider', From 84d3e40d720674bc03173006ce261fbe8a463885 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 25 Feb 2026 05:38:56 -0800 Subject: [PATCH 226/253] DAOS-18616 cq: update python versions (#17586) Updates `isort` from 7.0.0 to 8.0.0 Updates `pylint` from 4.0.4 to 4.0.5 Signed-off-by: dependabot[bot] --- utils/cq/requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/utils/cq/requirements.txt b/utils/cq/requirements.txt index 079145ab4f3..f0a62b75f78 100644 --- a/utils/cq/requirements.txt +++ b/utils/cq/requirements.txt @@ -3,8 +3,8 @@ pyenchant ## flake8 6 removed --diff option which breaks flake precommit hook. 
## https://github.com/pycqa/flake8/issues/1389 https://github.com/PyCQA/flake8/pull/1720 flake8==7.3.0 -isort==7.0.0 -pylint==4.0.4 +isort==8.0.0 +pylint==4.0.5 yamllint==1.38.0 codespell==2.4.1 # Used by ci/jira_query.py which pip installs it standalone. From 182455977a9671c9190df70046123df41c7274f4 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 25 Feb 2026 05:59:56 -0800 Subject: [PATCH 227/253] DAOS-18616 cq: update GHA versions (#17588) Updates `github/codeql-action` from 4.32.3 to 4.32.4 Updates `aquasecurity/trivy-action` from 0.34.0 to 0.34.1 Signed-off-by: dependabot[bot] --- .github/workflows/ossf-scorecard.yml | 2 +- .github/workflows/trivy.yml | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ossf-scorecard.yml b/.github/workflows/ossf-scorecard.yml index 65324095cf2..90de01e8201 100644 --- a/.github/workflows/ossf-scorecard.yml +++ b/.github/workflows/ossf-scorecard.yml @@ -71,6 +71,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard (optional). # Commenting out will disable upload of results to your repo's Code Scanning dashboard - name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@9e907b5e64f6b83e7804b09294d44122997950d6 # v4.32.3 + uses: github/codeql-action/upload-sarif@89a39a4e59826350b863aa6b6252a07ad50cf83e # v4.32.4 with: sarif_file: results.sarif diff --git a/.github/workflows/trivy.yml b/.github/workflows/trivy.yml index 2be3d8cd8ac..752b35a63a6 100644 --- a/.github/workflows/trivy.yml +++ b/.github/workflows/trivy.yml @@ -36,7 +36,7 @@ jobs: uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Run Trivy vulnerability scanner in filesystem mode (table format) - uses: aquasecurity/trivy-action@c1824fd6edce30d7ab345a9989de00bbd46ef284 # 0.34.0 + uses: aquasecurity/trivy-action@e368e328979b113139d6f9068e03accaed98a518 # 0.34.1 with: scan-type: 'fs' scan-ref: '.' 
@@ -61,14 +61,14 @@ jobs: sed -i 's/format: template/format: sarif/g' utils/trivy/trivy.yaml - name: Run Trivy vulnerability scanner in filesystem mode (sarif format) - uses: aquasecurity/trivy-action@c1824fd6edce30d7ab345a9989de00bbd46ef284 # 0.34.0 + uses: aquasecurity/trivy-action@e368e328979b113139d6f9068e03accaed98a518 # 0.34.1 with: scan-type: 'fs' scan-ref: '.' trivy-config: 'utils/trivy/trivy.yaml' - name: Upload Trivy scan results to GitHub Security tab - uses: github/codeql-action/upload-sarif@9e907b5e64f6b83e7804b09294d44122997950d6 # v4.32.3 + uses: github/codeql-action/upload-sarif@89a39a4e59826350b863aa6b6252a07ad50cf83e # v4.32.4 with: sarif_file: 'trivy-results.sarif' @@ -79,7 +79,7 @@ jobs: sed -i 's/exit-code: 0/exit-code: 1/g' utils/trivy/trivy.yaml - name: Run Trivy vulnerability scanner in filesystem mode (human readable format) - uses: aquasecurity/trivy-action@c1824fd6edce30d7ab345a9989de00bbd46ef284 # 0.34.0 + uses: aquasecurity/trivy-action@e368e328979b113139d6f9068e03accaed98a518 # 0.34.1 with: scan-type: 'fs' scan-ref: '.' From a17e60caff0834d832c6acf7d1c4aab1137ccc73 Mon Sep 17 00:00:00 2001 From: "John E. Malmberg" Date: Wed, 25 Feb 2026 12:51:59 -0600 Subject: [PATCH 228/253] SRE-3584 Fix test reporting (#17568) Restore testing of NVMe and PMEM when they are supposed to be present. Jenkinsfile: Minor groovy linting fixes. ci/functional/test_main.sh: Need to pass environment variables for expected hardware counts to scripts that are run on other nodes. Changed to make it easier to test outside of Jenkins. ci/functional/test_main_prep_node.sh: Enhancement, package changed from default of "(root)" to be hardware. Class name is now based on the stage name with punctuation removed. Changed e-mail heading from "failed" to "warning" for non-fatal issues. Signed-off-by: John E.
Malmberg --- Jenkinsfile | 15 +++++++---- ci/functional/test_main.sh | 38 ++++++++++++++++++---------- ci/functional/test_main_prep_node.sh | 20 ++++++++++----- 3 files changed, 47 insertions(+), 26 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 4a039f83955..d1b4d4abc1e 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -886,7 +886,8 @@ pipeline { job_step_update( functionalTest( inst_repos: daosRepos(), - inst_rpms: functionalPackages(1, next_version(), 'tests-internal') + ' mercury-libfabric', + inst_rpms: functionalPackages(1, next_version(), 'tests-internal') + + ' mercury-libfabric', test_function: 'runTestFunctionalV2')) } post { @@ -908,7 +909,8 @@ pipeline { job_step_update( functionalTest( inst_repos: daosRepos(), - inst_rpms: functionalPackages(1, next_version(), 'tests-internal') + ' mercury-libfabric', + inst_rpms: functionalPackages(1, next_version(), 'tests-internal') + + ' mercury-libfabric', test_function: 'runTestFunctionalV2')) } post { @@ -930,7 +932,8 @@ pipeline { job_step_update( functionalTest( inst_repos: daosRepos(), - inst_rpms: functionalPackages(1, next_version(), 'tests-internal') + ' mercury-libfabric', + inst_rpms: functionalPackages(1, next_version(), 'tests-internal') + + ' mercury-libfabric', test_function: 'runTestFunctionalV2')) } post { @@ -952,7 +955,8 @@ pipeline { job_step_update( functionalTest( inst_repos: daosRepos(), - inst_rpms: functionalPackages(1, next_version(), 'tests-internal') + ' mercury-libfabric', + inst_rpms: functionalPackages(1, next_version(), 'tests-internal') + + ' mercury-libfabric', test_function: 'runTestFunctionalV2', image_version: 'leap15.6')) } @@ -975,7 +979,8 @@ pipeline { job_step_update( functionalTest( inst_repos: daosRepos(), - inst_rpms: functionalPackages(1, next_version(), 'tests-internal') + ' mercury-libfabric', + inst_rpms: functionalPackages(1, next_version(), 'tests-internal') + + ' mercury-libfabric', test_function: 'runTestFunctionalV2')) } post { diff --git 
a/ci/functional/test_main.sh b/ci/functional/test_main.sh index adcd0f78be8..aa056248bcc 100755 --- a/ci/functional/test_main.sh +++ b/ci/functional/test_main.sh @@ -1,7 +1,7 @@ #!/bin/bash # # Copyright 2020-2024 Intel Corporation. -# Copyright 2025 Hewlett Packard Enterprise Development LP +# Copyright 2025-2026 Hewlett Packard Enterprise Development LP # # SPDX-License-Identifier: BSD-2-Clause-Patent # @@ -14,6 +14,13 @@ fi test_tag="$TEST_TAG" +: "${NODELIST:=localhost}" +: "${TEST_RPMS:=false}" +: "${STAGE_NAME:=unknown}" + +def_node_count="$(nodeset -c "$NODELIST")" +: "${NODE_COUNT:=$def_node_count}" + tnodes=$(echo "$NODELIST" | cut -d ',' -f 1-"$NODE_COUNT") first_node=${NODELIST%%,*} @@ -42,14 +49,17 @@ cluster_reboot () { test_cluster() { # Test that all nodes in the cluster are healthy clush -B -S -o '-i ci_key' -l root -w "${tnodes}" \ - "OPERATIONS_EMAIL=${OPERATIONS_EMAIL} \ + "OPERATIONS_EMAIL=${OPERATIONS_EMAIL:-} \ FIRST_NODE=${first_node} \ TEST_RPMS=${TEST_RPMS} \ NODELIST=${tnodes} \ BUILD_URL=\"${BUILD_URL:-Unknown in GHA}\" \ - STAGE_NAME=\"$STAGE_NAME\" \ + STAGE_NAME=\"${STAGE_NAME}\" \ JENKINS_URL=\"${JENKINS_URL:-}\" \ DAOS_DEVOPS_EMAIL=\"${DAOS_DEVOPS_EMAIL:-}\" \ + DAOS_INFINIBAND=${DAOS_INFINIBAND:-} \ + DAOS_NVME=${DAOS_NVME:-} \ + DAOS_PMEM=${DAOS_PMEM:-} \ $(cat ci/functional/test_main_prep_node.sh)" } @@ -65,7 +75,7 @@ if ! test_cluster; then echo "Hardware test failed again after reboot" fi else - echo "Cluster reboot failed" + echo "Cluster reboot failed" fi else hardware_ok=true @@ -88,6 +98,7 @@ trap 'clush -B -S -o "-i ci_key" -l root -w "${tnodes}" '\ # Setup the Jenkins build artifacts directory before running the tests to ensure # there is enough disk space to report the results. +# Even though STAGE_NAME forced to be set, shellcheck wants this syntax. 
rm -rf "${STAGE_NAME:?ERROR: STAGE_NAME is not defined}/" mkdir "${STAGE_NAME:?ERROR: STAGE_NAME is not defined}/" @@ -98,24 +109,23 @@ rm -rf install/lib/daos/TESTING/ftest/avocado ./*_results.xml mkdir -p install/lib/daos/TESTING/ftest/avocado/job-results if "$hardware_ok"; then - if $TEST_RPMS; then + if "$TEST_RPMS"; then # shellcheck disable=SC2029 - ssh -i ci_key -l jenkins "${first_node}" \ - "TEST_TAG=\"$test_tag\" \ - TNODES=\"$tnodes\" \ - FTEST_ARG=\"${FTEST_ARG:-}\" \ - WITH_VALGRIND=\"${WITH_VALGRIND:-}\" \ - STAGE_NAME=\"$STAGE_NAME\" \ - DAOS_HTTPS_PROXY=\"${DAOS_HTTPS_PROXY:-}\" \ + ssh -i ci_key -l jenkins "${first_node}" \ + "TEST_TAG=\"$test_tag\" \ + TNODES=\"$tnodes\" \ + FTEST_ARG=\"${FTEST_ARG:-}\" \ + WITH_VALGRIND=\"${WITH_VALGRIND:-}\" \ + STAGE_NAME=\"${STAGE_NAME}\" \ + DAOS_HTTPS_PROXY=\"${DAOS_HTTPS_PROXY:-}\" \ $(cat ci/functional/test_main_node.sh)" else - ./ftest.sh "$test_tag" "$tnodes" "$FTEST_ARG" + ./ftest.sh "$test_tag" "$tnodes" "${FTEST_ARG:-}" fi fi # Now rename the previously collected hardware test data for Jenkins # to use them for Junit processing. -: "${STAGE_NAME:=}" mkdir -p "${STAGE_NAME}/hardware_prep/" for node in ${tnodes//,/ }; do old_name="./hardware_prep_node_results.xml.$node" diff --git a/ci/functional/test_main_prep_node.sh b/ci/functional/test_main_prep_node.sh index 71ca727af1a..ab75841a54b 100755 --- a/ci/functional/test_main_prep_node.sh +++ b/ci/functional/test_main_prep_node.sh @@ -20,8 +20,12 @@ mail_domain="${domain1%%/*}" : "${DAOS_PMEM:=0}" : "${DAOS_NVME:=0}" +#cn is for a cleaned up stage name. +cn=$(echo "$STAGE_NAME" | sed 's/[^a-zA-Z0-9_]/_/g' | sed 's/__*/_/g') + result=0 mail_message='' +mail_type='warning' nl=" " @@ -51,7 +55,7 @@ function do_mail { fi # shellcheck disable=SC2059 build_info="BUILD_URL = $BUILD_URL$nl STAGE = $STAGE_NAME$nl$nl" - mail -s "Hardware check failed after reboot!" \ + mail -s "Hardware check $mail_type after reboot!" 
\ -r "$DAOS_DEVOPS_EMAIL" "$OPERATIONS_EMAIL" \ <<< "$build_info$mail_message" set -x @@ -340,21 +344,23 @@ if [ -n "$FIRST_NODE" ] && ! grep /mnt/share /proc/mounts; then mount "$FIRST_NODE":/export/share /mnt/share fi -# Defaulting the package to "(root)" for now as then Jenkins -# will default to setting putting the outer stage name and -# inner stage name in the full test name. -ts="Hardware" +# The package name defaults to "(root)" unless there is a dot in the +# testsuite name, in which case the package name is the part before +# the last dot in the testsuite name. +pn="Hardware" tf="failures=\"$testfails\"" te="errors=\"0\"" tc="tests=\"$testruns\"" -# shellcheck disable=SC2089 -junit_xml="$nl +junit_xml="$nl $testcases$nl" # Each junit file needs the same name for when they are collected. echo "$junit_xml" > "./hardware_prep_node_results.xml" +if [ "$testfails" -gt 0 ]; then + mail_type='failed' +fi do_mail if [ "$result" -ne 0 ]; then From 94b5b8320b8a2780d8e9f265ded7791ed5fabec0 Mon Sep 17 00:00:00 2001 From: Nasf-Fan Date: Thu, 26 Feb 2026 16:02:31 +0800 Subject: [PATCH 229/253] DAOS-18587 chk: handle report upcall failure (#17546) Anytime when DAOS engine logic needs interaction with admin, it will generate new interaction record in chk_instance::ci_pending_hdl tree, and then trigger dRPP upcall to control plane that may fail for some reason. If hit failure, the dRPC sponsor needs to remove such record from chk_instance::ci_pending_hdl tree before destroying it to avoid triggering fake assertion. The patch also fixes a container label check issue: If the label is transferred as d_iov_t instead of string, then the buffer maybe not '\0' terminated, need to check its buffer length. 
Signed-off-by: Fan Yong --- src/chk/chk_common.c | 53 ++++++++------ src/chk/chk_engine.c | 43 ++++++----- src/chk/chk_internal.h | 24 ++++++- src/chk/chk_leader.c | 27 +++---- src/chk/chk_upcall.c | 4 ++ src/include/daos/common.h | 1 + src/tests/suite/daos_cr.c | 145 ++++++++++++++++++++++++++++++++++++++ 7 files changed, 237 insertions(+), 60 deletions(-) diff --git a/src/chk/chk_common.c b/src/chk/chk_common.c index adf5d068523..21d0a8ceba7 100644 --- a/src/chk/chk_common.c +++ b/src/chk/chk_common.c @@ -291,7 +291,7 @@ chk_pending_free(struct btr_instance *tins, struct btr_record *rec, void *args) ABT_mutex_unlock(cpr->cpr_mutex); } else { ABT_mutex_unlock(cpr->cpr_mutex); - chk_pending_destroy(cpr); + chk_pending_destroy(NULL, cpr); } } @@ -930,6 +930,27 @@ chk_pool_shard_cleanup(struct chk_instance *ins) } } +int +chk_pending_lookup(struct chk_instance *ins, uint64_t seq, struct chk_pending_rec **cpr) +{ + d_iov_t kiov; + d_iov_t riov; + int rc; + + d_iov_set(&riov, NULL, 0); + d_iov_set(&kiov, &seq, sizeof(seq)); + + ABT_rwlock_rdlock(ins->ci_abt_lock); + rc = dbtree_lookup(ins->ci_pending_hdl, &kiov, &riov); + ABT_rwlock_unlock(ins->ci_abt_lock); + if (rc == 0) + *cpr = (struct chk_pending_rec *)riov.iov_buf; + else + *cpr = NULL; + + return rc; +} + int chk_pending_add(struct chk_instance *ins, d_list_t *pool_head, d_list_t *rank_head, uuid_t uuid, uint64_t seq, uint32_t rank, uint32_t cla, uint32_t option_nr, uint32_t *options, @@ -985,12 +1006,14 @@ chk_pending_del(struct chk_instance *ins, uint64_t seq, struct chk_pending_rec * d_iov_set(&kiov, &seq, sizeof(seq)); ABT_rwlock_wrlock(ins->ci_abt_lock); - rc = dbtree_delete(ins->ci_pending_hdl, BTR_PROBE_EQ, &kiov, &riov); + rc = dbtree_delete(ins->ci_pending_hdl, BTR_PROBE_EQ, &kiov, cpr == NULL ? 
NULL : &riov); ABT_rwlock_unlock(ins->ci_abt_lock); - if (rc == 0) - *cpr = (struct chk_pending_rec *)riov.iov_buf; - else - *cpr = NULL; + if (cpr != NULL) { + if (rc == 0) + *cpr = (struct chk_pending_rec *)riov.iov_buf; + else + *cpr = NULL; + } D_CDEBUG(rc != 0, DLOG_ERR, DLOG_DBG, "Del pending record with gen "DF_X64", seq "DF_X64": "DF_RC"\n", @@ -1028,29 +1051,13 @@ chk_pending_wakeup(struct chk_instance *ins, struct chk_pending_rec *cpr) ABT_mutex_unlock(cpr->cpr_mutex); } else { ABT_mutex_unlock(cpr->cpr_mutex); - chk_pending_destroy(cpr); + chk_pending_destroy(ins, cpr); } } return rc; } -void -chk_pending_destroy(struct chk_pending_rec *cpr) -{ - D_ASSERT(d_list_empty(&cpr->cpr_pool_link)); - D_ASSERT(d_list_empty(&cpr->cpr_rank_link)); - D_ASSERT(d_list_empty(&cpr->cpr_ins_link)); - - if (cpr->cpr_cond != ABT_COND_NULL) - ABT_cond_free(&cpr->cpr_cond); - - if (cpr->cpr_mutex != ABT_MUTEX_NULL) - ABT_mutex_free(&cpr->cpr_mutex); - - D_FREE(cpr); -} - int chk_policy_refresh(uint32_t policy_nr, struct chk_policy *policies, struct chk_property *prop) { diff --git a/src/chk/chk_engine.c b/src/chk/chk_engine.c index 3dfb7b9b705..b7374acb806 100644 --- a/src/chk/chk_engine.c +++ b/src/chk/chk_engine.c @@ -246,6 +246,7 @@ chk_engine_post_repair(struct chk_pool_rec *cpr, int *result, bool update) *result = 0; if (*result != 0) { + chk_ins_set_fail(cpr->cpr_ins, cbk->cb_phase); if (cpr->cpr_ins->ci_prop.cp_flags & CHK__CHECK_FLAG__CF_FAILOUT) { cbk->cb_time.ct_stop_time = time(NULL); cbk->cb_pool_status = CHK__CHECK_POOL_STATUS__CPS_FAILED; @@ -1204,10 +1205,13 @@ chk_engine_cont_target_label_empty(struct chk_cont_rec *ccr) static inline bool chk_engine_cont_cs_label_empty(struct chk_cont_rec *ccr) { - if (daos_iov_empty(&ccr->ccr_label_cs)) + d_iov_t *label = &ccr->ccr_label_cs; + + if (daos_iov_empty(label)) return true; - if (strncmp(DAOS_PROP_NO_CO_LABEL, ccr->ccr_label_cs.iov_buf, DAOS_PROP_LABEL_MAX_LEN) == 0) + if (strlen(DAOS_PROP_NO_CO_LABEL) == 
label->iov_len && + strncmp(DAOS_PROP_NO_CO_LABEL, label->iov_buf, label->iov_len) == 0) return true; return false; @@ -1579,8 +1583,8 @@ chk_engine_cont_label_cb(daos_handle_t ih, d_iov_t *key, d_iov_t *val, void *arg ccr = riov.iov_buf; if (ccr->ccr_label_prop == NULL || - strncmp(key->iov_buf, ccr->ccr_label_prop->dpp_entries[0].dpe_str, - DAOS_PROP_LABEL_MAX_LEN) != 0) + key->iov_len != strlen(ccr->ccr_label_prop->dpp_entries[0].dpe_str) || + strncmp(key->iov_buf, ccr->ccr_label_prop->dpp_entries[0].dpe_str, key->iov_len) != 0) rc = daos_iov_copy(&ccr->ccr_label_cs, key); else ccr->ccr_label_checked = 1; @@ -3177,13 +3181,12 @@ chk_engine_set_policy(uint64_t gen, uint32_t policy_nr, struct chk_policy *polic static int chk_engine_report(struct chk_report_unit *cru, uint64_t *seq, int *decision) { - struct chk_instance *ins = chk_engine; - struct chk_pending_rec *cpr = NULL; - struct chk_pending_rec *tmp = NULL; - struct chk_pool_rec *pool = NULL; - d_iov_t kiov; - d_iov_t riov; - int rc; + struct chk_instance *ins = chk_engine; + struct chk_pending_rec *cpr = NULL; + struct chk_pool_rec *pool = NULL; + d_iov_t kiov; + d_iov_t riov; + int rc; D_ASSERT(cru->cru_pool != NULL); @@ -3220,14 +3223,9 @@ chk_engine_report(struct chk_report_unit *cru, uint64_t *seq, int *decision) cru->cru_detail_nr, cru->cru_details, *seq); if (unlikely(rc == -DER_AGAIN)) { D_ASSERT(cru->cru_act == CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT); + D_ASSERT(cpr != NULL); - rc = chk_pending_del(ins, *seq, &tmp); - if (rc == 0) - D_ASSERT(tmp == NULL); - else if (rc != -DER_NONEXIST) - goto log; - - chk_pending_destroy(cpr); + chk_pending_destroy(ins, cpr); cpr = NULL; goto new_seq; @@ -3273,11 +3271,12 @@ chk_engine_report(struct chk_report_unit *cru, uint64_t *seq, int *decision) goto again; out: - if (pool != NULL && pool->cpr_bk.cb_pool_status == CHK__CHECK_POOL_STATUS__CPS_PENDING) - pool->cpr_bk.cb_pool_status = CHK__CHECK_POOL_STATUS__CPS_CHECKING; - if (cpr != NULL) - 
chk_pending_destroy(cpr); + chk_pending_destroy(ins, cpr); + + if (pool != NULL && pool->cpr_bk.cb_pool_status == CHK__CHECK_POOL_STATUS__CPS_PENDING && + d_list_empty(&pool->cpr_pending_list)) + pool->cpr_bk.cb_pool_status = CHK__CHECK_POOL_STATUS__CPS_CHECKING; return rc; } diff --git a/src/chk/chk_internal.h b/src/chk/chk_internal.h index e4d6d52f3fd..276f7121c51 100644 --- a/src/chk/chk_internal.h +++ b/src/chk/chk_internal.h @@ -742,6 +742,8 @@ int chk_pool_add_shard(daos_handle_t hdl, d_list_t *head, uuid_t uuid, d_rank_t void chk_pool_shard_cleanup(struct chk_instance *ins); +int chk_pending_lookup(struct chk_instance *ins, uint64_t seq, struct chk_pending_rec **cpr); + int chk_pending_add(struct chk_instance *ins, d_list_t *pool_head, d_list_t *rank_head, uuid_t uuid, uint64_t seq, uint32_t rank, uint32_t cla, uint32_t option_nr, uint32_t *options, struct chk_pending_rec **cpr); @@ -750,8 +752,6 @@ int chk_pending_del(struct chk_instance *ins, uint64_t seq, struct chk_pending_r int chk_pending_wakeup(struct chk_instance *ins, struct chk_pending_rec *cpr); -void chk_pending_destroy(struct chk_pending_rec *cpr); - int chk_policy_refresh(uint32_t policy_nr, struct chk_policy *policies, struct chk_property *prop); int chk_prop_prepare(d_rank_t leader, uint32_t flags, uint32_t policy_nr, @@ -986,6 +986,26 @@ chk_destroy_tree(daos_handle_t *toh, struct btr_root *root) } } +static inline void +chk_pending_destroy(struct chk_instance *ins, struct chk_pending_rec *cpr) +{ + if (d_list_empty(&cpr->cpr_pool_link)) { + D_ASSERT(d_list_empty(&cpr->cpr_rank_link)); + D_ASSERT(d_list_empty(&cpr->cpr_ins_link)); + + if (cpr->cpr_cond != ABT_COND_NULL) + ABT_cond_free(&cpr->cpr_cond); + + if (cpr->cpr_mutex != ABT_MUTEX_NULL) + ABT_mutex_free(&cpr->cpr_mutex); + + D_FREE(cpr); + } else { + cpr->cpr_busy = 0; + chk_pending_del(ins, cpr->cpr_seq, NULL); + } +} + static inline void chk_destroy_pending_tree(struct chk_instance *ins) { diff --git a/src/chk/chk_leader.c 
b/src/chk/chk_leader.c index 31cfab811f6..18be52d0ac0 100644 --- a/src/chk/chk_leader.c +++ b/src/chk/chk_leader.c @@ -3469,12 +3469,10 @@ chk_leader_act_internal(struct chk_instance *ins, uint64_t seq, uint32_t act) d_iov_t riov; int rc; - rc = chk_pending_del(ins, seq, &pending); + rc = chk_pending_lookup(ins, seq, &pending); if (rc != 0) goto out; - D_ASSERT(pending->cpr_busy); - if (pending->cpr_on_leader) { ABT_mutex_lock(pending->cpr_mutex); /* @@ -3484,20 +3482,24 @@ chk_leader_act_internal(struct chk_instance *ins, uint64_t seq, uint32_t act) pending->cpr_action = act; ABT_cond_broadcast(pending->cpr_cond); ABT_mutex_unlock(pending->cpr_mutex); + chk_pending_del(ins, seq, &pending); } else { d_iov_set(&riov, NULL, 0); d_iov_set(&kiov, pending->cpr_uuid, sizeof(uuid_t)); rc = dbtree_lookup(ins->ci_pool_hdl, &kiov, &riov); - if (rc == 0) { + if (rc == 0) pool = (struct chk_pool_rec *)riov.iov_buf; - if (pool->cpr_bk.cb_pool_status == CHK__CHECK_POOL_STATUS__CPS_PENDING) - pool->cpr_bk.cb_pool_status = CHK__CHECK_POOL_STATUS__CPS_CHECKING; - } rc = chk_act_remote(ins->ci_ranks, ins->ci_bk.cb_gen, seq, pending->cpr_class, act, pending->cpr_rank); + if (rc == 0) { + chk_pending_destroy(ins, pending); - chk_pending_destroy(pending); + if (pool != NULL && + pool->cpr_bk.cb_pool_status == CHK__CHECK_POOL_STATUS__CPS_PENDING && + d_list_empty(&pool->cpr_pending_list)) + pool->cpr_bk.cb_pool_status = CHK__CHECK_POOL_STATUS__CPS_CHECKING; + } } out: @@ -3707,14 +3709,13 @@ chk_leader_report(struct chk_report_unit *cru, uint64_t *seq, int *decision) goto again; out: + if ((rc != 0 || decision != NULL) && cpr != NULL) + chk_pending_destroy(ins, cpr); + if (pool != NULL && pool->cpr_bk.cb_pool_status == CHK__CHECK_POOL_STATUS__CPS_PENDING && - (rc != 0 || (cpr != NULL && - cpr->cpr_action != CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT))) + d_list_empty(&pool->cpr_pending_list)) pool->cpr_bk.cb_pool_status = CHK__CHECK_POOL_STATUS__CPS_CHECKING; - if ((rc != 0 || decision 
!= NULL) && cpr != NULL) - chk_pending_destroy(cpr); - return rc; } diff --git a/src/chk/chk_upcall.c b/src/chk/chk_upcall.c index bbc05db5f75..8d699195ac9 100644 --- a/src/chk/chk_upcall.c +++ b/src/chk/chk_upcall.c @@ -1,5 +1,6 @@ /* * (C) Copyright 2022 Intel Corporation. + * (C) Copyright 2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -83,6 +84,9 @@ chk_report_upcall(uint64_t gen, uint64_t seq, uint32_t cla, uint32_t act, int re time_t tm = time(NULL); int rc; + if (DAOS_FAIL_CHECK(DAOS_CHK_REPORT_FAILURE)) + return -DER_IO; + report.seq = seq; report.class_ = cla; report.action = act; diff --git a/src/include/daos/common.h b/src/include/daos/common.h index e02c4f28022..a31cf34dfa7 100644 --- a/src/include/daos/common.h +++ b/src/include/daos/common.h @@ -928,6 +928,7 @@ enum { #define DAOS_CHK_ENGINE_DEATH (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0xb9) #define DAOS_CHK_VERIFY_CONT_SHARDS (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0xba) #define DAOS_CHK_ORPHAN_POOL_SHARD (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0xbb) +#define DAOS_CHK_REPORT_FAILURE (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0xbc) #define DAOS_MGMT_FAIL_CREATE_QUERY (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0xe0) diff --git a/src/tests/suite/daos_cr.c b/src/tests/suite/daos_cr.c index f2be8fbc056..1c31158cb17 100644 --- a/src/tests/suite/daos_cr.c +++ b/src/tests/suite/daos_cr.c @@ -3903,6 +3903,147 @@ cr_lost_rank0(void **state) cr_cleanup(arg, &pool, 1); } +/* + * 1. Create pool. + * 2. Fault injection to generate inconsistent pool label. + * 3. Set fail_loc to fail interaction report. + * 4. Start checker with option "--failout=on" and "POOL_BAD_LABEL:CIA_INTERACT". Should not crash. + * 5. Query checker, instance should failed, pool should be "failed". + * 6. Reset fail_loc. + * 7. Switch to normal mode to verify the pool label. + * 8. Cleanup. 
+ */ +static void +cr_leader_report_fail(void **state) +{ + test_arg_t *arg = *state; + struct test_pool pool = {0}; + struct daos_check_info dci = {0}; + char *label = NULL; + int rc; + + FAULT_INJECTION_REQUIRED(); + + print_message("CR30: Leader handle report failure\n"); + + rc = cr_pool_create(state, &pool, false, TCC_POOL_BAD_LABEL); + assert_rc_equal(rc, 0); + + rc = cr_system_stop(false); + assert_rc_equal(rc, 0); + + rc = cr_mode_switch(true); + assert_rc_equal(rc, 0); + + /* Inject fail_loc to fail interaction report. */ + rc = cr_debug_set_params(arg, DAOS_CHK_REPORT_FAILURE | DAOS_FAIL_ALWAYS); + assert_rc_equal(rc, 0); + + rc = cr_check_start(TCSF_FAILOUT | TCSF_RESET, 0, NULL, "POOL_BAD_LABEL:CIA_INTERACT"); + assert_rc_equal(rc, 0); + + cr_ins_wait(1, &pool.pool_uuid, &dci); + + rc = cr_ins_verify(&dci, TCIS_FAILED); + assert_rc_equal(rc, 0); + + rc = cr_pool_verify(&dci, pool.pool_uuid, TCPS_FAILED, 0, NULL, NULL, NULL); + assert_rc_equal(rc, 0); + + cr_debug_set_params(arg, 0); + + rc = cr_mode_switch(false); + assert_rc_equal(rc, 0); + + rc = cr_system_start(); + assert_rc_equal(rc, 0); + + print_message("CR: getting label for pool " DF_UUID " after check\n", + DP_UUID(pool.pool_uuid)); + rc = dmg_pool_get_prop(dmg_config_file, pool.label, pool.pool_uuid, "label", &label); + assert_rc_equal(rc, 0); + + D_ASSERTF(strcmp(label, pool.label) != 0, + "Pool (" DF_UUID ") label should not be repaired: %s\n", DP_UUID(pool.pool_uuid), + label); + + D_FREE(label); + cr_dci_fini(&dci); + cr_cleanup(arg, &pool, 1); +} + +/* + * 1. Create pool and container. + * 2. Fault injection to make container label inconsistent. + * 3. Set fail_loc to fail interaction report. + * 4. Start checker with option "--failout=on" and "CONT_BAD_LABEL:CIA_INTERACT". Should not crash. + * 5. Query checker, instance should failed, pool should be "failed". + * 6. Reset fail_loc. + * 7. Switch to normal mode to verify the container label. + * 8. Cleanup. 
+ */ +static void +cr_engine_report_fail(void **state) +{ + test_arg_t *arg = *state; + struct test_pool pool = {0}; + struct test_cont cont = {0}; + struct daos_check_info dci = {0}; + char *label = NULL; + int rc; + + FAULT_INJECTION_REQUIRED(); + + print_message("CR31: Engine handle report failure\n"); + + rc = cr_pool_create(state, &pool, true, TCC_NONE); + assert_rc_equal(rc, 0); + + rc = cr_cont_create(state, &pool, &cont, 1); + assert_rc_equal(rc, 0); + + rc = cr_system_stop(false); + assert_rc_equal(rc, 0); + + rc = cr_mode_switch(true); + assert_rc_equal(rc, 0); + + /* Inject fail_loc to fail interaction report. */ + rc = cr_debug_set_params(arg, DAOS_CHK_REPORT_FAILURE | DAOS_FAIL_ALWAYS); + assert_rc_equal(rc, 0); + + rc = cr_check_start(TCSF_FAILOUT | TCSF_RESET, 0, NULL, "CONT_BAD_LABEL:CIA_INTERACT"); + assert_rc_equal(rc, 0); + + cr_ins_wait(1, &pool.pool_uuid, &dci); + + rc = cr_ins_verify(&dci, TCIS_FAILED); + assert_rc_equal(rc, 0); + + rc = cr_pool_verify(&dci, pool.pool_uuid, TCPS_FAILED, 0, NULL, NULL, NULL); + assert_rc_equal(rc, 0); + + cr_debug_set_params(arg, 0); + + rc = cr_mode_switch(false); + assert_rc_equal(rc, 0); + + rc = cr_system_start(); + assert_rc_equal(rc, 0); + + /* Former connection for the pool has been evicted by checker. Let's re-connect the pool. 
*/ + rc = cr_cont_get_label(state, &pool, &cont, true, &label); + assert_rc_equal(rc, 0); + + D_ASSERTF(strcmp(label, cont.label) != 0, + "Cont (" DF_UUID ") label should not be repaired: %s\n", DP_UUID(cont.uuid), + label); + + D_FREE(label); + cr_dci_fini(&dci); + cr_cleanup(arg, &pool, 1); +} + /* clang-format off */ static const struct CMUnitTest cr_tests[] = { { "CR1: start checker for specified pools", @@ -3963,6 +4104,10 @@ static const struct CMUnitTest cr_tests[] = { cr_maintenance_mode, async_disable, test_case_teardown}, { "CR29: CR with rank 0 excluded at the beginning", cr_lost_rank0, async_disable, test_case_teardown}, + { "CR30: Leader handle report failure", + cr_leader_report_fail, async_disable, test_case_teardown}, + { "CR31: Engine handle report failure", + cr_engine_report_fail, async_disable, test_case_teardown}, }; /* clang-format on */ From 41e54af171f5dd1b8f3a7b858a776f20f5fdc7b3 Mon Sep 17 00:00:00 2001 From: Tom Nabarro Date: Fri, 27 Feb 2026 18:10:21 +0000 Subject: [PATCH 230/253] DAOS-18375 control: Set scm_hugepages_disabled default to true (#17526) Apply scm_hugepages_disabled true if unset in yaml. This results in effectively removing hugepages=always from tmpfs mount options for the engine ramdisk by default. 
Signed-off-by: Tom Nabarro --- src/control/cmd/dmg/auto_test.go | 2 +- src/control/server/config/server.go | 18 +++++-- src/control/server/config/server_test.go | 58 ++++++++++++++--------- src/control/server/storage/config.go | 9 ++-- src/control/server/storage/config_test.go | 4 +- src/control/server/storage/provider.go | 12 ++++- src/tests/ftest/pool/create.yaml | 2 +- utils/config/daos_server.yml | 16 +++---- utils/nlt_server.yaml | 1 + 9 files changed, 75 insertions(+), 47 deletions(-) diff --git a/src/control/cmd/dmg/auto_test.go b/src/control/cmd/dmg/auto_test.go index 9ebf8aa3b62..bec414701d2 100644 --- a/src/control/cmd/dmg/auto_test.go +++ b/src/control/cmd/dmg/auto_test.go @@ -1,6 +1,6 @@ // // (C) Copyright 2020-2024 Intel Corporation. -// (C) Copyright 2025 Hewlett Packard Enterprise Development LP +// (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // diff --git a/src/control/server/config/server.go b/src/control/server/config/server.go index 7f889b0b39a..f92771621d1 100644 --- a/src/control/server/config/server.go +++ b/src/control/server/config/server.go @@ -911,7 +911,7 @@ func (cfg *Server) validateMultiEngineConfig(log logging.Logger) error { seenHelperStreamCount := -1 seenScmCls := storage.ClassNone seenScmClsIdx := -1 - seenScmHuge := false + var seenScmHuge *bool seenScmHugeIdx := -1 for idx, engine := range cfg.Engines { @@ -960,10 +960,18 @@ func (cfg *Server) validateMultiEngineConfig(log logging.Logger) error { seenScmCls = scmConf.Class seenScmClsIdx = idx - if seenScmHugeIdx != -1 && scmConf.Scm.DisableHugepages != seenScmHuge { - log.Debugf("scm_hugepages_disabled entry %v in %d doesn't match %d", - scmConf.Scm.DisableHugepages, idx, seenScmHugeIdx) - return FaultConfigScmDiffHugeEnabled(idx, seenScmHugeIdx) + if seenScmHugeIdx != -1 { + switch { + case scmConf.Scm.DisableHugepages == nil && seenScmHuge == nil: + case scmConf.Scm.DisableHugepages != nil && seenScmHuge 
== nil: + return FaultConfigScmDiffHugeEnabled(idx, seenScmHugeIdx) + case scmConf.Scm.DisableHugepages == nil && seenScmHuge != nil: + return FaultConfigScmDiffHugeEnabled(idx, seenScmHugeIdx) + case *scmConf.Scm.DisableHugepages != *seenScmHuge: + log.Debugf("scm_hugepages_disabled entry %v in %d doesn't match %d", + *scmConf.Scm.DisableHugepages, idx, seenScmHugeIdx) + return FaultConfigScmDiffHugeEnabled(idx, seenScmHugeIdx) + } } seenScmHuge = scmConf.Scm.DisableHugepages seenScmHugeIdx = idx diff --git a/src/control/server/config/server_test.go b/src/control/server/config/server_test.go index 119f9a503ac..08b53010c10 100644 --- a/src/control/server/config/server_test.go +++ b/src/control/server/config/server_test.go @@ -280,7 +280,7 @@ func TestServerConfig_Constructed(t *testing.T) { storage.NewTierConfig(). WithScmMountPoint("/mnt/daos/1"). WithStorageClass("ram"). - WithScmHugepagesDisabled(), + WithScmHugepagesDisabled(false), storage.NewTierConfig(). WithStorageClass("nvme"). WithBdevDeviceList("0000:81:00.0", "0000:82:00.0"). @@ -310,7 +310,7 @@ func TestServerConfig_Constructed(t *testing.T) { storage.NewTierConfig(). WithScmMountPoint("/mnt/daos/2"). WithStorageClass("ram"). - WithScmHugepagesDisabled(), + WithScmHugepagesDisabled(false), storage.NewTierConfig(). WithStorageClass("file"). WithBdevDeviceList("/tmp/daos-bdev1", "/tmp/daos-bdev2"). @@ -699,7 +699,7 @@ func TestServerConfig_Validation(t *testing.T) { storage.NewTierConfig(). WithScmMountPoint("/mnt/daos/1"). WithStorageClass("ram"). - WithScmHugepagesDisabled(), + WithScmHugepagesDisabled(true), storage.NewTierConfig(). WithStorageClass("nvme"). WithBdevDeviceList("0000:81:00.0", "0000:82:00.0"). @@ -712,7 +712,7 @@ func TestServerConfig_Validation(t *testing.T) { storage.NewTierConfig(). WithScmMountPoint("/mnt/daos/2"). WithStorageClass("ram"). - WithScmHugepagesDisabled(), + WithScmHugepagesDisabled(true), storage.NewTierConfig(). WithStorageClass("nvme"). 
WithBdevDeviceList("0000:91:00.0", "0000:92:00.0"). @@ -734,7 +734,7 @@ func TestServerConfig_Validation(t *testing.T) { storage.NewTierConfig(). WithScmMountPoint("/mnt/daos/1"). WithStorageClass("ram"). - WithScmHugepagesDisabled(), + WithScmHugepagesDisabled(true), storage.NewTierConfig(). WithStorageClass("nvme"). WithBdevDeviceList("0000:81:00.0", "0000:82:00.0"). @@ -757,7 +757,7 @@ func TestServerConfig_Validation(t *testing.T) { storage.NewTierConfig(). WithScmMountPoint("/mnt/daos/2"). WithStorageClass("ram"). - WithScmHugepagesDisabled(), + WithScmHugepagesDisabled(true), storage.NewTierConfig(). WithStorageClass("nvme"). WithBdevDeviceList("0000:91:00.0", "0000:92:00.0"). @@ -788,7 +788,7 @@ func TestServerConfig_Validation(t *testing.T) { storage.NewTierConfig(). WithScmMountPoint("/mnt/daos/1"). WithStorageClass("ram"). - WithScmHugepagesDisabled(), + WithScmHugepagesDisabled(true), storage.NewTierConfig(). WithStorageClass("nvme"). WithBdevDeviceList("0000:81:00.0", "0000:82:00.0"). @@ -809,7 +809,7 @@ func TestServerConfig_Validation(t *testing.T) { storage.NewTierConfig(). WithScmMountPoint("/mnt/daos/1"). WithStorageClass("ram"). - WithScmHugepagesDisabled(), + WithScmHugepagesDisabled(true), storage.NewTierConfig(). WithStorageClass("nvme"). WithBdevDeviceList("0000:81:00.0", "0000:82:00.0"). @@ -839,7 +839,7 @@ func TestServerConfig_Validation(t *testing.T) { storage.NewTierConfig(). WithScmMountPoint("/mnt/daos/0"). WithStorageClass("ram"). - WithScmHugepagesDisabled(), + WithScmHugepagesDisabled(true), storage.NewTierConfig(). WithStorageClass("nvme"). WithBdevDeviceList("0000:80:00.0"). @@ -851,7 +851,7 @@ func TestServerConfig_Validation(t *testing.T) { storage.NewTierConfig(). WithScmMountPoint("/mnt/daos/1"). WithStorageClass("ram"). - WithScmHugepagesDisabled(), + WithScmHugepagesDisabled(true), storage.NewTierConfig(). WithStorageClass("nvme"). 
WithBdevDeviceList("0000:81:00.0"), @@ -872,7 +872,7 @@ func TestServerConfig_Validation(t *testing.T) { storage.NewTierConfig(). WithScmMountPoint("/mnt/daos/1"). WithStorageClass("ram"). - WithScmHugepagesDisabled(), + WithScmHugepagesDisabled(true), storage.NewTierConfig(). WithStorageClass("nvme"). WithBdevDeviceList("0000:81:00.0", "0000:82:00.0"). @@ -891,7 +891,7 @@ func TestServerConfig_Validation(t *testing.T) { storage.NewTierConfig(). WithScmMountPoint("/mnt/daos/1"). WithStorageClass("ram"). - WithScmHugepagesDisabled(), + WithScmHugepagesDisabled(true), storage.NewTierConfig(). WithStorageClass("nvme"). WithBdevDeviceList("0000:81:00.0", "0000:82:00.0"). @@ -935,7 +935,7 @@ func TestServerConfig_Validation(t *testing.T) { storage.NewTierConfig(). WithScmMountPoint("/mnt/daos/1"). WithStorageClass("ram"). - WithScmHugepagesDisabled(), + WithScmHugepagesDisabled(true), storage.NewTierConfig(). WithStorageClass("nvme"). WithBdevDeviceList("0000:81:00.0"), @@ -954,7 +954,7 @@ func TestServerConfig_Validation(t *testing.T) { storage.NewTierConfig(). WithScmMountPoint("/mnt/daos/1"). WithStorageClass("ram"). - WithScmHugepagesDisabled(), + WithScmHugepagesDisabled(true), storage.NewTierConfig(). WithStorageClass("nvme"). WithBdevDeviceList("0000:81:00.0"). @@ -1140,7 +1140,7 @@ func TestServerConfig_getMinNrHugepages(t *testing.T) { storage.NewTierConfig(). WithScmMountPoint("/mnt/daos/1"). WithStorageClass("ram"). - WithScmHugepagesDisabled(), + WithScmHugepagesDisabled(true), storage.NewTierConfig(). WithStorageClass("nvme"). WithBdevDeviceList("0000:81:00.0", "0000:82:00.0"). @@ -1276,7 +1276,7 @@ func TestServerConfig_SetNrHugepages(t *testing.T) { storage.NewTierConfig(). WithScmMountPoint("/mnt/daos/1"). WithStorageClass("ram"). - WithScmHugepagesDisabled(), + WithScmHugepagesDisabled(true), storage.NewTierConfig(). WithStorageClass("nvme"). WithBdevDeviceList("0000:81:00.0", "0000:82:00.0"). 
@@ -1296,7 +1296,7 @@ func TestServerConfig_SetNrHugepages(t *testing.T) { storage.NewTierConfig(). WithScmMountPoint("/mnt/daos/1"). WithStorageClass("ram"). - WithScmHugepagesDisabled(), + WithScmHugepagesDisabled(true), storage.NewTierConfig(). WithStorageClass("nvme"). WithBdevDeviceList("0000:81:00.0", "0000:82:00.0"). @@ -1480,7 +1480,7 @@ func TestServerConfig_SetRamdiskSize(t *testing.T) { storage.NewTierConfig(). WithScmMountPoint("/mnt/daos/1"). WithStorageClass("ram"). - WithScmHugepagesDisabled(), + WithScmHugepagesDisabled(true), storage.NewTierConfig(). WithStorageClass("nvme"). WithBdevDeviceList("0000:81:00.0", "0000:82:00.0"). @@ -1736,12 +1736,24 @@ func TestServerConfig_Parsing(t *testing.T) { return nil }, }, - "allow_thp true allows scm_hugepages_disabled false": { - inTxt: " scm_hugepages_disabled: true", - outTxt: " scm_hugepages_disabled: false", + "scm_hugepages_disabled unset": { + inTxt: " scm_hugepages_disabled: false", + outTxt: "", + expCheck: func(c *Server) error { + for _, e := range c.Engines { + if e.Storage.Tiers.ScmConfigs()[0].Scm.DisableHugepages != nil { + return errors.New("expecting scm hugepages to be enabled") + } + } + return nil + }, + }, + "explicitly set scm_hugepages_disabled true": { + inTxt: " scm_hugepages_disabled: false", + outTxt: " scm_hugepages_disabled: true", expCheck: func(c *Server) error { for _, e := range c.Engines { - if e.Storage.Tiers.ScmConfigs()[0].Scm.DisableHugepages { + if !*e.Storage.Tiers.ScmConfigs()[0].Scm.DisableHugepages { return errors.New("expecting scm hugepages to be enabled") } } @@ -2013,7 +2025,7 @@ func TestServerConfig_validateMultiEngineConfig(t *testing.T) { storage.NewTierConfig(). WithStorageClass("ram"). WithScmMountPoint("b"). 
- WithScmHugepagesDisabled(), + WithScmHugepagesDisabled(true), ), expErr: FaultConfigScmDiffHugeEnabled(1, 0), }, diff --git a/src/control/server/storage/config.go b/src/control/server/storage/config.go index 548c959aee7..ecccde9e171 100644 --- a/src/control/server/storage/config.go +++ b/src/control/server/storage/config.go @@ -174,8 +174,8 @@ func (tc *TierConfig) WithStorageClass(cls string) *TierConfig { } // WithScmHugepagesDisabled disables hugepages for tmpfs. -func (tc *TierConfig) WithScmHugepagesDisabled() *TierConfig { - tc.Scm.DisableHugepages = true +func (tc *TierConfig) WithScmHugepagesDisabled(b bool) *TierConfig { + tc.Scm.DisableHugepages = &b return tc } @@ -574,7 +574,7 @@ func (tcs *TierConfigs) UnmarshalYAML(unmarshal func(interface{}) error) error { type ScmConfig struct { MountPoint string `yaml:"scm_mount,omitempty" cmdLongFlag:"--storage" cmdShortFlag:"-s"` RamdiskSize uint `yaml:"scm_size,omitempty"` - DisableHugepages bool `yaml:"scm_hugepages_disabled,omitempty"` + DisableHugepages *bool `yaml:"scm_hugepages_disabled,omitempty"` DeviceList []string `yaml:"scm_list,omitempty"` NumaNodeIndex uint `yaml:"-"` } @@ -593,9 +593,6 @@ func (sc *ScmConfig) Validate(class Class) error { if len(sc.DeviceList) == 0 { return errors.New("scm_list must be set when class is dcpm") } - if sc.DisableHugepages { - return errors.New("scm_hugepages_disabled may not be set when class is dcpm") - } case ClassRam: if len(sc.DeviceList) > 0 { return errors.New("scm_list may not be set when class is ram") diff --git a/src/control/server/storage/config_test.go b/src/control/server/storage/config_test.go index e3dc3053278..2d53a9f646e 100644 --- a/src/control/server/storage/config_test.go +++ b/src/control/server/storage/config_test.go @@ -738,7 +738,8 @@ func TestStorage_BdevDeviceRoles_ToYAML(t *testing.T) { NewTierConfig(). WithStorageClass("ram"). WithScmRamdiskSize(16). - WithScmMountPoint("/mnt/daos"), + WithScmMountPoint("/mnt/daos"). 
+ WithScmHugepagesDisabled(true), NewTierConfig(). WithTier(1). WithStorageClass("nvme"). @@ -751,6 +752,7 @@ storage: - class: ram scm_mount: /mnt/daos scm_size: 16 + scm_hugepages_disabled: true - class: nvme bdev_list: - 0000:80:00.0 diff --git a/src/control/server/storage/provider.go b/src/control/server/storage/provider.go index 459cc31349a..9749739f862 100644 --- a/src/control/server/storage/provider.go +++ b/src/control/server/storage/provider.go @@ -284,10 +284,14 @@ func (p *Provider) MountScm() error { switch cfg.Class { case ClassRam: + disableHugepages := true + if cfg.Scm.DisableHugepages != nil { + disableHugepages = *cfg.Scm.DisableHugepages + } req.Ramdisk = &RamdiskParams{ Size: cfg.Scm.RamdiskSize, NUMANode: cfg.Scm.NumaNodeIndex, - DisableHugepages: cfg.Scm.DisableHugepages, + DisableHugepages: disableHugepages, } case ClassDcpm: if len(cfg.Scm.DeviceList) != 1 { @@ -346,10 +350,14 @@ func createScmFormatRequest(class Class, scmCfg ScmConfig, force bool) (*ScmForm switch class { case ClassRam: + disableHugepages := true + if scmCfg.DisableHugepages != nil { + disableHugepages = *scmCfg.DisableHugepages + } req.Ramdisk = &RamdiskParams{ Size: scmCfg.RamdiskSize, NUMANode: scmCfg.NumaNodeIndex, - DisableHugepages: scmCfg.DisableHugepages, + DisableHugepages: disableHugepages, } case ClassDcpm: if len(scmCfg.DeviceList) != 1 { diff --git a/src/tests/ftest/pool/create.yaml b/src/tests/ftest/pool/create.yaml index c3e06b86211..87a5a385fab 100644 --- a/src/tests/ftest/pool/create.yaml +++ b/src/tests/ftest/pool/create.yaml @@ -6,7 +6,7 @@ timeouts: test_create_max_pool_scm_only: 180 test_create_max_pool: 300 test_create_no_space: 300 - test_create_no_space_loop: 3500 + test_create_no_space_loop: 3700 server_config: name: daos_server diff --git a/utils/config/daos_server.yml b/utils/config/daos_server.yml index 7e3be1483ee..a7acd216909 100644 --- a/utils/config/daos_server.yml +++ b/utils/config/daos_server.yml @@ -433,10 +433,10 @@ # # #scm_size: 0 # 
-# # When class is set to ram, tmpfs will be mounted with hugepage -# # support, if the kernel supports it. If this is not desirable, -# # the behavior may be disabled here. -# scm_hugepages_disabled: true +# # When class is set to ram, tmpfs will only be mounted with hugepage +# # support if the kernel supports it and this flag is explicitly set to false. +# # default: true +# scm_hugepages_disabled: false # # - # # Backend block device type. Force a SPDK driver to be used by this engine @@ -608,10 +608,10 @@ # #class: dcpm # #scm_list: [/dev/pmem1] # -# # When class is set to ram, tmpfs will be mounted with hugepage -# # support, if the kernel supports it. If this is not desirable, -# # the behavior may be disabled here. -# scm_hugepages_disabled: true +# # When class is set to ram, tmpfs will only be mounted with hugepage +# # support if the kernel supports it and this flag is explicitly set to false. +# # default: true +# scm_hugepages_disabled: false # # - # # Backend block device type. Force a SPDK driver to be used by this engine diff --git a/utils/nlt_server.yaml b/utils/nlt_server.yaml index 1e24e4d9c3c..438e55718be 100644 --- a/utils/nlt_server.yaml +++ b/utils/nlt_server.yaml @@ -19,3 +19,4 @@ engines: - class: ram scm_mount: /mnt/daos + scm_hugepages_disabled: false From 515878f8524ac878b0d7096397a94165e57d6486 Mon Sep 17 00:00:00 2001 From: Makito Kano Date: Sat, 28 Feb 2026 03:26:51 +0900 Subject: [PATCH 231/253] DAOS-18387 test: recovery/ddb.py test_recovery_ddb_ls MD-on-SSD Support (#17332) To support MD-on-SSD for ddb, we need to support two commands. ddb prov_mem and ddb ls with --db_path. Update ddb_utils.py to support the new commands. Update test_recovery_ddb_ls to support MD-on-SSD with the new ddb commands. We need to update the test yaml to run on MD-on-SSD/HW Medium, but that will break other tests in ddb.py because they don't support MD-on-SSD yet. 
Keep the original tests as ddb_pmem.py and ddb_pmem.yaml and keep running them on VM (except test_recovery_ddb_ls because that's updated in this PR). Signed-off-by: Makito Kano --- src/tests/ftest/recovery/ddb.py | 511 +++++-------------------- src/tests/ftest/recovery/ddb.yaml | 14 +- src/tests/ftest/recovery/ddb_pmem.py | 446 +++++++++++++++++++++ src/tests/ftest/recovery/ddb_pmem.yaml | 27 ++ src/tests/ftest/util/ddb_utils.py | 27 +- 5 files changed, 599 insertions(+), 426 deletions(-) create mode 100644 src/tests/ftest/recovery/ddb_pmem.py create mode 100644 src/tests/ftest/recovery/ddb_pmem.yaml diff --git a/src/tests/ftest/recovery/ddb.py b/src/tests/ftest/recovery/ddb.py index 137f21ca5f3..00c5e7c8266 100644 --- a/src/tests/ftest/recovery/ddb.py +++ b/src/tests/ftest/recovery/ddb.py @@ -10,12 +10,9 @@ from apricot import TestWithServers from ddb_utils import DdbCommand -from exception_utils import CommandFailure -from file_utils import distribute_files -from general_utils import (DaosTestError, create_string_buffer, get_random_string, report_errors, - run_command) +from general_utils import create_string_buffer, get_random_string, report_errors from pydaos.raw import DaosObjClass, IORequest -from run_utils import get_clush_command +from run_utils import command_as_user, run_remote def insert_objects(context, container, object_count, dkey_count, akey_count, base_dkey, @@ -78,35 +75,6 @@ def insert_objects(context, container, object_count, dkey_count, akey_count, bas return (ioreqs, dkeys, akeys, data_list) -def copy_remote_to_local(remote_file_path, test_dir, remote): - """Copy the given file from the server node to the local test node and retrieve - the original name. - - Args: - remote_file_path (str): File path to copy to local. - test_dir (str): Test directory. Usually self.test_dir. - remote (str): Remote hostname to copy file from. - """ - # Use clush --rcopy to copy the file from the remote server node to the local test - # node. clush will append . 
to the file when copying. - args = f"--rcopy {remote_file_path} --dest {test_dir}" - clush_command = get_clush_command(hosts=remote, args=args, timeout=60) - try: - run_command(command=clush_command, timeout=None) - except DaosTestError as error: - raise DaosTestError( - f"ERROR: Copying {remote_file_path} from {remote}: {error}") from error - - # Remove the appended . from the copied file. - current_file_path = "".join([remote_file_path, ".", remote]) - mv_command = f"mv {current_file_path} {remote_file_path}" - try: - run_command(command=mv_command) - except DaosTestError as error: - raise DaosTestError( - f"ERROR: Moving {current_file_path} to {remote_file_path}: {error}") from error - - class DdbTest(TestWithServers): """Test ddb subcommands. @@ -125,6 +93,18 @@ def __init__(self, *args, **kwargs): self.random_akey = get_random_string(10) self.random_data = get_random_string(10) + def run_cmd_check_result(self, command): + """Run given command as root and check its result. + + Args: + command (str): Command to execute. + """ + command_root = command_as_user(command=command, user="root") + result = run_remote( + log=self.log, hosts=self.hostlist_servers, command=command_root) + if not result.passed: + self.fail(f"{command} failed on {result.failed_hosts}!") + def test_recovery_ddb_ls(self): """Test ddb ls. @@ -138,35 +118,66 @@ def test_recovery_ddb_ls(self): 6. Reset the container and the pool to prepare for the cleanup. :avocado: tags=all,full_regression - :avocado: tags=vm + :avocado: tags=hw,medium :avocado: tags=recovery :avocado: tags=DdbTest,ddb_cmd,test_recovery_ddb_ls """ - # Create a pool and a container. + # This is where we load pool for MD-on-SSD. It's called tmpfs_mount in ddb + # prov_mem documentation, but use daos_load_path here for clarity. 
+ daos_load_path = "/mnt/daos_load" + md_on_ssd = self.server_managers[0].manager.job.using_control_metadata + if md_on_ssd: + self.log_step("MD-on-SSD: Create a directory to load pool data under /mnt.") + self.run_cmd_check_result(command=f"mkdir {daos_load_path}") + + self.log_step("Create a pool and a container.") pool = self.get_pool() container = self.get_container(pool) - # Find the vos file name. e.g., /mnt/daos0//vos-0. - vos_paths = self.server_managers[0].get_vos_files(pool) - if not vos_paths: - self.fail(f"vos file wasn't found in {self.server_managers[0].get_vos_paths(pool)[0]}") - ddb_command = DdbCommand(self.server_managers[0].hosts[0:1], self.bin, vos_paths[0]) + if md_on_ssd: + vos_path = '""' + else: + # Find the vos file name. e.g., /mnt/daos0//vos-0. + vos_paths = self.server_managers[0].get_vos_files(pool) + if not vos_paths: + self.fail("vos file wasn't found!") + vos_path = vos_paths[0] + + ddb_command = DdbCommand( + server_host=self.server_managers[0].hosts[0:1], path=self.bin, + vos_path=vos_path) errors = [] object_count = self.object_count dkey_count = self.dkey_count akey_count = self.akey_count - # Insert objects with API. + self.log_step("Insert objects with API.") insert_objects( context=self.context, container=container, object_count=object_count, dkey_count=dkey_count, akey_count=akey_count, base_dkey=self.random_dkey, base_akey=self.random_akey, base_data=self.random_data) - # Need to stop the server to use ddb. + self.log_step("Stop server to use ddb.") self.get_dmg_command().system_stop() - # 1. Verify container UUID. + db_path = None + if md_on_ssd: + self.log_step(f"MD-on-SSD: Load pool dir to {daos_load_path}") + db_path = os.path.join( + self.server_managers[0].manager.job.yaml.metadata_params.path.value, + "daos_control", "engine0") + ddb_command.prov_mem(db_path=db_path, tmpfs_mount=daos_load_path) + + self.log_step("Verify container UUID.") + if md_on_ssd: + # "ddb ls" command for MD-on-SSD is quite different. 
+ # PMEM: ddb /mnt/daos//vos-0 ls + # MD-on-SSD: ddb --db_path=/var/tmp/daos_testing/control_metadata/daos_control + # /engine0 --vos_path /mnt/daos_load//vos-0 ls + ddb_command.db_path.update(value=" ".join(["--db_path", db_path])) + ddb_command.vos_path.update( + value=os.path.join(daos_load_path, pool.uuid.lower(), "vos-0")) cmd_result = ddb_command.list_component() # Sample output. # Listing contents of '/' @@ -183,10 +194,11 @@ def test_recovery_ddb_ls(self): actual_uuid = match.group(1).lower() expected_uuid = container.uuid.lower() if actual_uuid != expected_uuid: - msg = f"Unexpected container UUID! Expected = {expected_uuid}; Actual = {actual_uuid}" - errors.append(msg) + errors.append( + f"Unexpected container UUID! Expected = {expected_uuid}; Actual = " + f"{actual_uuid}") - # 2. Verify object count in the container. + self.log_step("Verify object count in the container.") cmd_result = ddb_command.list_component(component_path="[0]") # Sample output. # Listing contents of 'CONT: (/[0]) /3082b7d3-32f9-41ea-bcbf-5d6450c1b34f' @@ -204,17 +216,21 @@ def test_recovery_ddb_ls(self): f"Unexpected object count! Expected = {object_count}; " f"Actual = {actual_object_count}") - # 3. Verify there are two dkeys for every object. Also verify the dkey string and - # the size. + msg = ("Verify there are two dkeys for every object. Also verify the dkey string " + "and the size.") + self.log_step(msg) dkey_regex = f"/{uuid_regex}/{object_id_regex}/(.*)" actual_dkey_count = 0 for obj_index in range(object_count): - component_path = f"[0]/[{obj_index}]" - cmd_result = ddb_command.list_component(component_path=component_path) - # Sample output. - # /d4e0c836-17bd-4df3-b255-929732486bab/281479271677953.0.0/ - # [0] 'Sample dkey 0 0' (15) - # [1] 'Sample dkey 0 1' (15) + cmd_result = ddb_command.list_component(component_path=f"[0]/[{obj_index}]") + # Sample output. There are three lines, but a line break is added to fit into + # the code. 
+ # Listing contents of 'OBJ: (/[0]/[0]) + # /a78b65a1-31f4-440b-95e1-b4ead193b3f1/281479271677953.0.0.2' + # DKEY: (/[0]/[0]/[0]) + # /a78b65a1-31f4-440b-95e1-b4ead193b3f1/281479271677953.0.0.2/GSWOPOF1EX 0 0 + # DKEY: (/[0]/[0]/[1]) + # /a78b65a1-31f4-440b-95e1-b4ead193b3f1/281479271677953.0.0.2/GSWOPOF1EX 0 1 match = re.findall(dkey_regex, cmd_result.joined_stdout) actual_dkey_count += len(match) @@ -227,30 +243,34 @@ def test_recovery_ddb_ls(self): f"Actual = {actual_dkey}") errors.append(msg) - # Verify there are two dkeys for every object. + self.log_step("Verify there are two dkeys for every object.") expected_dkey_count = object_count * dkey_count if actual_dkey_count != expected_dkey_count: - msg = (f"Unexpected number of dkeys! Expected = {expected_dkey_count}; " - f"Actual = {actual_dkey_count}") - errors.append(msg) + errors.append( + f"Unexpected number of dkeys! Expected = {expected_dkey_count}; " + f"Actual = {actual_dkey_count}") - # 4. Verify there is one akey for every dkey. Also verify the key string and the - # size. + self.log_step( + "Verify there is one akey for every dkey. Also verify the key string and " + "the size.") akey_count = 0 for obj_index in range(object_count): for dkey_index in range(dkey_count): - component_path = f"[0]/[{obj_index}]/[{dkey_index}]" - cmd_result = ddb_command.list_component(component_path=component_path) + cmd_result = ddb_command.list_component( + component_path=f"[0]/[{obj_index}]/[{dkey_index}]") ls_out = cmd_result.joined_stdout msg = (f"List akeys obj_index = {obj_index}, dkey_index = {dkey_index}, " f"stdout = {ls_out}") self.log.info(msg) - # Output is in the same format as dkey, so use the same regex. - # /d4e0c836-17bd-4df3-b255-929732486bab/281479271677954.0.0/' - # Sample dkey 1 0'/ - # [0] 'Sample akey 1 0 0' (17) + # Output is in the same format as dkey, so use the same regex. There are + # two lines, but line breaks are added to fit into the code. 
+ # Listing contents of 'DKEY: (/[0]/[0]/[0]) + # /a78b65a1-31f4-440b-95e1-b4ead193b3f1/281479271677953.0.0.2/ + # GSWOPOF1EX 0 0' + # AKEY: (/[0]/[0]/[0]/[0]) + # /a78b65a1-31f4-440b-95e1-b4ead193b3f1/281479271677953.0.0.2/ + # GSWOPOF1EX 0 0/OOJ2TNAHS7 0 0 0 match = re.findall(f"{dkey_regex}/(.*)", ls_out) - akey_count += len(match) # Verify akey string. As in dkey, ignore the numbers at the end. @@ -260,365 +280,24 @@ def test_recovery_ddb_ls(self): f"Expected = {self.random_akey}; Actual = {actual_akey}") errors.append(msg) - # Verify there is one akey for every dkey. + self.log_step("Verify there is one akey for every dkey.") if expected_dkey_count != akey_count: msg = (f"Unexpected number of akeys! Expected = {expected_dkey_count}; " f"Actual = {akey_count}") errors.append(msg) - # 5. Restart the server for the cleanup. - self.get_dmg_command().system_start() + if md_on_ssd: + self.log_step(f"MD-on-SSD: Clean {daos_load_path}") + self.run_cmd_check_result(command=f"umount {daos_load_path}") + self.run_cmd_check_result(command=f"rm -rf {daos_load_path}") - # 6. Reset the container and the pool to prepare for the cleanup. - container.close() - pool.disconnect() - pool.connect() - container.open() + self.log_step("Restart the server for the cleanup.") self.get_dmg_command().system_start() - self.log.info("##### Errors #####") - report_errors(test=self, errors=errors) - self.log.info("##################") - - def test_recovery_ddb_rm(self): - """Test rm. - - 1. Create a pool and a container. Insert objects, dkeys, and akeys. - 2. Stop the server to use ddb. - 3. Find the vos file name. e.g., /mnt/daos0//vos-0. - 4. Call ddb rm to remove the akey. - 5. Restart the server to use the API. - 6. Reset the object, container, and pool to use the API after server restart. - 7. Call list_akey() in pydaos API to verify that the akey was removed. - 8. Stop the server to use ddb. - 9. Call ddb rm to remove the dkey. - 10. Restart the server to use the API. - 11. 
Reset the object, container, and pool to use the API after server restart. - 12. Call list_dkey() in pydaos API to verify that the dkey was removed. - 13. Stop the server to use ddb. - 14. Call ddb rm to remove the object. - 15. Restart the server to use daos command. - 16. Reset the container and pool so that cleanup works. - 17. Call "daos container list-objects " to verify that the - object was removed. - - :avocado: tags=all,full_regression - :avocado: tags=vm - :avocado: tags=recovery - :avocado: tags=DdbTest,ddb_cmd,test_recovery_ddb_rm - """ - # 1. Create a pool and a container. Insert objects, dkeys, and akeys. - pool = self.get_pool(connect=True) - container = self.get_container(pool) - - # Insert one object with one dkey and one akey with API. - obj_dataset = insert_objects( - context=self.context, container=container, object_count=1, - dkey_count=1, akey_count=2, base_dkey=self.random_dkey, - base_akey=self.random_akey, base_data=self.random_data) - ioreqs = obj_dataset[0] - dkeys_inserted = obj_dataset[1] - akeys_inserted = obj_dataset[2] - - # For debugging/reference, check that the dkey and the akey we just inserted are - # returned from the API. - akeys_api = ioreqs[0].list_akey(dkey=dkeys_inserted[0]) - self.log.info("akeys from API (before) = %s", akeys_api) - dkeys_api = ioreqs[0].list_dkey() - self.log.info("dkeys from API (before) = %s", dkeys_api) - - # For debugging/reference, check that the object was inserted using daos command. - list_obj_out = self.get_daos_command().container_list_objects( - pool=pool.identifier, cont=container.uuid) - self.log.info("Object list (before) = %s", list_obj_out["response"]) - - # 2. Need to stop the server to use ddb. - dmg_command = self.get_dmg_command() - dmg_command.system_stop() - - # 3. Find the vos file name. 
- vos_paths = self.server_managers[0].get_vos_files(pool) - if not vos_paths: - self.fail(f"vos file wasn't found in {self.server_managers[0].get_vos_paths(pool)[0]}") - ddb_command = DdbCommand(self.server_managers[0].hosts[0:1], self.bin, vos_paths[0]) - - # 4. Call ddb rm to remove the akey. - cmd_result = ddb_command.remove_component(component_path="[0]/[0]/[0]/[0]") - self.log.info("rm akey stdout = %s", cmd_result.joined_stdout) - - # 5. Restart the server to use the API. - dmg_command.system_start() - - # 6. Reset the object, container, and pool to use the API after server restart. - ioreqs[0].obj.close() - container.close() - pool.disconnect() - pool.connect() - container.open() - ioreqs[0].obj.open() - - # 7. Call list_akey() in pydaos API to verify that the akey was removed. - akeys_api = ioreqs[0].list_akey(dkey=dkeys_inserted[0]) - self.log.info("akeys from API (after) = %s", akeys_api) - - errors = [] - expected_len = len(akeys_inserted) - 1 - actual_len = len(akeys_api) - if actual_len != expected_len: - msg = (f"Unexpected number of akeys after ddb rm! Expected = {expected_len}; " - f"Actual = {actual_len}") - errors.append(msg) - - # 8. Stop the server to use ddb. - dmg_command.system_stop() - - # 9. Call ddb rm to remove the dkey. - cmd_result = ddb_command.remove_component(component_path="[0]/[0]/[0]") - self.log.info("rm dkey stdout = %s", cmd_result.joined_stdout) - - # 10. Restart the server to use the API. - dmg_command.system_start() - - # 11. Reset the object, container, and pool to use the API after server restart. - ioreqs[0].obj.close() - container.close() - pool.disconnect() - pool.connect() - container.open() - ioreqs[0].obj.open() - - # 12. Call list_dkey() in pydaos API to verify that the dkey was removed. 
- dkeys_api = ioreqs[0].list_dkey() - self.log.info("dkeys from API (after) = %s", dkeys_api) - - expected_len = len(dkeys_inserted) - 1 - actual_len = len(dkeys_api) - if actual_len != expected_len: - msg = (f"Unexpected number of dkeys after ddb rm! Expected = {expected_len}; " - f"Actual = {actual_len}") - errors.append(msg) - - # 13. Stop the server to use ddb. - dmg_command.system_stop() - - # 14. Call ddb rm to remove the object. - cmd_result = ddb_command.remove_component(component_path="[0]/[0]") - self.log.info("rm object stdout = %s", cmd_result.joined_stdout) - - # 15. Restart the server to use daos command. - dmg_command.system_start() - - # 16. Reset the container and pool so that cleanup works. - container.close() - pool.disconnect() - pool.connect() - container.open() - - # 17. Call "daos container list-objects " to verify that - # the object was removed. - list_obj_out = self.get_daos_command().container_list_objects( - pool=pool.identifier, cont=container.uuid) - obj_list = list_obj_out["response"] - self.log.info("Object list (after) = %s", obj_list) - - expected_len = len(ioreqs) - 1 - if obj_list: - actual_len = len(obj_list) - else: - actual_len = 0 - if actual_len != expected_len: - msg = (f"Unexpected number of objects after ddb rm! Expected = {expected_len}; " - f"Actual = {actual_len}") - errors.append(msg) - - self.log.info("##### Errors #####") - report_errors(test=self, errors=errors) - self.log.info("##################") - - def test_recovery_ddb_load(self): - """Test ddb value_load. - - 1. Create a pool and a container. - 2. Insert one object with one dkey with the API. - 3. Stop the server to use ddb. - 4. Find the vos file name. e.g., /mnt/daos0//vos-0. - 5. Load new data into [0]/[0]/[0]/[0] - 6. Restart the server. - 7. Reset the object, container, and pool to use the API. - 8. Verify the data in the akey with single_fetch(). 
- - :avocado: tags=all,full_regression - :avocado: tags=vm - :avocado: tags=recovery - :avocado: tags=DdbTest,ddb_cmd,test_recovery_ddb_load - """ - # 1. Create a pool and a container. - pool = self.get_pool(connect=True) - container = self.get_container(pool) - - # 2. Insert one object with one dkey with API. - obj_dataset = insert_objects( - context=self.context, container=container, object_count=1, - dkey_count=1, akey_count=1, base_dkey=self.random_dkey, - base_akey=self.random_akey, base_data=self.random_data) - ioreqs = obj_dataset[0] - dkeys_inserted = obj_dataset[1] - akeys_inserted = obj_dataset[2] - data_list = obj_dataset[3] - - # For debugging/reference, call single_fetch and get the data just inserted. - # Pass in size + 1 to single_fetch to avoid the no-space error. - data_size = len(data_list[0]) + 1 - data = ioreqs[0].single_fetch( - dkey=dkeys_inserted[0], akey=akeys_inserted[0], size=data_size) - self.log.info("data (before) = %s", data.value.decode('utf-8')) - - # 3. Stop the server to use ddb. - dmg_command = self.get_dmg_command() - dmg_command.system_stop() - - # 4. Find the vos file name. - host = self.server_managers[0].hosts[0:1] - vos_paths = self.server_managers[0].get_vos_files(pool) - if not vos_paths: - self.fail(f"vos file wasn't found in {self.server_managers[0].get_vos_paths(pool)[0]}") - ddb_command = DdbCommand(host, self.bin, vos_paths[0]) - - # 5. Load new data into [0]/[0]/[0]/[0] - # Create a file in test node. - load_file_path = os.path.join(self.test_dir, "new_data.txt") - new_data = "New akey data 0123456789" - with open(load_file_path, "w", encoding="utf-8") as file: - file.write(new_data) - - # Copy the created file to server node. - result = distribute_files(self.log, host, load_file_path, load_file_path, False) - if not result.passed: - raise CommandFailure(f"ERROR: Copying new_data.txt to {result.failed_hosts}") - - # The file with the new data is ready. Run ddb load. 
- ddb_command.value_load(component_path="[0]/[0]/[0]/[0]", load_file_path=load_file_path) - - # 6. Restart the server. - dmg_command.system_start() - - # 7. Reset the object, container, and pool to use the API after server restart. - ioreqs[0].obj.close() - container.close() - pool.disconnect() - pool.connect() - container.open() - ioreqs[0].obj.open() - - # 8. Verify the data in the akey with single_fetch(). - data_size = len(new_data) + 1 - data = ioreqs[0].single_fetch( - dkey=dkeys_inserted[0], akey=akeys_inserted[0], size=data_size) - actual_data = data.value.decode('utf-8') - self.log.info("data (after) = %s", actual_data) - - errors = [] - if new_data != actual_data: - msg = f"ddb load failed! Expected = {new_data}; Actual = {actual_data}" - errors.append(msg) - - self.log.info("##### Errors #####") - report_errors(test=self, errors=errors) - self.log.info("##################") - - def test_recovery_ddb_dump_value(self): - """Test ddb dump_value. - - 1. Create a pool and a container. - 2. Insert one object with one dkey with API. - 3. Stop the server to use ddb. - 4. Find the vos file name. e.g., /mnt/daos0//vos-0. - 5. Dump the two akeys to files. - 6. Verify the content of the files. - 7. Restart the server for the cleanup. - 8. Reset the object, container, and pool to prepare for the cleanup. - - :avocado: tags=all,full_regression - :avocado: tags=vm - :avocado: tags=recovery - :avocado: tags=DdbTest,ddb_cmd,test_recovery_ddb_dump_value - """ - # 1. Create a pool and a container. - pool = self.get_pool(connect=True) - container = self.get_container(pool) - - # 2. Insert one object with one dkey with API. - obj_dataset = insert_objects( - context=self.context, container=container, object_count=1, - dkey_count=1, akey_count=2, base_dkey=self.random_dkey, - base_akey=self.random_akey, base_data=self.random_data) - ioreqs = obj_dataset[0] - data_list = obj_dataset[3] - - # 3. Stop the server to use ddb. 
- dmg_command = self.get_dmg_command() - dmg_command.system_stop() - - # 4. Find the vos file name. - vos_paths = self.server_managers[0].get_vos_files(pool) - if not vos_paths: - self.fail(f"vos file wasn't found in {self.server_managers[0].get_vos_paths(pool)[0]}") - ddb_command = DdbCommand(self.server_managers[0].hosts[0:1], self.bin, vos_paths[0]) - - # 5. Dump the two akeys to files. - akey1_file_path = os.path.join(self.test_dir, "akey1.txt") - ddb_command.value_dump( - component_path="[0]/[0]/[0]/[0]", out_file_path=akey1_file_path) - akey2_file_path = os.path.join(self.test_dir, "akey2.txt") - ddb_command.value_dump( - component_path="[0]/[0]/[0]/[1]", out_file_path=akey2_file_path) - - # Copy them from remote server node to local test node. - copy_remote_to_local( - remote_file_path=akey1_file_path, test_dir=self.test_dir, - remote=self.hostlist_servers[0]) - copy_remote_to_local( - remote_file_path=akey2_file_path, test_dir=self.test_dir, - remote=self.hostlist_servers[0]) - - # 6. Verify the content of the files. - actual_akey1_data = None - with open(akey1_file_path, "r", encoding="utf-8") as file: - actual_akey1_data = file.readlines()[0] - actual_akey2_data = None - with open(akey2_file_path, "r", encoding="utf-8") as file: - actual_akey2_data = file.readlines()[0] - - errors = [] - str_data_list = [] - # Convert the data to string. - for data in data_list: - str_data_list.append(data.value.decode("utf-8")) - # Verify that we were able to obtain the data and akey1 and akey2 aren't the same. - if actual_akey1_data is None or actual_akey2_data is None or \ - actual_akey1_data == actual_akey2_data: - msg = (f"Invalid dumped value! Dumped akey1 data = {actual_akey1_data}; " - f"Dumped akey2 data = {actual_akey2_data}") - errors.append(msg) - # Verify that the data we obtained with ddb are the ones we wrote. The order isn't - # deterministic, so check with "in". 
- if actual_akey1_data not in str_data_list or \ - actual_akey2_data not in str_data_list: - msg = (f"Unexpected dumped value! Dumped akey data 1 = {actual_akey1_data}; Dumped " - f"akey data 2 = {actual_akey2_data}; Expected data list = {str_data_list}") - errors.append(msg) - - # 7. Restart the server for the cleanup. - dmg_command.system_start() - - # 8. Reset the object, container, and pool to prepare for the cleanup. - ioreqs[0].obj.close() + self.log_step("Reset the container and the pool to prepare for the cleanup.") container.close() pool.disconnect() pool.connect() container.open() - ioreqs[0].obj.open() - self.log.info("##### Errors #####") report_errors(test=self, errors=errors) - self.log.info("##################") diff --git a/src/tests/ftest/recovery/ddb.yaml b/src/tests/ftest/recovery/ddb.yaml index a89fa7beb29..81d6b803aa4 100644 --- a/src/tests/ftest/recovery/ddb.yaml +++ b/src/tests/ftest/recovery/ddb.yaml @@ -2,19 +2,19 @@ hosts: test_servers: 1 test_clients: 1 -timeout: 1800 +timeout: 7M server_config: name: daos_server engines_per_host: 1 engines: 0: + log_file: daos_server0.log + nr_xs_helpers: 1 + # Objects are placed in different targets, or in different vos-x, so we need to use + # 1 target to make the test steps simpler. targets: 1 - storage: - 0: - class: ram - scm_mount: /mnt/daos - system_ram_reserved: 1 + storage: auto # In CI, all tests in ddb.py are ran in a single launch.py execution. In that case, the # test_dir (/var/tmp/daos_testing/) in the server node will not be created @@ -24,4 +24,4 @@ setup: start_servers_once: False pool: - scm_size: 1G + scm_size: 50G diff --git a/src/tests/ftest/recovery/ddb_pmem.py b/src/tests/ftest/recovery/ddb_pmem.py new file mode 100644 index 00000000000..509bcfeeba4 --- /dev/null +++ b/src/tests/ftest/recovery/ddb_pmem.py @@ -0,0 +1,446 @@ +""" + (C) Copyright 2022-2024 Intel Corporation. 
+ (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP + + SPDX-License-Identifier: BSD-2-Clause-Patent +""" +import ctypes +import os + +from apricot import TestWithServers +from ddb_utils import DdbCommand +from exception_utils import CommandFailure +from file_utils import distribute_files +from general_utils import (DaosTestError, create_string_buffer, get_random_string, report_errors, + run_command) +from pydaos.raw import DaosObjClass, IORequest +from run_utils import get_clush_command + + +def insert_objects(context, container, object_count, dkey_count, akey_count, base_dkey, base_akey, + base_data): + """Insert objects, dkeys, akeys, and data into the container. + + Args: + context (DaosContext): + container (TestContainer): Container to insert objects. + object_count (int): Number of objects to insert. + dkey_count (int): Number of dkeys to insert. + akey_count (int): Number of akeys to insert. + base_dkey (str): Base dkey. Index numbers will be appended to it. + base_akey (str):Base akey. Index numbers will be appended to it. + base_data (str):Base data that goes inside akey. Index numbers will be appended to it. + + Returns: + tuple: Inserted objects, dkeys, akeys, and data as (ioreqs, dkeys, akeys, data_list) + """ + ioreqs = [] + dkeys = [] + akeys = [] + data_list = [] + + container.open() + + for obj_index in range(object_count): + # Insert object. + ioreqs.append(IORequest( + context=context, container=container.container, obj=None, + objtype=DaosObjClass.OC_S1)) + + for dkey_index in range(dkey_count): + # Prepare the dkey to insert into the object. + dkey_str = " ".join([base_dkey, str(obj_index), str(dkey_index)]).encode("utf-8") + dkeys.append(create_string_buffer(value=dkey_str, size=len(dkey_str))) + + for akey_index in range(akey_count): + # Prepare the akey to insert into the dkey. 
+ akey_str = " ".join( + [base_akey, str(obj_index), str(dkey_index), str(akey_index)]).encode("utf-8") + akeys.append(create_string_buffer(value=akey_str, size=len(akey_str))) + + # Prepare the data to insert into the akey. + data_str = " ".join( + [base_data, str(obj_index), str(dkey_index), str(akey_index)]).encode("utf-8") + data_list.append(create_string_buffer(value=data_str, size=len(data_str))) + c_size = ctypes.c_size_t(ctypes.sizeof(data_list[-1])) + + # Insert dkeys, akeys, and the data. + ioreqs[-1].single_insert( + dkey=dkeys[-1], akey=akeys[-1], value=data_list[-1], size=c_size) + + return (ioreqs, dkeys, akeys, data_list) + + +def copy_remote_to_local(remote_file_path, test_dir, remote): + """Copy the given file from the server node to the local test node and retrieve the original + name. + + Args: + remote_file_path (str): File path to copy to local. + test_dir (str): Test directory. Usually self.test_dir. + remote (NodeSet): Remote hostname to copy file from. + """ + # Use clush --rcopy to copy the file from the remote server node to the local test + # node. clush will append . to the file when copying. + args = f"--rcopy {remote_file_path} --dest {test_dir}" + clush_command = get_clush_command(hosts=remote, args=args, timeout=60) + try: + run_command(command=clush_command, timeout=None) + except DaosTestError as error: + raise DaosTestError(f"ERROR: Copying {remote_file_path} from {remote}: {error}") from error + + # Remove the appended . from the copied file. + current_file_path = "".join([remote_file_path, ".", remote]) + mv_command = f"mv {current_file_path} {remote_file_path}" + try: + run_command(command=mv_command) + except DaosTestError as error: + raise DaosTestError( + f"ERROR: Moving {current_file_path} to {remote_file_path}: {error}") from error + + +class DdbPMEMTest(TestWithServers): + """Test ddb subcommands. 
+ + :avocado: recursive + """ + + def __init__(self, *args, **kwargs): + """Initialize a DdbPMEMTest object.""" + super().__init__(*args, **kwargs) + # Number of objects and keys to insert/expect. + self.object_count = 5 + self.dkey_count = 2 + self.akey_count = 1 + # Generate random keys and data to insert into the object. + self.random_dkey = get_random_string(10) + self.random_akey = get_random_string(10) + self.random_data = get_random_string(10) + + def test_recovery_ddb_rm(self): + """Test rm. + + 1. Create a pool and a container. Insert objects, dkeys, and akeys. + 2. Stop the server to use ddb. + 3. Find the vos file name. e.g., /mnt/daos0//vos-0. + 4. Call ddb rm to remove the akey. + 5. Restart the server to use the API. + 6. Reset the object, container, and pool to use the API after server restart. + 7. Call list_akey() in pydaos API to verify that the akey was removed. + 8. Stop the server to use ddb. + 9. Call ddb rm to remove the dkey. + 10. Restart the server to use the API. + 11. Reset the object, container, and pool to use the API after server restart. + 12. Call list_dkey() in pydaos API to verify that the dkey was removed. + 13. Stop the server to use ddb. + 14. Call ddb rm to remove the object. + 15. Restart the server to use daos command. + 16. Reset the container and pool so that cleanup works. + 17. Call "daos container list-objects " to verify that the object was + removed. + + :avocado: tags=all,full_regression + :avocado: tags=vm + :avocado: tags=recovery + :avocado: tags=DdbPMEMTest,ddb_cmd,test_recovery_ddb_rm + """ + self.log_step("Create a pool and a container. Insert objects, dkeys, and akeys.") + pool = self.get_pool(connect=True) + container = self.get_container(pool) + + # Insert one object with one dkey and one akey with API. 
+ obj_dataset = insert_objects( + context=self.context, container=container, object_count=1, dkey_count=1, akey_count=2, + base_dkey=self.random_dkey, base_akey=self.random_akey, base_data=self.random_data) + ioreqs = obj_dataset[0] + dkeys_inserted = obj_dataset[1] + akeys_inserted = obj_dataset[2] + + # For debugging/reference, check that the dkey and the akey we just inserted are returned + # from the API. + akeys_api = ioreqs[0].list_akey(dkey=dkeys_inserted[0]) + self.log.info("akeys from API (before) = %s", akeys_api) + dkeys_api = ioreqs[0].list_dkey() + self.log.info("dkeys from API (before) = %s", dkeys_api) + + # For debugging/reference, check that the object was inserted using daos command. + list_obj_out = self.get_daos_command().container_list_objects( + pool=pool.identifier, cont=container.uuid) + self.log.info("Object list (before) = %s", list_obj_out["response"]) + + self.log_step("Stop the server to use ddb.") + dmg_command = self.get_dmg_command() + dmg_command.system_stop() + + self.log_step("Find the vos file name.") + vos_paths = self.server_managers[0].get_vos_files(pool) + if not vos_paths: + self.fail("vos file wasn't found!") + ddb_command = DdbCommand(self.server_managers[0].hosts[0:1], self.bin, vos_paths[0]) + + self.log_step("Call ddb rm to remove the akey.") + cmd_result = ddb_command.remove_component(component_path="[0]/[0]/[0]/[0]") + self.log.info("rm akey stdout = %s", cmd_result.joined_stdout) + + self.log_step("Restart the server to use the API.") + dmg_command.system_start() + + self.log_step("Reset the object, container, and pool to use the API after server restart.") + ioreqs[0].obj.close() + container.close() + pool.disconnect() + pool.connect() + container.open() + ioreqs[0].obj.open() + + self.log_step("Call list_akey() in pydaos API to verify that the akey was removed.") + akeys_api = ioreqs[0].list_akey(dkey=dkeys_inserted[0]) + self.log.info("akeys from API (after) = %s", akeys_api) + errors = [] + expected_len = 
len(akeys_inserted) - 1 + actual_len = len(akeys_api) + if actual_len != expected_len: + msg = (f"Unexpected number of akeys after ddb rm! Expected = {expected_len}; " + f"Actual = {actual_len}") + errors.append(msg) + + self.log_step("Stop the server to use ddb.") + dmg_command.system_stop() + + self.log_step("Call ddb rm to remove the dkey.") + cmd_result = ddb_command.remove_component(component_path="[0]/[0]/[0]") + self.log.info("rm dkey stdout = %s", cmd_result.joined_stdout) + + self.log_step("Restart the server to use the API.") + dmg_command.system_start() + + self.log_step("Reset the object, container, and pool to use the API after server restart.") + ioreqs[0].obj.close() + container.close() + pool.disconnect() + pool.connect() + container.open() + ioreqs[0].obj.open() + + self.log_step("Call list_dkey() in pydaos API to verify that the dkey was removed.") + dkeys_api = ioreqs[0].list_dkey() + self.log.info("dkeys from API (after) = %s", dkeys_api) + expected_len = len(dkeys_inserted) - 1 + actual_len = len(dkeys_api) + if actual_len != expected_len: + msg = (f"Unexpected number of dkeys after ddb rm! 
Expected = {expected_len}; " + f"Actual = {actual_len}") + errors.append(msg) + + self.log_step("Stop the server to use ddb.") + dmg_command.system_stop() + + self.log_step("Call ddb rm to remove the object.") + cmd_result = ddb_command.remove_component(component_path="[0]/[0]") + self.log.info("rm object stdout = %s", cmd_result.joined_stdout) + + self.log_step("Restart the server to use daos command.") + dmg_command.system_start() + + self.log_step("Reset the container and pool so that cleanup works.") + container.close() + pool.disconnect() + pool.connect() + container.open() + + self.log_step("Call daos container list-objects to verify that the object was removed.") + list_obj_out = self.get_daos_command().container_list_objects( + pool=pool.identifier, cont=container.uuid) + obj_list = list_obj_out["response"] + self.log.info("Object list (after) = %s", obj_list) + expected_len = len(ioreqs) - 1 + if obj_list: + actual_len = len(obj_list) + else: + actual_len = 0 + if actual_len != expected_len: + msg = (f"Unexpected number of objects after ddb rm! Expected = {expected_len}; " + f"Actual = {actual_len}") + errors.append(msg) + + report_errors(test=self, errors=errors) + + def test_recovery_ddb_load(self): + """Test ddb value_load. + + 1. Create a pool and a container. + 2. Insert one object with one dkey with the API. + 3. Stop the server to use ddb. + 4. Find the vos file name. e.g., /mnt/daos0//vos-0. + 5. Load new data into [0]/[0]/[0]/[0] + 6. Restart the server. + 7. Reset the object, container, and pool to use the API. + 8. Verify the data in the akey with single_fetch(). 
+ + :avocado: tags=all,full_regression + :avocado: tags=vm + :avocado: tags=recovery + :avocado: tags=DdbPMEMTest,ddb_cmd,test_recovery_ddb_load + """ + self.log_step("Create a pool and a container.") + pool = self.get_pool(connect=True) + container = self.get_container(pool) + + self.log_step("Insert one object with one dkey with API.") + obj_dataset = insert_objects( + context=self.context, container=container, object_count=1, dkey_count=1, akey_count=1, + base_dkey=self.random_dkey, base_akey=self.random_akey, base_data=self.random_data) + ioreqs = obj_dataset[0] + dkeys_inserted = obj_dataset[1] + akeys_inserted = obj_dataset[2] + data_list = obj_dataset[3] + + # For debugging/reference, call single_fetch and get the data just inserted. + # Pass in size + 1 to single_fetch to avoid the no-space error. + data_size = len(data_list[0]) + 1 + data = ioreqs[0].single_fetch( + dkey=dkeys_inserted[0], akey=akeys_inserted[0], size=data_size) + self.log.info("data (before) = %s", data.value.decode('utf-8')) + + self.log_step("Stop the server to use ddb.") + dmg_command = self.get_dmg_command() + dmg_command.system_stop() + + self.log_step("Find the vos file name.") + host = self.server_managers[0].hosts[0:1] + vos_paths = self.server_managers[0].get_vos_files(pool) + if not vos_paths: + self.fail("vos file wasn't found!") + ddb_command = DdbCommand(host, self.bin, vos_paths[0]) + + self.log_step("Load new data into [0]/[0]/[0]/[0]; Create a file in test node.") + load_file_path = os.path.join(self.test_dir, "new_data.txt") + new_data = get_random_string(20) + with open(load_file_path, "w", encoding="utf-8") as file: + file.write(new_data) + + self.log_step("Copy the created file to server node.") + result = distribute_files(self.log, host, load_file_path, load_file_path, False) + if not result.passed: + raise CommandFailure(f"ERROR: Copying new_data.txt to {result.failed_hosts}") + + self.log_step("The file with the new data is ready. 
Run ddb load.") + ddb_command.value_load(component_path="[0]/[0]/[0]/[0]", load_file_path=load_file_path) + + self.log_step("Restart the server.") + dmg_command.system_start() + + self.log_step("Reset the object, container, and pool to use the API after server restart.") + ioreqs[0].obj.close() + container.close() + pool.disconnect() + pool.connect() + container.open() + ioreqs[0].obj.open() + + self.log_step("Verify the data in the akey with single_fetch().") + data_size = len(new_data) + 1 + data = ioreqs[0].single_fetch( + dkey=dkeys_inserted[0], akey=akeys_inserted[0], size=data_size) + actual_data = data.value.decode('utf-8') + self.log.info("data (after) = %s", actual_data) + errors = [] + if new_data != actual_data: + msg = f"ddb load failed! Expected = {new_data}; Actual = {actual_data}" + errors.append(msg) + + report_errors(test=self, errors=errors) + + def test_recovery_ddb_dump_value(self): + """Test ddb dump_value. + + 1. Create a pool and a container. + 2. Insert one object with one dkey with API. + 3. Stop the server to use ddb. + 4. Find the vos file name. e.g., /mnt/daos0//vos-0. + 5. Dump the two akeys to files. + 6. Verify the content of the files. + 7. Restart the server for the cleanup. + 8. Reset the object, container, and pool to prepare for the cleanup. 
+ + :avocado: tags=all,full_regression + :avocado: tags=vm + :avocado: tags=recovery + :avocado: tags=DdbPMEMTest,ddb_cmd,test_recovery_ddb_dump_value + """ + self.log_step("Create a pool and a container.") + pool = self.get_pool(connect=True) + container = self.get_container(pool) + + self.log_step("Insert one object with one dkey with API.") + obj_dataset = insert_objects( + context=self.context, container=container, object_count=1, dkey_count=1, akey_count=2, + base_dkey=self.random_dkey, base_akey=self.random_akey, base_data=self.random_data) + ioreqs = obj_dataset[0] + data_list = obj_dataset[3] + + self.log_step("Stop the server to use ddb.") + dmg_command = self.get_dmg_command() + dmg_command.system_stop() + + self.log_step("Find the vos file name.") + vos_paths = self.server_managers[0].get_vos_files(pool) + if not vos_paths: + self.fail("vos file wasn't found!") + ddb_command = DdbCommand(self.server_managers[0].hosts[0:1], self.bin, vos_paths[0]) + + self.log_step("Dump the two akeys to files.") + akey1_file_path = os.path.join(self.test_dir, "akey1.txt") + ddb_command.value_dump( + component_path="[0]/[0]/[0]/[0]", out_file_path=akey1_file_path) + akey2_file_path = os.path.join(self.test_dir, "akey2.txt") + ddb_command.value_dump( + component_path="[0]/[0]/[0]/[1]", out_file_path=akey2_file_path) + + self.log_step("Copy them from remote server node to local test node.") + copy_remote_to_local( + remote_file_path=akey1_file_path, test_dir=self.test_dir, + remote=self.hostlist_servers[0]) + copy_remote_to_local( + remote_file_path=akey2_file_path, test_dir=self.test_dir, + remote=self.hostlist_servers[0]) + + self.log_step("Verify the content of the files.") + actual_akey1_data = None + with open(akey1_file_path, "r", encoding="utf-8") as file: + actual_akey1_data = file.readlines()[0] + actual_akey2_data = None + with open(akey2_file_path, "r", encoding="utf-8") as file: + actual_akey2_data = file.readlines()[0] + + errors = [] + str_data_list = [] + # 
Convert the data to string. + for data in data_list: + str_data_list.append(data.value.decode("utf-8")) + # Verify that we were able to obtain the data and akey1 and akey2 aren't the same. + if actual_akey1_data is None or actual_akey2_data is None or \ + actual_akey1_data == actual_akey2_data: + msg = (f"Invalid dumped value! Dumped akey1 data = {actual_akey1_data}; " + f"Dumped akey2 data = {actual_akey2_data}") + errors.append(msg) + # Verify that the data we obtained with ddb are the ones we wrote. The order isn't + # deterministic, so check with "in". + if actual_akey1_data not in str_data_list or \ + actual_akey2_data not in str_data_list: + msg = (f"Unexpected dumped value! Dumped akey data 1 = {actual_akey1_data}; Dumped " + f"akey data 2 = {actual_akey2_data}; Expected data list = {str_data_list}") + errors.append(msg) + + self.log_step("Restart the server for the cleanup.") + dmg_command.system_start() + + self.log_step("Reset the object, container, and pool to prepare for the cleanup.") + ioreqs[0].obj.close() + container.close() + pool.disconnect() + pool.connect() + container.open() + ioreqs[0].obj.open() + + report_errors(test=self, errors=errors) diff --git a/src/tests/ftest/recovery/ddb_pmem.yaml b/src/tests/ftest/recovery/ddb_pmem.yaml new file mode 100644 index 00000000000..d1591f5eee8 --- /dev/null +++ b/src/tests/ftest/recovery/ddb_pmem.yaml @@ -0,0 +1,27 @@ +hosts: + test_servers: 1 + test_clients: 1 + +timeout: 7M + +server_config: + name: daos_server + engines_per_host: 1 + engines: + 0: + targets: 1 + storage: + 0: + class: ram + scm_mount: /mnt/daos + system_ram_reserved: 1 + +# In CI, all tests in ddb.py are ran in a single launch.py execution. In that case, the +# test_dir (/var/tmp/daos_testing/) in the server node will not be created +# for each test if "start_servers_once: False" isn't set. test_load() needs this +# directory, so we need to set it. 
+setup: + start_servers_once: False + +pool: + scm_size: 1G diff --git a/src/tests/ftest/util/ddb_utils.py b/src/tests/ftest/util/ddb_utils.py index da6e122592e..a2736c82026 100644 --- a/src/tests/ftest/util/ddb_utils.py +++ b/src/tests/ftest/util/ddb_utils.py @@ -34,11 +34,14 @@ def __init__(self, server_host, path, verbose=True, timeout=None, sudo=True): # Write mode that's necessary for the commands that alters the data such as load. self.write_mode = FormattedParameter("-w", default=False) - # Command to run on the VOS file that contains container, object info, etc. - self.single_command = BasicParameter(None, position=2) + # Path to the system database. Used for MD-on-SSD. + self.db_path = BasicParameter(None, position=1) # VOS file path. - self.vos_path = FormattedParameter("--vos_path {}", position=1) + self.vos_path = FormattedParameter("--vos_path {}", position=2) + + # Command to run on the VOS file that contains container, object info, etc. + self.single_command = BasicParameter(None, position=3) # Members needed for run(). self.verbose = verbose @@ -282,3 +285,21 @@ def dtx_cmt_clear(self, component_path="[0]"): self.single_command.value = " ".join(["dtx_cmt_clear", component_path]) return self.run() + + def prov_mem(self, db_path, tmpfs_mount): + """Call ddb --vos_path "" prov_mem . + + Args: + db_path (str): Path to the system database. e.g., + /var/tmp/daos_testing/control_metadata/daos_control/engine0 + tmpfs_mount (str): Path to the tmpfs mount point. Directory that needs to be created + beforehand. 
e.g., /mnt/daos_load + + Returns: + CommandResult: groups of command results from the same hosts with the same return status + """ + self.vos_path.value = '""' + cmd = ["prov_mem", db_path, tmpfs_mount] + self.single_command.value = " ".join(cmd) + + return self.run() From f76da9615414e85f1b1a44f342de030e07b9b163 Mon Sep 17 00:00:00 2001 From: Tomasz Gromadzki Date: Fri, 27 Feb 2026 19:30:27 +0100 Subject: [PATCH 232/253] DAOS-623 ci: fix spdk.sh script name (#17600) Give the spdk.sh and spdk.changelog files new names that reflect the output of the RPM build process - the daos-spdk package. Signed-off-by: Tomasz Gromadzki --- utils/rpms/build_packages.sh | 2 +- utils/rpms/{spdk.changelog => daos-spdk.changelog} | 0 utils/rpms/{spdk.sh => daos-spdk.sh} | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) rename utils/rpms/{spdk.changelog => daos-spdk.changelog} (100%) rename utils/rpms/{spdk.sh => daos-spdk.sh} (98%) diff --git a/utils/rpms/build_packages.sh b/utils/rpms/build_packages.sh index e696c48fee8..6c72cfd52e6 100755 --- a/utils/rpms/build_packages.sh +++ b/utils/rpms/build_packages.sh @@ -10,7 +10,7 @@ if [[ "${build_type}" =~ deps|all ]]; then utils/rpms/libfabric.sh utils/rpms/mercury.sh utils/rpms/pmdk.sh - utils/rpms/spdk.sh + utils/rpms/daos-spdk.sh fi if [[ "${build_type}" =~ daos|all ]]; then utils/rpms/daos.sh diff --git a/utils/rpms/spdk.changelog b/utils/rpms/daos-spdk.changelog similarity index 100% rename from utils/rpms/spdk.changelog rename to utils/rpms/daos-spdk.changelog diff --git a/utils/rpms/spdk.sh b/utils/rpms/daos-spdk.sh similarity index 98% rename from utils/rpms/spdk.sh rename to utils/rpms/daos-spdk.sh index d041511ea42..4a7bed3428c 100755 --- a/utils/rpms/spdk.sh +++ b/utils/rpms/daos-spdk.sh @@ -17,7 +17,7 @@ DESCRIPTION="The Storage Performance Development Kit provides a set of tools and libraries for writing high performance, scalable, user-mode storage applications." 
URL="https://spdk.io" -RPM_CHANGELOG="spdk.changelog" +RPM_CHANGELOG="daos-spdk.changelog" files=() TARGET_PATH="${bindir}" From bd45a1bdcec8f4d10302e369bccdd2586d5884ff Mon Sep 17 00:00:00 2001 From: Tom Nabarro Date: Fri, 27 Feb 2026 19:59:02 +0000 Subject: [PATCH 233/253] DAOS-18592 control: Only use joined ranks when calculating pool size (#17580) In order to avoid failing pool create with storage percentage (-z X%) when ranks have been stopped, only take into account joined ranks when calculating maximum available pool sizes. Signed-off-by: Tom Nabarro --- src/control/lib/control/pool.go | 52 +++--- src/control/lib/control/pool_test.go | 241 +++++++++++++++++++++------ 2 files changed, 227 insertions(+), 66 deletions(-) diff --git a/src/control/lib/control/pool.go b/src/control/lib/control/pool.go index afc55ead998..6fd79c04af5 100644 --- a/src/control/lib/control/pool.go +++ b/src/control/lib/control/pool.go @@ -1188,16 +1188,8 @@ func ListPools(ctx context.Context, rpcClient UnaryInvoker, req *ListPoolsReq) ( type rankFreeSpaceMap map[ranklist.Rank]uint64 -type filterRankFn func(rank ranklist.Rank) bool - -func newFilterRankFunc(ranks ranklist.RankList) filterRankFn { - return func(rank ranklist.Rank) bool { - return len(ranks) == 0 || rank.InList(ranks) - } -} - // Add namespace ranks to rankNVMeFreeSpace map and return minimum free available SCM namespace bytes. -func processSCMSpaceStats(log debugLogger, filterRank filterRankFn, scmNamespaces storage.ScmNamespaces, rankNVMeFreeSpace rankFreeSpaceMap) (uint64, error) { +func processSCMSpaceStats(log debugLogger, ranks ranklist.RankList, scmNamespaces storage.ScmNamespaces, rankNVMeFreeSpace rankFreeSpaceMap) (uint64, error) { scmBytes := uint64(math.MaxUint64) // Realistically there should only be one-per-rank but handle the case for multiple anyway. 
@@ -1207,7 +1199,7 @@ func processSCMSpaceStats(log debugLogger, filterRank filterRankFn, scmNamespace scmNamespace.UUID, scmNamespace.BlockDevice, scmNamespace.Name) } - if !filterRank(scmNamespace.Mount.Rank) { + if !scmNamespace.Mount.Rank.InList(ranks) { log.Debugf("Skipping SCM device %s (bdev %s, name %s, rank %d) not in ranklist", scmNamespace.UUID, scmNamespace.BlockDevice, scmNamespace.Name, scmNamespace.Mount.Rank) @@ -1233,7 +1225,7 @@ func processSCMSpaceStats(log debugLogger, filterRank filterRankFn, scmNamespace } // Add NVMe free bytes to rankNVMeFreeSpace map. -func processNVMeSpaceStats(log debugLogger, filterRank filterRankFn, nvmeControllers storage.NvmeControllers, rankNVMeFreeSpace rankFreeSpaceMap) error { +func processNVMeSpaceStats(log debugLogger, ranks ranklist.RankList, nvmeControllers storage.NvmeControllers, rankNVMeFreeSpace rankFreeSpaceMap) error { for _, controller := range nvmeControllers { for _, smdDevice := range controller.SmdDevices { msgDev := fmt.Sprintf("SMD device %s (rank %d, ctrlr %s", smdDevice.UUID, @@ -1258,7 +1250,7 @@ func processNVMeSpaceStats(log debugLogger, filterRank filterRankFn, nvmeControl controller.NvmeState.String()) } - if !filterRank(smdDevice.Rank) { + if !smdDevice.Rank.InList(ranks) { log.Debugf("Skipping %s, not in ranklist", msgDev) continue } @@ -1288,10 +1280,34 @@ func getMaxPoolSize(ctx context.Context, rpcClient UnaryInvoker, createReq *Pool return 0, 0, errors.New("invalid mem-ratio, should not be greater than one") } - // Verify that the DAOS system is ready before attempting to query storage. - if _, err := SystemQuery(ctx, rpcClient, &SystemQueryReq{}); err != nil { - return 0, 0, err + // Verify that the DAOS system is ready before attempting to query storage and record joined. 
+ queryResp, err := SystemQuery(ctx, rpcClient, &SystemQueryReq{}) + if err != nil { + return 0, 0, errors.Wrap(err, "getMaxPoolSize: SystemQuery") + } + joinedRanks := ranklist.RankList{} + for _, member := range queryResp.Members { + if member.State == system.MemberStateJoined { + joinedRanks = append(joinedRanks, member.Rank) + } + } + + // Refuse if any requested ranks are not joined, update ranklist to contain only joined ranks. + filterRanks := ranklist.RankList{} + if len(createReq.Ranks) == 0 { + filterRanks = joinedRanks + } else { + for _, rank := range createReq.Ranks { + if !rank.InList(joinedRanks) { + return 0, 0, errors.Errorf("specified rank %d is not joined", rank) + } + filterRanks = append(filterRanks, rank) + } } + slices.Sort(filterRanks) + rpcClient.Debugf("requested/joined/filter ranks: %v/%v/%v", createReq.Ranks, joinedRanks, + filterRanks) + createReq.Ranks = filterRanks scanReq := &StorageScanReq{ Usage: true, @@ -1307,8 +1323,6 @@ func getMaxPoolSize(ctx context.Context, rpcClient UnaryInvoker, createReq *Pool return 0, 0, errors.New("Empty host storage response from StorageScan") } - // Generate function to verify a rank is in the provided rank slice. 
- filterRank := newFilterRankFunc(ranklist.RankList(createReq.Ranks)) rankNVMeFreeSpace := make(rankFreeSpaceMap) scmBytes := uint64(math.MaxUint64) for _, key := range scanResp.HostStorage.Keys() { @@ -1319,7 +1333,7 @@ func getMaxPoolSize(ctx context.Context, rpcClient UnaryInvoker, createReq *Pool scanResp.HostStorage[key].HostSet.String()) } - sb, err := processSCMSpaceStats(rpcClient, filterRank, hostStorage.ScmNamespaces, rankNVMeFreeSpace) + sb, err := processSCMSpaceStats(rpcClient, filterRanks, hostStorage.ScmNamespaces, rankNVMeFreeSpace) if err != nil { return 0, 0, err } @@ -1328,7 +1342,7 @@ func getMaxPoolSize(ctx context.Context, rpcClient UnaryInvoker, createReq *Pool scmBytes = sb } - if err := processNVMeSpaceStats(rpcClient, filterRank, hostStorage.NvmeDevices, rankNVMeFreeSpace); err != nil { + if err := processNVMeSpaceStats(rpcClient, filterRanks, hostStorage.NvmeDevices, rankNVMeFreeSpace); err != nil { return 0, 0, err } } diff --git a/src/control/lib/control/pool_test.go b/src/control/lib/control/pool_test.go index eed5044a5f9..14f098b6a62 100644 --- a/src/control/lib/control/pool_test.go +++ b/src/control/lib/control/pool_test.go @@ -3114,19 +3114,39 @@ func newNvmeCfg(rank int, roles storage.OptionBits, size ...uint64) MockNvmeConf } } +// Helper to add joined members in SystemQueryResp for all ranks in hostsConfigArray. 
+func getSysQueryRespMembers(cfg []MockHostStorageConfig, resp *mgmtpb.SystemQueryResp) { + rankSet := make(map[ranklist.Rank]bool) + for _, hostCfg := range cfg { + for _, scmCfg := range hostCfg.ScmConfig { + rankSet[scmCfg.Rank] = true + } + } + for rank := range rankSet { + resp.Members = append(resp.Members, &mgmtpb.SystemMember{ + Rank: uint32(rank), + Uuid: test.MockUUID(int32(rank)), + State: system.MemberStateJoined.String(), + Addr: fmt.Sprintf("10.0.0.%d:10001", rank), + }) + } +} + func TestControl_getMaxPoolSize(t *testing.T) { devStateFaulty := storage.NvmeStateFaulty devStateNew := storage.NvmeStateNew for name, tc := range map[string]struct { - hostsConfigArray []MockHostStorageConfig - tgtRanks []ranklist.Rank - memRatio float32 - queryError error - expScmBytes uint64 - expNvmeBytes uint64 - expError error - expDebug string + hostsConfigArray []MockHostStorageConfig + tgtRanks []ranklist.Rank + memberStates map[ranklist.Rank]system.MemberState + memRatio float32 + queryError error + expCreateReqRanks []ranklist.Rank + expScmBytes uint64 + expNvmeBytes uint64 + expError error + expDebug string }{ "single server": { hostsConfigArray: []MockHostStorageConfig{ @@ -3136,8 +3156,9 @@ func TestControl_getMaxPoolSize(t *testing.T) { NvmeConfig: []MockNvmeConfig{newNvmeCfg(0, 0)}, }, }, - expScmBytes: 100 * humanize.GByte, - expNvmeBytes: humanize.TByte, + expCreateReqRanks: ranklist.RankList{0}, + expScmBytes: 100 * humanize.GByte, + expNvmeBytes: humanize.TByte, }, "single MD-on-SSD server; no mem-ratio specified; defaults to 1.0": { hostsConfigArray: []MockHostStorageConfig{ @@ -3152,8 +3173,9 @@ func TestControl_getMaxPoolSize(t *testing.T) { }, }, }, - expScmBytes: 100 * humanize.GByte, - expNvmeBytes: humanize.TByte, + expCreateReqRanks: ranklist.RankList{0}, + expScmBytes: 100 * humanize.GByte, + expNvmeBytes: humanize.TByte, }, "single MD-on-SSD server; invalid mem-ratio; high": { hostsConfigArray: []MockHostStorageConfig{ @@ -3200,9 +3222,10 @@ 
func TestControl_getMaxPoolSize(t *testing.T) { }, }, }, - memRatio: 1, - expScmBytes: 100 * humanize.GByte, - expNvmeBytes: humanize.TByte, + memRatio: 1, + expCreateReqRanks: ranklist.RankList{0}, + expScmBytes: 100 * humanize.GByte, + expNvmeBytes: humanize.TByte, }, "single MD-on-SSD server; phase-2 mode (mem-file-sz < meta-blob-sz)": { hostsConfigArray: []MockHostStorageConfig{ @@ -3217,9 +3240,10 @@ func TestControl_getMaxPoolSize(t *testing.T) { }, }, }, - memRatio: 0.5, - expScmBytes: 200 * humanize.GByte, // Double meta-blob-sz due to mem-ratio. - expNvmeBytes: humanize.TByte, + memRatio: 0.5, + expCreateReqRanks: ranklist.RankList{0}, + expScmBytes: 200 * humanize.GByte, // Double meta-blob-sz due to mem-ratio. + expNvmeBytes: humanize.TByte, }, "single ephemeral server": { hostsConfigArray: []MockHostStorageConfig{ @@ -3229,8 +3253,9 @@ func TestControl_getMaxPoolSize(t *testing.T) { NvmeConfig: []MockNvmeConfig{newNvmeCfg(0, 0)}, }, }, - expScmBytes: 100 * humanize.GByte, - expNvmeBytes: humanize.TByte, + expCreateReqRanks: ranklist.RankList{0}, + expScmBytes: 100 * humanize.GByte, + expNvmeBytes: humanize.TByte, }, "double server": { hostsConfigArray: []MockHostStorageConfig{ @@ -3286,8 +3311,9 @@ func TestControl_getMaxPoolSize(t *testing.T) { }, }, }, - expScmBytes: 50 * humanize.GByte, - expNvmeBytes: 700 * humanize.GByte, + expCreateReqRanks: ranklist.RankList{0, 1, 2, 3}, + expScmBytes: 50 * humanize.GByte, + expNvmeBytes: 700 * humanize.GByte, }, "double server; rank filter": { hostsConfigArray: []MockHostStorageConfig{ @@ -3367,8 +3393,9 @@ func TestControl_getMaxPoolSize(t *testing.T) { NvmeConfig: []MockNvmeConfig{}, }, }, - expScmBytes: 100 * humanize.GByte, - expNvmeBytes: uint64(0), + expCreateReqRanks: []ranklist.Rank{0}, + expScmBytes: 100 * humanize.GByte, + expNvmeBytes: uint64(0), }, "No NVMe; double server": { hostsConfigArray: []MockHostStorageConfig{ @@ -3449,8 +3476,9 @@ func TestControl_getMaxPoolSize(t *testing.T) { }, }, }, - 
expScmBytes: 100 * humanize.GByte, - expNvmeBytes: 100 * humanize.TByte, + expCreateReqRanks: ranklist.RankList{0}, + expScmBytes: 100 * humanize.GByte, + expNvmeBytes: 100 * humanize.TByte, }, "invalid response message": { hostsConfigArray: []MockHostStorageConfig{{}}, @@ -3528,8 +3556,9 @@ func TestControl_getMaxPoolSize(t *testing.T) { }, }, }, - expScmBytes: 100 * humanize.GByte, - expNvmeBytes: uint64(0), + expCreateReqRanks: ranklist.RankList{0}, + expScmBytes: 100 * humanize.GByte, + expNvmeBytes: uint64(0), }, "unmounted SCM device": { hostsConfigArray: []MockHostStorageConfig{ @@ -3599,6 +3628,11 @@ func TestControl_getMaxPoolSize(t *testing.T) { NvmeConfig: []MockNvmeConfig{newNvmeCfg(1, 0)}, }, }, + tgtRanks: []ranklist.Rank{0, 1}, + memberStates: map[ranklist.Rank]system.MemberState{ + 0: system.MemberStateJoined, + 1: system.MemberStateJoined, + }, expError: errors.New("without SCM device and at least one SMD device"), }, "no SCM": { @@ -3610,20 +3644,127 @@ func TestControl_getMaxPoolSize(t *testing.T) { }, }, tgtRanks: []ranklist.Rank{1}, + memberStates: map[ranklist.Rank]system.MemberState{ + 1: system.MemberStateJoined, + }, expError: errors.New("No SCM storage space available"), }, + "requested rank not joined": { + hostsConfigArray: []MockHostStorageConfig{ + { + HostName: "foo", + ScmConfig: []MockScmConfig{newScmCfg(0)}, + NvmeConfig: []MockNvmeConfig{newNvmeCfg(0, 0)}, + }, + }, + tgtRanks: []ranklist.Rank{0}, + memberStates: map[ranklist.Rank]system.MemberState{ + 0: system.MemberStateStopped, + }, + expError: errors.New("specified rank 0 is not joined"), + }, + "multiple requested ranks not joined": { + hostsConfigArray: []MockHostStorageConfig{ + { + HostName: "foo", + ScmConfig: []MockScmConfig{newScmCfg(0)}, + NvmeConfig: []MockNvmeConfig{newNvmeCfg(0, 0)}, + }, + { + HostName: "bar", + ScmConfig: []MockScmConfig{newScmCfg(1), newScmCfg(2)}, + NvmeConfig: []MockNvmeConfig{newNvmeCfg(1, 0), newNvmeCfg(2, 0)}, + }, + }, + tgtRanks: 
[]ranklist.Rank{0, 1, 2}, + memberStates: map[ranklist.Rank]system.MemberState{ + 0: system.MemberStateJoined, + 1: system.MemberStateStopped, + 2: system.MemberStateExcluded, + }, + expError: errors.New("specified rank 1 is not joined"), + }, + "all requested ranks joined": { + hostsConfigArray: []MockHostStorageConfig{ + { + HostName: "foo", + ScmConfig: []MockScmConfig{newScmCfg(0)}, + NvmeConfig: []MockNvmeConfig{newNvmeCfg(0, 0)}, + }, + { + HostName: "bar", + ScmConfig: []MockScmConfig{newScmCfg(1), newScmCfg(2)}, + NvmeConfig: []MockNvmeConfig{newNvmeCfg(1, 0), newNvmeCfg(2, 0)}, + }, + }, + tgtRanks: []ranklist.Rank{0, 1}, + memberStates: map[ranklist.Rank]system.MemberState{ + 0: system.MemberStateJoined, + 1: system.MemberStateJoined, + 2: system.MemberStateStopped, + }, + expScmBytes: 100 * humanize.GByte, + expNvmeBytes: humanize.TByte, + }, + "no requested ranks; filters to joined ranks only": { + hostsConfigArray: []MockHostStorageConfig{ + { + HostName: "foo", + ScmConfig: []MockScmConfig{newScmCfg(0)}, + NvmeConfig: []MockNvmeConfig{newNvmeCfg(0, 0)}, + }, + { + HostName: "bar", + ScmConfig: []MockScmConfig{ + newScmCfg(1, humanize.TByte), + newScmCfg(2), + newScmCfg(3, 50*humanize.GByte), + }, + NvmeConfig: []MockNvmeConfig{ + newNvmeCfg(1, 0), + newNvmeCfg(2, 0), + newNvmeCfg(3, 0, 500*humanize.GByte), + }, + }, + }, + memberStates: map[ranklist.Rank]system.MemberState{ + 0: system.MemberStateJoined, + 1: system.MemberStateJoined, + 2: system.MemberStateStopped, + 3: system.MemberStateExcluded, + }, + expCreateReqRanks: ranklist.RankList{0, 1}, + expScmBytes: 100 * humanize.GByte, + expNvmeBytes: humanize.TByte, + }, } { t.Run(name, func(t *testing.T) { log, buf := logging.NewTestLogger(t.Name()) defer test.ShowBufferOnFailure(t, buf) + // Build SystemQueryResp with members based on memberStates + systemQueryResp := &mgmtpb.SystemQueryResp{} + if tc.memberStates != nil { + for rank, state := range tc.memberStates { + systemQueryResp.Members = 
append(systemQueryResp.Members, &mgmtpb.SystemMember{ + Rank: uint32(rank), + Uuid: test.MockUUID(int32(rank)), + State: state.String(), + Addr: fmt.Sprintf("10.0.0.%d:10001", rank), + }) + } + } else { + // If memberStates not specified, create joined members for all ranks in hostsConfigArray + getSysQueryRespMembers(tc.hostsConfigArray, systemQueryResp) + } + mockInvokerConfig := &MockInvokerConfig{ UnaryResponseSet: []*UnaryResponse{ { Responses: []*HostResponse{ { Addr: "foo", - Message: &mgmtpb.SystemQueryResp{}, + Message: systemQueryResp, Error: tc.queryError, }, }, @@ -3661,6 +3802,13 @@ func TestControl_getMaxPoolSize(t *testing.T) { return } + if tc.expCreateReqRanks == nil { + tc.expCreateReqRanks = tc.tgtRanks + } + if diff := cmp.Diff(tc.expCreateReqRanks, createReq.Ranks); diff != "" { + t.Fatalf("Unexpected ranks in create request (-want, +got):\n%s\n", diff) + } + test.AssertEqual(t, tc.expScmBytes, scmBytes, fmt.Sprintf("Invalid SCM pool size, want %s got %s", humanize.Bytes(tc.expScmBytes), humanize.Bytes(scmBytes))) @@ -3689,12 +3837,13 @@ func (invoker *MockRequestsRecorderInvoker) InvokeUnaryRPC(context context.Conte func TestControl_PoolCreateAllCmd(t *testing.T) { for name, tc := range map[string]struct { - hostsConfigArray []MockHostStorageConfig - storageRatio float64 - tgtRanks string - expPoolConfig MockPoolRespConfig - expError error - expWarning string + hostsConfigArray []MockHostStorageConfig + storageRatio float64 + tgtRanks string + expPoolConfig MockPoolRespConfig + expCreateReqRanks []ranklist.Rank + expError error + expWarning string }{ "single server": { storageRatio: 1, @@ -3929,13 +4078,17 @@ func TestControl_PoolCreateAllCmd(t *testing.T) { log, buf := logging.NewTestLogger(t.Name()) defer test.ShowBufferOnFailure(t, buf) + // Add joined members for ranks referenced in MockHostStorageConfig. 
+ systemQueryResp := new(mgmtpb.SystemQueryResp) + getSysQueryRespMembers(tc.hostsConfigArray, systemQueryResp) + mockInvokerConfig := &MockInvokerConfig{ UnaryResponseSet: []*UnaryResponse{ { Responses: []*HostResponse{ { Addr: "foo", - Message: &mgmtpb.SystemQueryResp{}, + Message: systemQueryResp, }, }, }, @@ -4012,20 +4165,14 @@ func TestControl_PoolCreateAllCmd(t *testing.T) { poolCreateRequest.TotalBytes, uint64(0), "Invalid size of TotalBytes attribute: disabled with manual allocation") - if tc.tgtRanks != "" { - test.AssertEqual(t, - ranklist.RankList(poolCreateRequest.Ranks).String(), - tc.expPoolConfig.Ranks, - "Invalid list of Ranks") - } else { - test.AssertEqual(t, - ranklist.RankList(poolCreateRequest.Ranks).String(), - "", - "Invalid list of Ranks") - } test.AssertTrue(t, poolCreateRequest.TierRatio == nil, "Invalid size of TierRatio attribute: disabled with manual allocation") + + test.AssertEqual(t, + poolCreateRequest.Ranks, + ranklist.MustCreateRankSet(tc.expPoolConfig.Ranks).Ranks(), + "Invalid list of Ranks") }) } } From 3ded3f43e646894b88bc9f101766201dca0c804d Mon Sep 17 00:00:00 2001 From: Tom Nabarro Date: Fri, 27 Feb 2026 19:59:53 +0000 Subject: [PATCH 234/253] DAOS-18472 control: Use AdamExcluded ranks in dmg format replace (#17598) Allow ranks that have been previously marked as AdminExcluded to be re-joined after a storage reformat using the dmg storage format --replace command. Signed-off-by: Tom Nabarro --- src/control/system/membership.go | 4 ++-- src/control/system/membership_test.go | 23 ++++++++++++++++++++--- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/src/control/system/membership.go b/src/control/system/membership.go index eb4746c0769..691243c0682 100644 --- a/src/control/system/membership.go +++ b/src/control/system/membership.go @@ -1,6 +1,6 @@ // // (C) Copyright 2020-2024 Intel Corporation. 
-// (C) Copyright 2025 Hewlett Packard Enterprise Development LP +// (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -102,7 +102,7 @@ func (m *Membership) FindRankFromJoinRequest(req *JoinRequest) (Rank, error) { return NilRank, errors.New("unexpected rank in replace-rank request") } - currentMembers, err := m.Members(nil) + currentMembers, err := m.Members(nil, AllMemberFilter) if err != nil { return NilRank, errors.Wrap(err, "failed to get all system members") } diff --git a/src/control/system/membership_test.go b/src/control/system/membership_test.go index 238b296ebcf..7d3e2db8473 100644 --- a/src/control/system/membership_test.go +++ b/src/control/system/membership_test.go @@ -1,6 +1,6 @@ // // (C) Copyright 2020-2024 Intel Corporation. -// (C) Copyright 2025 Hewlett Packard Enterprise Development LP +// (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -779,7 +779,8 @@ func TestSystem_Membership_FindRankFromJoinRequest(t *testing.T) { FabricContexts: newMember.PrimaryFabricContexts, FaultDomain: newMember.FaultDomain, }, - expErr: FaultJoinReplaceRankNotFound(4), // Takes nr not matching fields + // Fault constructor takes the number of non-matching fields. + expErr: FaultJoinReplaceRankNotFound(4), }, "partially matching member": { req: &JoinRequest{ @@ -790,7 +791,9 @@ func TestSystem_Membership_FindRankFromJoinRequest(t *testing.T) { FabricContexts: curMember.PrimaryFabricContexts, FaultDomain: curMember.FaultDomain, }, - expErr: FaultJoinReplaceRankNotFound(1), // Diff resolution when nr == 1 + // A different fault resolution is printed when the number of non-matching + // fields is only one. 
+ expErr: FaultJoinReplaceRankNotFound(1), }, "matching member; identical UUID": { req: &JoinRequest{ @@ -825,6 +828,20 @@ func TestSystem_Membership_FindRankFromJoinRequest(t *testing.T) { }, expRank: curMember.Rank, }, + "admin excluded existing member": { + curMembers: []*Member{ + MockMember(t, 1, MemberStateAdminExcluded).WithFaultDomain(fd1), + }, + req: &JoinRequest{ + Rank: NilRank, + UUID: newUUID, + ControlAddr: curMember.Addr, + PrimaryFabricURI: curMember.Addr.String(), + FabricContexts: curMember.PrimaryFabricContexts, + FaultDomain: curMember.FaultDomain, + }, + expRank: curMember.Rank, + }, } { t.Run(name, func(t *testing.T) { log, buf := logging.NewTestLogger(t.Name()) From 6a3bac87aa408990349be17bd1949f67ce325b1d Mon Sep 17 00:00:00 2001 From: Tom Nabarro Date: Fri, 27 Feb 2026 23:12:39 +0000 Subject: [PATCH 235/253] DAOS-18427 control: Retry system self-heal eval (#17575) Retry dmg self-heal eval command when engine not started error is returned. Do this by updating the IsUnavailable() helper. Signed-off-by: Tom Nabarro --- src/control/lib/control/system.go | 7 ++- src/control/lib/control/system_test.go | 81 +++++++++++++++++++++++++- src/control/system/errors.go | 10 +++- src/control/system/errors_test.go | 23 ++++++++ 4 files changed, 116 insertions(+), 5 deletions(-) diff --git a/src/control/lib/control/system.go b/src/control/lib/control/system.go index 5c5fd8e4eb0..3c77374d932 100644 --- a/src/control/lib/control/system.go +++ b/src/control/lib/control/system.go @@ -1,6 +1,6 @@ // // (C) Copyright 2020-2024 Intel Corporation. -// (C) Copyright 2025 Hewlett Packard Enterprise Development LP +// (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -1318,6 +1318,7 @@ func SystemRebuildManage(ctx context.Context, rpcClient UnaryInvoker, req *Syste type SystemSelfHealEvalReq struct { unaryRequest msRequest + retryableRequest } // SystemSelfHealEvalResp contains the response. 
@@ -1341,6 +1342,10 @@ func SystemSelfHealEval(ctx context.Context, rpcClient UnaryInvoker, req *System req.setRPC(func(ctx context.Context, conn *grpc.ClientConn) (proto.Message, error) { return mgmtpb.NewMgmtSvcClient(conn).SystemSelfHealEval(ctx, pbReq) }) + req.retryTestFn = func(err error, _ uint) bool { + return (system.IsUnavailable(err) || IsRetryableConnErr(err) || + system.IsNotLeader(err) || system.IsNotReplica(err)) + } rpcClient.Debugf("DAOS system self-heal eval request: %s", pbUtil.Debug(pbReq)) ur, err := rpcClient.InvokeUnaryRPC(ctx, req) diff --git a/src/control/lib/control/system_test.go b/src/control/lib/control/system_test.go index 7d12cc04529..d43db611704 100644 --- a/src/control/lib/control/system_test.go +++ b/src/control/lib/control/system_test.go @@ -1,6 +1,6 @@ // // (C) Copyright 2020-2024 Intel Corporation. -// (C) Copyright 2025 Hewlett Packard Enterprise Development LP +// (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -19,6 +19,8 @@ import ( mgmtpb "github.com/daos-stack/daos/src/control/common/proto/mgmt" sharedpb "github.com/daos-stack/daos/src/control/common/proto/shared" "github.com/daos-stack/daos/src/control/common/test" + "github.com/daos-stack/daos/src/control/fault" + "github.com/daos-stack/daos/src/control/fault/code" "github.com/daos-stack/daos/src/control/lib/hostlist" "github.com/daos-stack/daos/src/control/lib/ranklist" "github.com/daos-stack/daos/src/control/logging" @@ -2075,3 +2077,80 @@ func TestControl_SystemSelfHealEval(t *testing.T) { }) } } + +func TestControl_SystemSelfHealEval_RetryableErrors(t *testing.T) { + for name, testErr := range map[string]error{ + "system unavailable": system.ErrRaftUnavail, + "leader step-up": system.ErrLeaderStepUpInProgress, + "connection closed": FaultConnectionClosed(""), + "connection refused": FaultConnectionRefused(""), + "not leader": &system.ErrNotLeader{LeaderHint: "host1", Replicas: 
[]string{"host2"}}, + "not replica": &system.ErrNotReplica{Replicas: []string{"host1", "host2"}}, + "data plane not started": &fault.Fault{Code: code.ServerDataPlaneNotStarted}, + } { + t.Run(name, func(t *testing.T) { + log, buf := logging.NewTestLogger(name) + defer test.ShowBufferOnFailure(t, buf) + + client := NewMockInvoker(log, &MockInvokerConfig{ + UnaryResponseSet: []*UnaryResponse{ + MockMSResponse("", testErr, nil), + MockMSResponse("", nil, &mgmtpb.DaosResp{}), + }, + }) + + gotResp, gotErr := SystemSelfHealEval(test.Context(t), client, &SystemSelfHealEvalReq{}) + if gotErr != nil { + t.Fatalf("unexpected error: %v", gotErr) + } + + expResp := new(SystemSelfHealEvalResp) + if diff := cmp.Diff(expResp, gotResp, cmpopts.IgnoreUnexported(SystemSelfHealEvalResp{})); diff != "" { + t.Fatalf("unexpected response (-want, +got):\n%s\n", diff) + } + }) + } +} + +func TestControl_SystemSelfHealEval_NonRetryableErrors(t *testing.T) { + for name, tc := range map[string]struct { + testErr error + expErr error + }{ + "system uninitialized": { + testErr: system.ErrUninitialized, + expErr: system.ErrUninitialized, + }, + "generic error": { + testErr: errors.New("something went wrong"), + expErr: errors.New("something went wrong"), + }, + "connection bad host": { + testErr: FaultConnectionBadHost("badhost"), + expErr: FaultConnectionBadHost("badhost"), + }, + "connection no route": { + testErr: FaultConnectionNoRoute("10.0.0.1"), + expErr: FaultConnectionNoRoute("10.0.0.1"), + }, + "member exists": { + testErr: system.ErrRankExists(1), + expErr: system.ErrRankExists(1), + }, + } { + t.Run(name, func(t *testing.T) { + log, buf := logging.NewTestLogger(name) + defer test.ShowBufferOnFailure(t, buf) + + client := NewMockInvoker(log, &MockInvokerConfig{ + UnaryResponseSet: []*UnaryResponse{ + MockMSResponse("", tc.testErr, nil), + MockMSResponse("", nil, &mgmtpb.DaosResp{}), + }, + }) + + _, gotErr := SystemSelfHealEval(test.Context(t), client, &SystemSelfHealEvalReq{}) + 
test.CmpErr(t, tc.expErr, gotErr) + }) + } +} diff --git a/src/control/system/errors.go b/src/control/system/errors.go index 509bee13906..335a255bf2f 100644 --- a/src/control/system/errors.go +++ b/src/control/system/errors.go @@ -1,6 +1,6 @@ // // (C) Copyright 2020-2024 Intel Corporation. -// (C) Copyright 2025 Hewlett Packard Enterprise Development LP +// (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -17,6 +17,8 @@ import ( "github.com/pkg/errors" "github.com/daos-stack/daos/src/control/build" + "github.com/daos-stack/daos/src/control/fault" + "github.com/daos-stack/daos/src/control/fault/code" "github.com/daos-stack/daos/src/control/lib/ranklist" ) @@ -39,8 +41,10 @@ func IsUnavailable(err error) bool { if err == nil { return false } - cause := errors.Cause(err).Error() - return strings.Contains(cause, ErrRaftUnavail.Error()) || strings.Contains(cause, ErrLeaderStepUpInProgress.Error()) + cause := errors.Cause(err) + return strings.Contains(cause.Error(), ErrRaftUnavail.Error()) || + strings.Contains(cause.Error(), ErrLeaderStepUpInProgress.Error()) || + fault.IsFaultCode(cause, code.ServerDataPlaneNotStarted) } // IsEmptyGroupMap returns a boolean indicating whether or not the diff --git a/src/control/system/errors_test.go b/src/control/system/errors_test.go index d2ea4eda1ab..02c896a1b86 100644 --- a/src/control/system/errors_test.go +++ b/src/control/system/errors_test.go @@ -1,5 +1,6 @@ // // (C) Copyright 2024 Intel Corporation. 
+// (C) Copyright 2026 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -12,6 +13,8 @@ import ( "github.com/pkg/errors" "github.com/daos-stack/daos/src/control/common/test" + "github.com/daos-stack/daos/src/control/fault" + "github.com/daos-stack/daos/src/control/fault/code" ) func TestSystem_Errors_IsNotReady(t *testing.T) { @@ -79,12 +82,32 @@ func TestSystem_Errors_IsUnavailable(t *testing.T) { err: ErrLeaderStepUpInProgress, expResult: true, }, + "data plane not started": { + err: &fault.Fault{Code: code.ServerDataPlaneNotStarted}, + expResult: true, + }, + "wrapped data plane not started": { + err: errors.Wrap(&fault.Fault{Code: code.ServerDataPlaneNotStarted}, "wrapped error"), + expResult: true, + }, "uninitialized not unavailable": { err: ErrUninitialized, }, "something else": { err: errors.New("something is wrong"), }, + "member exists not unavailable": { + err: ErrRankExists(1), + }, + "member not found not unavailable": { + err: ErrMemberRankNotFound(1), + }, + "pool not found not unavailable": { + err: ErrPoolRankNotFound(1), + }, + "different fault code not unavailable": { + err: &fault.Fault{Code: code.ClientUnknown}, + }, } { t.Run(name, func(t *testing.T) { test.AssertEqual(t, tc.expResult, IsUnavailable(tc.err), "") From 92aad25a9846fef96df2d868fcf508f103babb77 Mon Sep 17 00:00:00 2001 From: Tom Nabarro Date: Fri, 27 Feb 2026 23:13:58 +0000 Subject: [PATCH 236/253] DAOS-18472 doc: Note that format replace ignores AdminExcluded (#17610) Signed-off-by: Tom Nabarro --- docs/admin/administration.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/admin/administration.md b/docs/admin/administration.md index 459d10dbb16..ca61456d7be 100644 --- a/docs/admin/administration.md +++ b/docs/admin/administration.md @@ -1024,6 +1024,12 @@ An examples workflow would be: rank will be created). - Formatted engine will join using the existing (old) rank which is mapped to the engine's hardware. +!!! 
note + `dmg storage format --replace` can be used to replace a rank in `AdminExcluded` state. The + subsequent state of the rank will then no longer be `AdminExcluded`. This special case reduces + a chance that a duplicate rank entry is introduced inadvertently because the rank to be replaced + is in the `AdminExcluded` state and so is recreated rather than replaced. + ### System Erase To erase the DAOS sorage configuration, the `dmg system erase` From cb079968f75ce72cb00c166775690d875ba72a24 Mon Sep 17 00:00:00 2001 From: Jerome Soumagne Date: Mon, 2 Mar 2026 10:14:22 -0600 Subject: [PATCH 237/253] DAOS-18539 cart: detect SLINGSHOT_VNIS when set (#17583) - Prevent server from passing fabric_auth_key to client - Clean up ep_credit/ctx_max_num/crt_timeout init parsing - Remove ENV_STR_NO_PRINT that was used to hide env var content Signed-off-by: Jerome Soumagne --- src/cart/crt_context.c | 4 +- src/cart/crt_init.c | 89 ++++++++++++---------- src/cart/crt_internal_types.h | 47 +++--------- src/cart/crt_rpc.h | 6 +- src/control/server/config/server.go | 5 -- src/control/server/engine/config.go | 5 -- src/include/cart/types.h | 26 +++---- src/tests/ftest/cart/test_ep_cred_client.c | 2 +- src/tests/ftest/cart/test_ep_cred_server.c | 3 +- utils/config/daos_server.yml | 2 +- 10 files changed, 77 insertions(+), 112 deletions(-) diff --git a/src/cart/crt_context.c b/src/cart/crt_context.c index 1b88bab56b7..ccfa40ea9c6 100644 --- a/src/cart/crt_context.c +++ b/src/cart/crt_context.c @@ -1,6 +1,6 @@ /* * (C) Copyright 2016-2024 Intel Corporation. 
- * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * (C) Copyright 2025 Google LLC * * SPDX-License-Identifier: BSD-2-Clause-Patent @@ -665,7 +665,7 @@ crt_ctx_epi_abort(struct crt_ep_inflight *epi, int flags) break; } ts_now = d_timeus_secdiff(0); - if (ts_now - ts_start > 2 * CRT_DEFAULT_TIMEOUT_US) { + if (ts_now - ts_start > 2 * CRT_TIMEOUT_DEFAULT * 1e6) { D_ERROR("stop progress due to timed out.\n"); d_list_for_each_entry(rpc_priv, &epi->epi_req_q, crp_epi_link) RPC_ERROR(rpc_priv, diff --git a/src/cart/crt_init.c b/src/cart/crt_init.c index cf4c17db79c..626ace92221 100644 --- a/src/cart/crt_init.c +++ b/src/cart/crt_init.c @@ -96,10 +96,11 @@ dump_opt(crt_init_options_t *opt) D_INFO("domain = %s\n", opt->cio_domain); D_INFO("port = %s\n", opt->cio_port); D_INFO("auth_key = %s\n", opt->cio_auth_key); - D_INFO("Flags: fault_inject = %d, use_credits = %d, use_sensors = %d, " - "thread_mode_single = %d, progress_busy = %d, mem_device = %d\n", - opt->cio_fault_inject, opt->cio_use_credits, opt->cio_use_sensors, - opt->cio_thread_mode_single, opt->cio_progress_busy, opt->cio_mem_device); + D_INFO("ep_credits = %d\n", opt->cio_ep_credits); + D_INFO("Flags: fault_inject = %d, use_sensors = %d, thread_mode_single = %d, " + "progress_busy = %d, mem_device = %d\n", + opt->cio_fault_inject, opt->cio_use_sensors, opt->cio_thread_mode_single, + opt->cio_progress_busy, opt->cio_mem_device); if (opt->cio_use_expected_size) D_INFO("max_expected_size = %d\n", opt->cio_max_expected_size); @@ -160,10 +161,9 @@ prov_data_init(struct crt_prov_gdata *prov_data, crt_provider_t provider, bool p crt_init_options_t *opt) { - uint32_t ctx_num = 0; uint32_t max_expect_size = 0; uint32_t max_unexpect_size = 0; - uint32_t max_num_ctx = CRT_SRV_CONTEXT_NUM; + uint32_t ctx_max_num = 0; int i; int rc; @@ -172,27 +172,29 @@ prov_data_init(struct crt_prov_gdata *prov_data, crt_provider_t provider, bool p return 
rc; if (crt_is_service()) { - ctx_num = CRT_SRV_CONTEXT_NUM; - max_num_ctx = CRT_SRV_CONTEXT_NUM; + ctx_max_num = CRT_SRV_CONTEXT_NUM; } else { /* Only limit the number of contexts for clients */ - crt_env_get(CRT_CTX_NUM, &ctx_num); + CRT_ENV_OPT_GET(opt, ctx_max_num, CRT_CTX_NUM); /* Default setting to the number of cores */ - if (opt) - max_num_ctx = - ctx_num ? ctx_num : max(crt_gdata.cg_num_cores, opt->cio_ctx_max_num); - else - max_num_ctx = ctx_num ? ctx_num : crt_gdata.cg_num_cores; - } + if (!ctx_max_num) + ctx_max_num = crt_gdata.cg_num_cores; - if (max_num_ctx > CRT_SRV_CONTEXT_NUM) - max_num_ctx = CRT_SRV_CONTEXT_NUM; - /* To be able to run on VMs */ - if (max_num_ctx < CRT_SRV_CONTEXT_NUM_MIN) - max_num_ctx = CRT_SRV_CONTEXT_NUM_MIN; + if (ctx_max_num > CRT_SRV_CONTEXT_NUM) { + D_WARN("ctx_max_num %u exceeds max %u, using max\n", ctx_max_num, + CRT_SRV_CONTEXT_NUM); + ctx_max_num = CRT_SRV_CONTEXT_NUM; + } + /* To be able to run on VMs */ + if (ctx_max_num < CRT_SRV_CONTEXT_NUM_MIN) { + D_INFO("ctx_max_num %u is less than min %u, using min\n", ctx_max_num, + CRT_SRV_CONTEXT_NUM_MIN); + ctx_max_num = CRT_SRV_CONTEXT_NUM_MIN; + } + } - D_DEBUG(DB_ALL, "Max number of contexts set to %d\n", max_num_ctx); + D_DEBUG(DB_ALL, "Max number of contexts set to %u\n", ctx_max_num); if (opt && opt->cio_use_expected_size) max_expect_size = opt->cio_max_expected_size; @@ -205,7 +207,7 @@ prov_data_init(struct crt_prov_gdata *prov_data, crt_provider_t provider, bool p prov_data->cpg_ctx_num = 0; prov_data->cpg_sep_mode = false; prov_data->cpg_contig_ports = true; - prov_data->cpg_ctx_max_num = max_num_ctx; + prov_data->cpg_ctx_max_num = ctx_max_num; prov_data->cpg_max_exp_size = max_expect_size; prov_data->cpg_max_unexp_size = max_unexpect_size; prov_data->cpg_primary = primary; @@ -218,7 +220,7 @@ prov_data_init(struct crt_prov_gdata *prov_data, crt_provider_t provider, bool p prov_data->cpg_last_remote_tag = 0; D_DEBUG(DB_ALL, "prov_idx: %d primary: %d sizes: (%d/%d) 
max_ctx: %d\n", provider, primary, - max_expect_size, max_unexpect_size, max_num_ctx); + max_expect_size, max_unexpect_size, ctx_max_num); D_INIT_LIST_HEAD(&prov_data->cpg_ctx_list); @@ -261,8 +263,8 @@ crt_str_to_tc(const char *str) static int data_init(int server, crt_init_options_t *opt) { - uint32_t timeout = 0; - uint32_t credits; + uint32_t crt_timeout = 0; + uint32_t ep_credits = CRT_DEFAULT_CREDITS_PER_EP_CTX; uint32_t fi_univ_size = 0; uint32_t mem_pin_enable = 0; uint32_t is_secondary; @@ -310,24 +312,25 @@ data_init(int server, crt_init_options_t *opt) } crt_gdata.cg_provider_is_primary = (is_secondary) ? 0 : 1; - if (opt && opt->cio_crt_timeout != 0) - timeout = opt->cio_crt_timeout; - else - crt_env_get(CRT_TIMEOUT, &timeout); + CRT_ENV_OPT_GET(opt, crt_timeout, CRT_TIMEOUT); + if (crt_timeout == 0) + crt_gdata.cg_timeout = CRT_TIMEOUT_DEFAULT; + else if (crt_timeout > CRT_TIMEOUT_MAX) { + D_WARN("crt_timeout %u exceeds max %u, using max\n", crt_timeout, CRT_TIMEOUT_MAX); + crt_gdata.cg_timeout = CRT_TIMEOUT_MAX; + } else + crt_gdata.cg_timeout = crt_timeout; - if (timeout == 0 || timeout > 3600) - crt_gdata.cg_timeout = CRT_DEFAULT_TIMEOUT_S; - else - crt_gdata.cg_timeout = timeout; crt_gdata.cg_swim_ctx_idx = CRT_DEFAULT_PROGRESS_CTX_IDX; /* Override defaults and environment if option is set */ - if (opt && opt->cio_use_credits) { - credits = opt->cio_ep_credits; - } else { - credits = CRT_DEFAULT_CREDITS_PER_EP_CTX; - crt_env_get(CRT_CREDIT_EP_CTX, &credits); + CRT_ENV_OPT_GET(opt, ep_credits, CRT_CREDIT_EP_CTX); + if (ep_credits > CRT_MAX_CREDITS_PER_EP_CTX) { + D_WARN("ep_credits %u exceeds max %u, using max\n", ep_credits, + CRT_MAX_CREDITS_PER_EP_CTX); + ep_credits = CRT_MAX_CREDITS_PER_EP_CTX; } + crt_gdata.cg_credit_ep_ctx = ep_credits; /* Enable quotas by default only on clients */ crt_gdata.cg_rpc_quota = server ? 
0 : CRT_QUOTA_RPCS_DEFAULT; @@ -352,10 +355,6 @@ data_init(int server, crt_init_options_t *opt) d_setenv("FI_UNIVERSE_SIZE", "2048", 1); } - if (credits > CRT_MAX_CREDITS_PER_EP_CTX) - credits = CRT_MAX_CREDITS_PER_EP_CTX; - crt_gdata.cg_credit_ep_ctx = credits; - /** enable sensors if requested */ crt_gdata.cg_use_sensors = (opt && opt->cio_use_sensors); @@ -752,6 +751,12 @@ crt_init_opt(crt_group_id_t grpid, uint32_t flags, crt_init_options_t *opt) if (interface == NULL && prov != CRT_PROV_OFI_CXI) D_WARN("No interface specified\n"); + /* For PALS-enabled environments, auto-detect svc ID / VNI and use DAOS VNI */ + if (prov == CRT_PROV_OFI_CXI && auth_key == NULL && crt_env_is_set(SLINGSHOT_VNIS)) + auth_key = "0:0:2"; /* format is svc_id:vni:vni_idx, use hard-coded value to tell + mercury to detect svc_id and vni from the env vars and use + the DAOS VNI at index 2 */ + crt_gdata.cg_primary_prov = prov; /* * Note: If on the client the 'interface' contains a diff --git a/src/cart/crt_internal_types.h b/src/cart/crt_internal_types.h index 0d532244c55..02fbe3eea0c 100644 --- a/src/cart/crt_internal_types.h +++ b/src/cart/crt_internal_types.h @@ -200,7 +200,7 @@ struct crt_event_cb_priv { /* * List of environment variables to read at CaRT library load time. 
* for integer envs use ENV() - * for string ones ENV_STR() or ENV_STR_NO_PRINT() + * for string ones ENV_STR() **/ #define CRT_ENV_LIST \ ENV_STR(CRT_ATTACH_INFO_PATH) \ @@ -243,7 +243,8 @@ struct crt_event_cb_priv { ENV(D_MRECV_BUF) \ ENV(D_MRECV_BUF_COPY) \ ENV_STR(D_PROVIDER) \ - ENV_STR_NO_PRINT(D_PROVIDER_AUTH_KEY) \ + ENV_STR(D_PROVIDER_AUTH_KEY) \ + ENV_STR(SLINGSHOT_VNIS) \ ENV(D_QUOTA_RPCS) \ ENV(D_QUOTA_BULKS) \ ENV(FI_OFI_RXM_USE_SRX) \ @@ -258,16 +259,12 @@ struct crt_event_cb_priv { /* uint env */ #define ENV(x) \ unsigned int _##x; \ - int _rc_##x; \ - bool _no_print_##x; + int _rc_##x; /* char* env */ #define ENV_STR(x) \ char *_##x; \ - int _rc_##x; \ - bool _no_print_##x; - -#define ENV_STR_NO_PRINT(x) ENV_STR(x) + int _rc_##x; struct crt_envs { CRT_ENV_LIST; @@ -276,7 +273,6 @@ struct crt_envs { #undef ENV #undef ENV_STR -#undef ENV_STR_NO_PRINT extern struct crt_envs crt_genvs; @@ -293,26 +289,17 @@ crt_env_init(void) #define ENV(x) \ do { \ - crt_genvs._rc_##x = d_getenv_uint(#x, &crt_genvs._##x); \ - crt_genvs._no_print_##x = false; \ + crt_genvs._rc_##x = d_getenv_uint(#x, &crt_genvs._##x); \ } while (0); #define ENV_STR(x) \ do { \ - crt_genvs._rc_##x = d_agetenv_str(&crt_genvs._##x, #x); \ - crt_genvs._no_print_##x = false; \ - } while (0); - -#define ENV_STR_NO_PRINT(x) \ - do { \ - crt_genvs._rc_##x = d_agetenv_str(&crt_genvs._##x, #x); \ - crt_genvs._no_print_##x = true; \ + crt_genvs._rc_##x = d_agetenv_str(&crt_genvs._##x, #x); \ } while (0); CRT_ENV_LIST; #undef ENV #undef ENV_STR -#undef ENV_STR_NO_PRINT crt_genvs.inited = true; } @@ -323,13 +310,11 @@ crt_env_fini(void) { #define ENV(x) (void) #define ENV_STR(x) d_freeenv_str(&crt_genvs._##x); -#define ENV_STR_NO_PRINT ENV_STR CRT_ENV_LIST #undef ENV #undef ENV_STR -#undef ENV_STR_NO_PRINT crt_genvs.inited = false; } @@ -342,6 +327,9 @@ crt_env_fini(void) *val = crt_genvs._##name; \ } while (0) +/* Check if the env is set */ +#define crt_env_is_set(name) (crt_genvs._rc_##name == 0) + 
/* Check envs that contain strings to not exceed CRT_ENV_STR_MAX_SIZE */ static inline bool crt_env_list_valid(void) @@ -357,20 +345,12 @@ crt_env_list_valid(void) return false; \ } -/* if string env exceeds CRT_ENV_STR_MAX_SIZE - return false */ -#define ENV_STR_NO_PRINT(x) \ - if (crt_genvs._rc_##x == 0 && strlen(crt_genvs._##x) + 1 > CRT_ENV_STR_MAX_SIZE) { \ - D_ERROR("env '%s' exceeded max size %d\n", #x, CRT_ENV_STR_MAX_SIZE); \ - return false; \ - } - /* expand env list using the above ENV_* definitions */ CRT_ENV_LIST; return true; #undef ENV #undef ENV_STR -#undef ENV_STR_NO_PRINT } /* dump environment variables from the CRT_ENV_LIST */ @@ -381,20 +361,17 @@ crt_env_dump(void) /* Only dump envariables that were set */ #define ENV(x) \ - if (!crt_genvs._rc_##x && crt_genvs._no_print_##x == 0) \ + if (!crt_genvs._rc_##x) \ D_INFO("%s = %d\n", #x, crt_genvs._##x); #define ENV_STR(x) \ if (!crt_genvs._rc_##x) \ - D_INFO("%s = %s\n", #x, crt_genvs._no_print_##x ? "****" : crt_genvs._##x); - -#define ENV_STR_NO_PRINT ENV_STR + D_INFO("%s = %s\n", #x, crt_genvs._##x); CRT_ENV_LIST; #undef ENV #undef ENV_STR -#undef ENV_STR_NO_PRINT } /* structure of global fault tolerance data */ diff --git a/src/cart/crt_rpc.h b/src/cart/crt_rpc.h index 8af18e0166d..64fffdcc9e4 100644 --- a/src/cart/crt_rpc.h +++ b/src/cart/crt_rpc.h @@ -15,9 +15,9 @@ #include #include -/* default RPC timeout 60 seconds */ -#define CRT_DEFAULT_TIMEOUT_S (60) /* second */ -#define CRT_DEFAULT_TIMEOUT_US (CRT_DEFAULT_TIMEOUT_S * 1e6) /* micro-second */ +/* default RPC timeout */ +#define CRT_TIMEOUT_DEFAULT (60U) /* 60 seconds */ +#define CRT_TIMEOUT_MAX (3600U) /* 1 hour */ #define CRT_QUOTA_RPCS_DEFAULT 64 #define CRT_QUOTA_BULKS_DEFAULT 64 diff --git a/src/control/server/config/server.go b/src/control/server/config/server.go index f92771621d1..c6a808baf29 100644 --- a/src/control/server/config/server.go +++ b/src/control/server/config/server.go @@ -148,7 +148,6 @@ func (cfg *Server) 
WithFabricProvider(provider string) *Server { // WithFabricAuthKey sets the top-level fabric authorization key. func (cfg *Server) WithFabricAuthKey(key string) *Server { cfg.Fabric.AuthKey = key - cfg.ClientEnvVars = common.MergeKeyValues(cfg.ClientEnvVars, []string{cfg.Fabric.GetAuthKeyEnv()}) for _, engine := range cfg.Engines { engine.Fabric.AuthKey = cfg.Fabric.AuthKey } @@ -404,10 +403,6 @@ func (cfg *Server) Load(log logging.Logger) error { cfg.updateServerConfig(&cfg.Engines[i]) } - if cfg.Fabric.AuthKey != "" { - cfg.ClientEnvVars = common.MergeKeyValues(cfg.ClientEnvVars, []string{cfg.Fabric.GetAuthKeyEnv()}) - } - if len(cfg.deprecatedParams.AccessPoints) > 0 { if len(cfg.MgmtSvcReplicas) > 0 { return errors.New(msgAPsMSReps) diff --git a/src/control/server/engine/config.go b/src/control/server/engine/config.go index 65748106adf..e8c16ee15fb 100644 --- a/src/control/server/engine/config.go +++ b/src/control/server/engine/config.go @@ -215,11 +215,6 @@ func (fc *FabricConfig) Validate() error { return nil } -// GetAuthKeyEnv returns the environment variable string for the auth key. -func (fc *FabricConfig) GetAuthKeyEnv() string { - return fmt.Sprintf("D_PROVIDER_AUTH_KEY=%s", fc.AuthKey) -} - // cleanEnvVars scrubs the supplied slice of environment // variables by removing all variables not included in the // allow list. diff --git a/src/include/cart/types.h b/src/include/cart/types.h index c3119883a96..65786f57ede 100644 --- a/src/include/cart/types.h +++ b/src/include/cart/types.h @@ -46,26 +46,20 @@ typedef struct crt_init_options { * evnironment variable. */ int cio_crt_timeout; - uint32_t cio_sep_override:1, /**< Deprecated */ - cio_use_sep:1, /**< Deprecated */ - /** whether or not to inject faults */ - cio_fault_inject:1, - /** - * whether or not to override credits. 
When set - * overrides CRT_CTX_EP_CREDITS envariable - */ - cio_use_credits:1, - /** whether or not to enable per-context sensors */ - cio_use_sensors:1, - - /** whether or not to use expected sizes */ - cio_use_expected_size:1, - cio_use_unexpected_size:1; + uint32_t cio_sep_override : 1, /**< Deprecated */ + cio_use_sep : 1, /**< Deprecated */ + /** whether or not to inject faults */ + cio_fault_inject : 1, + /** whether or not to enable per-context sensors */ + cio_use_sensors : 1, + + /** whether or not to use expected sizes */ + cio_use_expected_size : 1, cio_use_unexpected_size : 1; /** overrides the value of the environment variable CRT_CTX_NUM */ int cio_ctx_max_num; - /** Used with cio_use_credits to set credit limit */ + /** set credit limit */ int cio_ep_credits; /** diff --git a/src/tests/ftest/cart/test_ep_cred_client.c b/src/tests/ftest/cart/test_ep_cred_client.c index 0e491c58bc2..83686667d76 100644 --- a/src/tests/ftest/cart/test_ep_cred_client.c +++ b/src/tests/ftest/cart/test_ep_cred_client.c @@ -1,5 +1,6 @@ /* * (C) Copyright 2018-2022 Intel Corporation. + * (C) Copyright 2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -61,7 +62,6 @@ test_run() D_ASSERTF(rc == 0, "crt_group_config_path_set failed %d\n", rc); } - opt.cio_use_credits = 1; opt.cio_ep_credits = test.tg_credits; DBG_PRINT("Number of credits: %d Number of burst: %d\n", diff --git a/src/tests/ftest/cart/test_ep_cred_server.c b/src/tests/ftest/cart/test_ep_cred_server.c index 8ed17c398ce..7a24dd3b3c2 100644 --- a/src/tests/ftest/cart/test_ep_cred_server.c +++ b/src/tests/ftest/cart/test_ep_cred_server.c @@ -1,6 +1,6 @@ /* * (C) Copyright 2018-2022 Intel Corporation. 
- * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -21,7 +21,6 @@ test_run(d_rank_t my_rank) DBG_PRINT("local group: %s remote group: %s\n", test.tg_local_group_name, test.tg_remote_group_name); - opt.cio_use_credits = 1; opt.cio_ep_credits = test.tg_credits; rc = crtu_srv_start_basic(test.tg_local_group_name, &test.tg_crt_ctx, &test.tg_tid, &grp, diff --git a/utils/config/daos_server.yml b/utils/config/daos_server.yml index a7acd216909..b712a600f56 100644 --- a/utils/config/daos_server.yml +++ b/utils/config/daos_server.yml @@ -131,7 +131,7 @@ # ## CART: Fabric authorization key ## If the fabric requires an authorization key, set it here to -## be used on the server and clients. +## be used on the server. # #fabric_auth_key: foo:bar # From 81ddc13dcefa620aa7ab12209809214a37277d78 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 2 Mar 2026 08:54:44 -0800 Subject: [PATCH 238/253] DAOS-18636 cq: update isort to 8.0.1 (#17625) Updates `isort` from 8.0.0 to 8.0.1 Signed-off-by: dependabot[bot] --- utils/cq/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/cq/requirements.txt b/utils/cq/requirements.txt index f0a62b75f78..ef9340fcb73 100644 --- a/utils/cq/requirements.txt +++ b/utils/cq/requirements.txt @@ -3,7 +3,7 @@ pyenchant ## flake8 6 removed --diff option which breaks flake precommit hook. 
## https://github.com/pycqa/flake8/issues/1389 https://github.com/PyCQA/flake8/pull/1720 flake8==7.3.0 -isort==8.0.0 +isort==8.0.1 pylint==4.0.5 yamllint==1.38.0 codespell==2.4.1 From e99cb66e3c4a298a12a88d1181fa074b764e01fb Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 2 Mar 2026 09:10:49 -0800 Subject: [PATCH 239/253] DAOS-18636 cq: Bump GHA versions (#17626) Updates `EnricoMi/publish-unit-test-result-action` from 2.22.0 to 2.23.0 Updates `actions/upload-artifact` from 6.0.0 to 7.0.0 Signed-off-by: dependabot[bot] --- .github/workflows/bullseye-coverage.yml | 12 ++++++------ .github/workflows/ci2.yml | 2 +- .github/workflows/landing-builds.yml | 2 +- .github/workflows/linting.yml | 4 ++-- .github/workflows/ossf-scorecard.yml | 2 +- .github/workflows/rpm-build-and-test.yml | 12 ++++++------ .github/workflows/trivy.yml | 2 +- 7 files changed, 18 insertions(+), 18 deletions(-) diff --git a/.github/workflows/bullseye-coverage.yml b/.github/workflows/bullseye-coverage.yml index a7d9d97ab35..306a0d55a02 100644 --- a/.github/workflows/bullseye-coverage.yml +++ b/.github/workflows/bullseye-coverage.yml @@ -366,7 +366,7 @@ jobs: if: (!cancelled()) && (success() || failure()) && steps.run-test.outcome != 'skipped' # yamllint disable-line rule:line-length - uses: EnricoMi/publish-unit-test-result-action@27d65e188ec43221b20d26de30f4892fad91df2f # v2.22.0 + uses: EnricoMi/publish-unit-test-result-action@c950f6fb443cb5af20a377fd0dfaa78838901040 # v2.23.0 with: check_name: ${{ env.STAGE_NAME }} Test Results github_token: ${{ secrets.GITHUB_TOKEN }} @@ -374,14 +374,14 @@ jobs: - name: Publish artifacts if: (!cancelled()) && (success() || failure()) && steps.run-test.outcome != 'skipped' - uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 with: name: ${{ env.STAGE_NAME }} artifacts path: ${{ 
env.STAGE_NAME }}/** - name: Upload test results if: (success() || failure()) && steps.run-test.outcome != 'skipped' - uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 with: name: ${{ env.STAGE_NAME }} test-results path: ${{ env.STAGE_NAME }}/**/results.xml @@ -636,7 +636,7 @@ jobs: if: (!cancelled()) && (success() || failure()) && steps.run-test.outcome != 'skipped' # yamllint disable-line rule:line-length - uses: EnricoMi/publish-unit-test-result-action@27d65e188ec43221b20d26de30f4892fad91df2f # v2.22.0 + uses: EnricoMi/publish-unit-test-result-action@c950f6fb443cb5af20a377fd0dfaa78838901040 # v2.23.0 with: check_name: ${{ env.STAGE_NAME }} Test Results github_token: ${{ secrets.GITHUB_TOKEN }} @@ -644,14 +644,14 @@ jobs: - name: Publish artifacts if: (!cancelled()) && (success() || failure()) && steps.run-test.outcome != 'skipped' - uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 with: name: ${{ env.STAGE_NAME }} artifacts path: ${{ env.STAGE_NAME }}/** - name: Upload test results if: (success() || failure()) && steps.run-test.outcome != 'skipped' - uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 with: name: ${{ env.STAGE_NAME }} test-results path: ${{ env.STAGE_NAME }}/**/results.xml diff --git a/.github/workflows/ci2.yml b/.github/workflows/ci2.yml index ab377fe0624..3aad063bda3 100644 --- a/.github/workflows/ci2.yml +++ b/.github/workflows/ci2.yml @@ -68,7 +68,7 @@ jobs: - name: Publish NLT test results if: always() # yamllint disable-line rule:line-length - uses: EnricoMi/publish-unit-test-result-action@27d65e188ec43221b20d26de30f4892fad91df2f # v2.22.0 + uses: 
EnricoMi/publish-unit-test-result-action@c950f6fb443cb5af20a377fd0dfaa78838901040 # v2.23.0 with: github_token: ${{ secrets.GITHUB_TOKEN }} files: nlt-junit.xml diff --git a/.github/workflows/landing-builds.yml b/.github/workflows/landing-builds.yml index 11e7e9723ca..e527e021ff7 100644 --- a/.github/workflows/landing-builds.yml +++ b/.github/workflows/landing-builds.yml @@ -144,7 +144,7 @@ jobs: - name: Publish NLT test results if: always() # yamllint disable-line rule:line-length - uses: EnricoMi/publish-unit-test-result-action@27d65e188ec43221b20d26de30f4892fad91df2f # v2.22.0 + uses: EnricoMi/publish-unit-test-result-action@c950f6fb443cb5af20a377fd0dfaa78838901040 # v2.23.0 with: github_token: ${{ secrets.GITHUB_TOKEN }} files: nlt-junit.xml diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 59c01678ed1..777cf428652 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -129,7 +129,7 @@ jobs: - name: Run check run: doxygen Doxyfile - name: 'Upload Artifact' - uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 with: name: API Documentation path: docs/doxygen/html/ @@ -191,7 +191,7 @@ jobs: with: target: ${{ steps.get_merge_base.outputs.ref }} - name: Export changes - uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 if: failure() with: name: format-patch-for-pr-${{ github.event.pull_request.number }} diff --git a/.github/workflows/ossf-scorecard.yml b/.github/workflows/ossf-scorecard.yml index 90de01e8201..21822ae1038 100644 --- a/.github/workflows/ossf-scorecard.yml +++ b/.github/workflows/ossf-scorecard.yml @@ -62,7 +62,7 @@ jobs: # uploads of run results in SARIF # format to the repository Actions tab. 
- name: "Upload artifact" - uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 with: name: SARIF file path: results.sarif diff --git a/.github/workflows/rpm-build-and-test.yml b/.github/workflows/rpm-build-and-test.yml index 4bbb942d788..ff429771a44 100644 --- a/.github/workflows/rpm-build-and-test.yml +++ b/.github/workflows/rpm-build-and-test.yml @@ -375,7 +375,7 @@ jobs: if: (!cancelled()) && (success() || failure()) && steps.run-test.outcome != 'skipped' # yamllint disable-line rule:line-length - uses: EnricoMi/publish-unit-test-result-action@27d65e188ec43221b20d26de30f4892fad91df2f # v2.22.0 + uses: EnricoMi/publish-unit-test-result-action@c950f6fb443cb5af20a377fd0dfaa78838901040 # v2.23.0 with: check_name: ${{ env.STAGE_NAME }} Test Results github_token: ${{ secrets.GITHUB_TOKEN }} @@ -383,14 +383,14 @@ jobs: - name: Publish artifacts if: (!cancelled()) && (success() || failure()) && steps.run-test.outcome != 'skipped' - uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 with: name: ${{ env.STAGE_NAME }} artifacts path: ${{ env.STAGE_NAME }}/** - name: Upload test results if: (success() || failure()) && steps.run-test.outcome != 'skipped' - uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 with: name: ${{ env.STAGE_NAME }} test-results path: ${{ env.STAGE_NAME }}/**/results.xml @@ -645,7 +645,7 @@ jobs: if: (!cancelled()) && (success() || failure()) && steps.run-test.outcome != 'skipped' # yamllint disable-line rule:line-length - uses: EnricoMi/publish-unit-test-result-action@27d65e188ec43221b20d26de30f4892fad91df2f # v2.22.0 + uses: EnricoMi/publish-unit-test-result-action@c950f6fb443cb5af20a377fd0dfaa78838901040 # v2.23.0 with: 
check_name: ${{ env.STAGE_NAME }} Test Results github_token: ${{ secrets.GITHUB_TOKEN }} @@ -653,14 +653,14 @@ jobs: - name: Publish artifacts if: (!cancelled()) && (success() || failure()) && steps.run-test.outcome != 'skipped' - uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 with: name: ${{ env.STAGE_NAME }} artifacts path: ${{ env.STAGE_NAME }}/** - name: Upload test results if: (success() || failure()) && steps.run-test.outcome != 'skipped' - uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 with: name: ${{ env.STAGE_NAME }} test-results path: ${{ env.STAGE_NAME }}/**/results.xml diff --git a/.github/workflows/trivy.yml b/.github/workflows/trivy.yml index 752b35a63a6..21aa575975f 100644 --- a/.github/workflows/trivy.yml +++ b/.github/workflows/trivy.yml @@ -49,7 +49,7 @@ jobs: cp utils/trivy/.trivyignore report/trivyignore.txt - name: Upload the report to the GitHub artifact store - uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 with: path: report/* name: trivy-report-daos From 6a94c31bd5aa9602c31537a93cc2f9da55b2e9f2 Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Tue, 3 Mar 2026 12:58:38 -0600 Subject: [PATCH 240/253] DAOS-18388 client: increase protoquery timeout to 10 seconds (#17383) Since we do not return dead ranks from agent anymore for protoquery, we can increase the timeout more for server to reply to protoquery. 
Signed-off-by: Mohamad Chaarawi --- src/client/api/rpc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/client/api/rpc.c b/src/client/api/rpc.c index 7e75daec705..13daf4ee8d2 100644 --- a/src/client/api/rpc.c +++ b/src/client/api/rpc.c @@ -1,6 +1,6 @@ /** * (C) Copyright 2016-2024 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -192,7 +192,7 @@ daos_rpc_proto_query(crt_opcode_t base_opc, uint32_t *ver_array, int count, int rproto->array_size = count; rproto->ep.ep_grp = sys->sy_group; rproto->base_opc = base_opc; - rproto->timeout = 3; + rproto->timeout = 10; rc = crt_proto_query_with_ctx(&rproto->ep, base_opc, ver_array, count, rproto->timeout, query_cb, rproto, ctx); From 820ab68edf4af36e50074aa5ad6d1647aaedd33d Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Tue, 3 Mar 2026 13:27:06 -0600 Subject: [PATCH 241/253] DAOS-18604 dfs: EC should not be used for directories (#17573) - if dir-oclass is set to EC on container create, use default instead. - daos fs set-attr of an EC oclass on directory should apply only to files. directories will be create with the default in that case. - fix daos fs get-attr to show such changes Signed-off-by: Mohamad Chaarawi --- src/client/dfs/README.md | 9 +++++- src/client/dfs/common.c | 10 ++++++- src/client/dfs/cont.c | 10 +++++-- src/client/dfs/obj.c | 13 +++++++-- src/include/daos/object.h | 13 ++++++++- src/tests/suite/dfs_unit_test.c | 51 +++++++++++++++++++++++++++++++-- 6 files changed, 97 insertions(+), 9 deletions(-) diff --git a/src/client/dfs/README.md b/src/client/dfs/README.md index 485846b660a..8b0fe59e6c7 100644 --- a/src/client/dfs/README.md +++ b/src/client/dfs/README.md @@ -134,7 +134,14 @@ Object testdir By default, all directories are created with an object class with 1 shard. 
This means, that if the container redundancy factor (RF) is 0, OC_S1 oclass will be used; if RF=1 OC_RP_2G1 is used, and so on. The user can of course change that when creating the directory and set the desired object class -manually, or set the default object class when creating the container. +manually, or set the default object class when creating the container. Using an EC object class +class for directories is not recommended since directory entries are small and EC overhead will be +large anyway. Thus when setting the directory object class on container creation to an EC object +class, DAOS will ignore the user setting and use the default replication object class depending on +the redundancy factory of the container as explained earlier. If one uses the DAOS tool to change +the object class of new files and directories to be created under an existing directory (daos fs +set-attr), and that object class is EC, that setting will apply only to files. New directories will +use the container default in that case. Note that with this mapping, the inode information is stored with the entry that it corresponds to in the parent directory object. Thus, hard links won't be supported, since it won't be possible to diff --git a/src/client/dfs/common.c b/src/client/dfs/common.c index acbc7eb11f7..72cae4dceb5 100644 --- a/src/client/dfs/common.c +++ b/src/client/dfs/common.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2018-2024 Intel Corporation. + * (C) Copyright 2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -643,8 +644,15 @@ create_dir(dfs_t *dfs, dfs_obj_t *parent, daos_oclass_id_t cid, dfs_obj_t *dir) if (cid == 0) { if (parent->d.oclass == 0) cid = dfs->attr.da_dir_oclass_id; - else + else { cid = parent->d.oclass; + /* + * If the parent oclass is EC, do not use that for a directory and use the + * container default instead. 
+ */ + if (daos_cid_is_ec(cid)) + cid = dfs->attr.da_dir_oclass_id; + } } /** Allocate an OID for the dir - local operation */ diff --git a/src/client/dfs/cont.c b/src/client/dfs/cont.c index b3c133a8580..b01d527c112 100644 --- a/src/client/dfs/cont.c +++ b/src/client/dfs/cont.c @@ -1,6 +1,6 @@ /** * (C) Copyright 2018-2024 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -189,8 +189,14 @@ dfs_cont_create(daos_handle_t poh, uuid_t *cuuid, dfs_attr_t *attr, daos_handle_ } if (attr->da_file_oclass_id) dattr.da_file_oclass_id = attr->da_file_oclass_id; - if (attr->da_dir_oclass_id) + if (attr->da_dir_oclass_id) { dattr.da_dir_oclass_id = attr->da_dir_oclass_id; + if (daos_cid_is_ec(dattr.da_dir_oclass_id)) { + D_WARN("EC object class for directories is not supported," + " reverting to use default"); + dattr.da_dir_oclass_id = 0; + } + } /** check non default mode */ if ((attr->da_mode & MODE_MASK) == DFS_RELAXED || diff --git a/src/client/dfs/obj.c b/src/client/dfs/obj.c index b4a81ba4855..c85a8f84f9c 100644 --- a/src/client/dfs/obj.c +++ b/src/client/dfs/obj.c @@ -1,6 +1,6 @@ /** * (C) Copyright 2018-2024 Intel Corporation. 
- * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -81,7 +81,16 @@ dfs_obj_get_info(dfs_t *dfs, dfs_obj_t *obj, dfs_obj_info_t *info) /** what is the default oclass files and dirs will be created with in this dir */ if (obj->d.oclass) { - info->doi_dir_oclass_id = obj->d.oclass; + /** if parent oclass is EC, dir would be chosen as container default */ + if (!daos_cid_is_ec(obj->d.oclass)) { + info->doi_dir_oclass_id = obj->d.oclass; + } else { + if (dfs->attr.da_dir_oclass_id) + info->doi_dir_oclass_id = dfs->attr.da_dir_oclass_id; + else + rc = daos_obj_get_oclass(dfs->coh, DAOS_OT_MULTI_HASHED, 0, + 0, &info->doi_dir_oclass_id); + } info->doi_file_oclass_id = obj->d.oclass; } else { if (dfs->attr.da_dir_oclass_id) diff --git a/src/include/daos/object.h b/src/include/daos/object.h index ffad53056c1..1aa93bce457 100644 --- a/src/include/daos/object.h +++ b/src/include/daos/object.h @@ -1,6 +1,6 @@ /** * (C) Copyright 2016-2023 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -404,6 +404,17 @@ daos_oclass_is_ec(struct daos_oclass_attr *oca) return oca->ca_resil == DAOS_RES_EC; } +static inline bool +daos_cid_is_ec(daos_oclass_id_t cid) +{ + struct daos_oclass_attr *oca; + + oca = daos_oclass_id2attr(cid, NULL); + if (oca == NULL) + return false; + return daos_oclass_is_ec(oca); +} + static inline void daos_obj_set_oid(daos_obj_id_t *oid, enum daos_otype_t type, enum daos_obj_redun ord, uint32_t nr_grps, diff --git a/src/tests/suite/dfs_unit_test.c b/src/tests/suite/dfs_unit_test.c index cb2ad322f89..6a537ffa048 100644 --- a/src/tests/suite/dfs_unit_test.c +++ b/src/tests/suite/dfs_unit_test.c @@ -1,6 +1,6 @@ /** * (C) Copyright 2019-2024 Intel Corporation. 
- * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -2256,11 +2256,12 @@ dfs_test_oclass_hints(void **state) daos_oclass_id_t cid; daos_handle_t coh; dfs_t *dfs_l; - dfs_obj_t *obj; + dfs_obj_t *obj, *dir; daos_obj_id_t oid; daos_oclass_id_t ecidx; daos_prop_t *prop = NULL; dfs_attr_t dattr = {0}; + dfs_obj_info_t oinfo = {0}; struct pl_map_attr attr = {0}; int rc; @@ -2416,6 +2417,21 @@ dfs_test_oclass_hints(void **state) rc = compare_oclass(coh, cid, OC_RP_2GX); assert_rc_equal(rc, 0); + /** create a directory and set EC to be used on the directory */ + rc = dfs_open(dfs_l, NULL, "d1", S_IFDIR | S_IWUSR | S_IRUSR, O_RDWR | O_CREAT, 0, 0, NULL, + &dir); + assert_int_equal(rc, 0); + rc = dfs_obj_set_oclass(dfs_l, dir, 0, ecidx); + assert_int_equal(rc, 0); + /** get the dir info to query what oclass will be used */ + rc = dfs_obj_get_info(dfs_l, dir, &oinfo); + assert_int_equal(rc, 0); + rc = compare_oclass(coh, oinfo.doi_dir_oclass_id, OC_RP_2G1); + assert_int_equal(rc, 0); + rc = compare_oclass(coh, oinfo.doi_file_oclass_id, ecidx); + assert_int_equal(rc, 0); + dfs_release(dir); + rc = dfs_umount(dfs_l); assert_int_equal(rc, 0); rc = daos_cont_close(coh, NULL); @@ -2468,6 +2484,21 @@ dfs_test_oclass_hints(void **state) rc = compare_oclass(coh, cid, OC_RP_3GX); assert_rc_equal(rc, 0); + /** create a directory and set EC to be used on the directory */ + rc = dfs_open(dfs_l, NULL, "d1", S_IFDIR | S_IWUSR | S_IRUSR, O_RDWR | O_CREAT, 0, 0, NULL, + &dir); + assert_int_equal(rc, 0); + rc = dfs_obj_set_oclass(dfs_l, dir, 0, ecidx); + assert_int_equal(rc, 0); + /** get the dir info to query what oclass will be used */ + rc = dfs_obj_get_info(dfs_l, dir, &oinfo); + assert_int_equal(rc, 0); + rc = compare_oclass(coh, oinfo.doi_dir_oclass_id, OC_RP_3G1); + assert_int_equal(rc, 0); + rc = compare_oclass(coh, oinfo.doi_file_oclass_id, 
ecidx); + assert_int_equal(rc, 0); + dfs_release(dir); + rc = dfs_umount(dfs_l); assert_int_equal(rc, 0); rc = daos_cont_close(coh, NULL); @@ -2520,6 +2551,22 @@ dfs_test_oclass_hints(void **state) rc = compare_oclass(coh, cid, OC_RP_4GX); assert_rc_equal(rc, 0); + /** create a directory and set EC to be used on the directory */ + rc = dfs_open(dfs_l, NULL, "d1", S_IFDIR | S_IWUSR | S_IRUSR, O_RDWR | O_CREAT, 0, 0, NULL, + &dir); + assert_int_equal(rc, 0); + rc = dfs_obj_set_oclass(dfs_l, dir, 0, ecidx); + assert_int_equal(rc, 0); + /** get the dir info to query what oclass will be used */ + rc = dfs_obj_get_info(dfs_l, dir, &oinfo); + assert_int_equal(rc, 0); + rc = compare_oclass(coh, oinfo.doi_dir_oclass_id, OC_RP_4G1); + assert_int_equal(rc, 0); + rc = compare_oclass(coh, oinfo.doi_file_oclass_id, ecidx); + assert_int_equal(rc, 0); + dfs_release(dir); + + assert_int_equal(rc, 0); rc = dfs_umount(dfs_l); assert_int_equal(rc, 0); rc = daos_cont_close(coh, NULL); From 730773c044c58c953cf2f453616c0a1c81c57f98 Mon Sep 17 00:00:00 2001 From: Niu Yawei Date: Wed, 4 Mar 2026 08:20:18 +0800 Subject: [PATCH 242/253] DAOS-18615 bio: never rollback si_unused_id (#17601) The initial WAL implementation allowed the upper layer to handle WAL commit failures via UNDO operations. This included rolling back the 'si_unused_id' to prevent gaps in WAL. However, current architecture no longer supports UNDO and instead excludes targets upon WAL commit failure. Consequently, the legacy si_unused_id rollback now violates the core assumption: "New transaction ID must be greater than the last checkpointed ID" Signed-off-by: Niu Yawei --- src/bio/bio_wal.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/bio/bio_wal.c b/src/bio/bio_wal.c index 8e482b9f146..c7fc1821cf3 100644 --- a/src/bio/bio_wal.c +++ b/src/bio/bio_wal.c @@ -1,6 +1,6 @@ /** * (C) Copyright 2018-2024 Intel Corporation. 
- * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -728,10 +728,6 @@ wal_tx_completion(struct wal_tx_desc *wal_tx, bool complete_next) biod_tx->bd_result = wal_tx->td_error; if (wal_tx->td_error) { - /* Rollback unused ID */ - if (wal_id_cmp(si, wal_tx->td_id, si->si_unused_id) < 0) - si->si_unused_id = wal_tx->td_id; - if (next != NULL) { /* Propagate error to depended transactions, block incoming transactions */ si->si_tx_failed = 1; From b34e4e8ed9ed96798d77f2629baf0e67665d774c Mon Sep 17 00:00:00 2001 From: Jan Michalski Date: Wed, 4 Mar 2026 18:38:36 +0000 Subject: [PATCH 243/253] DAOS-18608 ddb: md-on-ssd interactive open fix (#17589) Signed-off-by: Jan Michalski --- src/control/cmd/ddb/commands_wrapper.go | 9 +++++++++ src/control/cmd/ddb/ddb_commands.go | 4 ++++ src/control/cmd/ddb/main.go | 14 ++++---------- 3 files changed, 17 insertions(+), 10 deletions(-) diff --git a/src/control/cmd/ddb/commands_wrapper.go b/src/control/cmd/ddb/commands_wrapper.go index 1ba453d082d..d9751dbd8e5 100644 --- a/src/control/cmd/ddb/commands_wrapper.go +++ b/src/control/cmd/ddb/commands_wrapper.go @@ -37,6 +37,15 @@ func freeString(s *C.char) { C.free(unsafe.Pointer(s)) } +func SetCString(out **C.char, s string) func() { + cstr := C.CString(s) + *out = cstr + + return func() { + C.free(unsafe.Pointer(cstr)) + } +} + // InitDdb initializes the ddb context and returns a closure to finalize it. func InitDdb(log *logging.LeveledLogger) (*DdbContext, func(), error) { // Must lock to OS thread because vos init/fini uses ABT init and finalize which must be called on the same thread diff --git a/src/control/cmd/ddb/ddb_commands.go b/src/control/cmd/ddb/ddb_commands.go index 7273ac94b52..d515499264c 100644 --- a/src/control/cmd/ddb/ddb_commands.go +++ b/src/control/cmd/ddb/ddb_commands.go @@ -51,6 +51,10 @@ pool shard. 
Part of the path is used to determine what the pool uuid is.`, a.String("path", "Path to the vos file to open.") }, Run: func(c *grumble.Context) error { + if c.Flags.String("db_path") != "" { + cleanup := SetCString(&ctx.ctx.dc_db_path, c.Flags.String("db_path")) + defer cleanup() + } return ddbOpen(ctx, c.Args.String("path"), c.Flags.Bool("write_mode")) }, Completer: openCompleter, diff --git a/src/control/cmd/ddb/main.go b/src/control/cmd/ddb/main.go index f61903827b8..9cf5bb2089d 100644 --- a/src/control/cmd/ddb/main.go +++ b/src/control/cmd/ddb/main.go @@ -16,7 +16,6 @@ import ( "path/filepath" "runtime/debug" "strings" - "unsafe" "github.com/desertbit/columnize" "github.com/desertbit/go-shlex" @@ -31,11 +30,6 @@ import ( "github.com/daos-stack/daos/src/control/server/engine" ) -/* - #include -*/ -import "C" - func exitWithError(err error) { cmdName := path.Base(os.Args[0]) fmt.Fprintf(os.Stderr, "ERROR: %s: %v\n", cmdName, err) @@ -307,13 +301,13 @@ func parseOpts(args []string, opts *cliOptions) error { app := createGrumbleApp(ctx) if opts.SysdbPath != "" { - ctx.ctx.dc_db_path = C.CString(string(opts.SysdbPath)) - defer C.free(unsafe.Pointer(ctx.ctx.dc_db_path)) + cleanup := SetCString(&ctx.ctx.dc_db_path, string(opts.SysdbPath)) + defer cleanup() } if opts.VosPath != "" { - ctx.ctx.dc_pool_path = C.CString(string(opts.VosPath)) - defer C.free(unsafe.Pointer(ctx.ctx.dc_pool_path)) + cleanup := SetCString(&ctx.ctx.dc_pool_path, string(opts.VosPath)) + defer cleanup() if !strings.HasPrefix(string(opts.Args.RunCmd), "feature") && !strings.HasPrefix(string(opts.Args.RunCmd), "open") && From aa903abdfec4b8f72c1f8934abf11a9345d4418f Mon Sep 17 00:00:00 2001 From: Tomasz Gromadzki Date: Fri, 6 Mar 2026 17:04:05 +0100 Subject: [PATCH 244/253] DAOS-18637 cq: ignore GHSA-72hv-8253-57qq (#17642) Ignore the GHSA-72hv-8253-57qq vulnerability reported in com.fasterxml.jackson.core:jackson-core 2.14.3 The com.fasterxml.jackson.core:jackson-core can not be upgraded as it 
is a part of org.apache.hadoop:hadoop-common:3.4.2::2d40acbf and there is no new version of hadoop. Signed-off-by: Tomasz Gromadzki --- utils/trivy/.trivyignore | 3 +++ utils/trivy/trivy.yaml | 9 ++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/utils/trivy/.trivyignore b/utils/trivy/.trivyignore index c5c2c24ccf2..4a63364afab 100644 --- a/utils/trivy/.trivyignore +++ b/utils/trivy/.trivyignore @@ -13,3 +13,6 @@ CVE-2025-58057 ## CVE-2025-33042,MEDIUM,,"org.apache.avro/avro: Apache Avro Java SDK: Code injection on Java generated code","org.apache.avro:avro","1.11.4","1.12.1, 1.11.5",https://avd.aquasec.com/nvd/cve-2025-33042 CVE-2025-33042 + +## GHSA-72hv-8253-57qq,HIGH,,"jackson-core: Number Length Constraint Bypass in Async Parser Leads to Potential DoS Condition","com.fasterxml.jackson.core:jackson-core","2.14.3","2.18.6, 2.21.1, 3.1.0",https://github.com/advisories/GHSA-72hv-8253-57qq +GHSA-72hv-8253-57qq diff --git a/utils/trivy/trivy.yaml b/utils/trivy/trivy.yaml index 5ac0b5a86c7..40ab9c24d3b 100644 --- a/utils/trivy/trivy.yaml +++ b/utils/trivy/trivy.yaml @@ -1,5 +1,12 @@ # SPDX-License-Identifier: BSD-2-Clause-Patent -# Copyright (c) 2024 Intel Corporation. +# Copyright 2024 Intel Corporation. +# Copyright 2026 Hewlett Packard Enterprise Development LP + +# +# Use the following command to run the trivy scan manually +# trivy fs -c utils/trivy/trivy.yaml . +# Scan results are written to the trivy-report-daos.txt file. 
+# cache: backend: fs From f4210bc8bbb43417f05fcf114181d0a9e937bab5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Oksana=20Sa=C5=82yk?= Date: Fri, 6 Mar 2026 17:08:57 +0100 Subject: [PATCH 245/253] DAOS-18296 common: update PMDK to version 2.1.3 (#17403) Update PMDK to version 2.1.3 Signed-off-by: Oksana Salyk Signed-off-by: Tomasz Gromadzki --- .../post_provision_config_common_functions.sh | 2 +- docs/admin/hardware.md | 2 +- docs/admin/troubleshooting.md | 2 +- docs/overview/terminology.md | 2 +- src/vos/README.md | 4 ++-- third_party_programs.txt | 6 +++--- utils/build.config | 2 +- utils/rpms/daos.changelog | 3 +++ utils/rpms/daos.sh | 2 +- utils/rpms/daos.spec | 8 ++++---- utils/rpms/package_info.sh | 6 +++--- utils/rpms/pmdk.changelog | 13 ++++++++++++- utils/rpms/pmdk.sh | 3 ++- 13 files changed, 35 insertions(+), 20 deletions(-) diff --git a/ci/provisioning/post_provision_config_common_functions.sh b/ci/provisioning/post_provision_config_common_functions.sh index 0b3d9413ab2..5c5e2a50fbd 100755 --- a/ci/provisioning/post_provision_config_common_functions.sh +++ b/ci/provisioning/post_provision_config_common_functions.sh @@ -323,7 +323,7 @@ post_provision_config_nodes() { rm -f "$REPOS_DIR"/*_job_daos-stack_job_*_job_*.repo time dnf -y erase fio fuse ior-hpc mpich-autoload \ argobots cart daos daos-client daos-spdk dpdk \ - libisa-l libpmemobj mercury mpich \ + libisa-l libpmemobj libpmemobj1 mercury mpich \ pmix protobuf-c spdk libfabric libpmem \ munge-libs munge slurm \ slurm-example-configs slurmctld slurm-slurmmd diff --git a/docs/admin/hardware.md b/docs/admin/hardware.md index c35b00af025..939ce9b4829 100644 --- a/docs/admin/hardware.md +++ b/docs/admin/hardware.md @@ -21,7 +21,7 @@ servers. DAOS requires a 64-bit processor architecture and is primarily developed on Intel x86\_64 architecture. 
The DAOS software and the libraries it depends on (e.g., [ISA-L](https://github.com/intel/isa-l), -[SPDK](https://spdk.io/), [PMDK](https://github.com/pmem/pmdk/), and +[SPDK](https://spdk.io/), [PMDK](https://github.com/daos-stack/pmdk/), and [DPDK](https://www.dpdk.org/) can take advantage of Intel Streaming SIMD (SSE) and Intel Advanced Vector Extensions (AVX). diff --git a/docs/admin/troubleshooting.md b/docs/admin/troubleshooting.md index 6365142b5cd..8d615e4991f 100644 --- a/docs/admin/troubleshooting.md +++ b/docs/admin/troubleshooting.md @@ -1003,7 +1003,7 @@ Please refer the [ndctl list](https://docs.pmem.io/ndctl-user-guide/ndctl-man-pa The pmempool is a management tool for Persistent Memory pool files created by PMDK libraries. DAOS uses the PMDK library to manage persistence inside ext4 files. -[pmempool](https://github.com/pmem/pmdk/blob/stable-2.0/doc/pmempool/pmempool-check.1.md) can check consistency of a given pool file. +[pmempool](https://github.com/daos-stack/pmdk/blob/stable-2.1/doc/pmempool/pmempool-check.1.md) can check consistency of a given pool file. It can be run with -r (repair) option which can fix some of the issues with pool file. DAOS will have more number of such pool file (vos-*), based on number of targets mention per daos engine. User may need to check each vos pool file for corruption on faulty pool. 
diff --git a/docs/overview/terminology.md b/docs/overview/terminology.md index 677e03f579c..4815199e6cc 100644 --- a/docs/overview/terminology.md +++ b/docs/overview/terminology.md @@ -32,7 +32,7 @@ |[OFI](https://ofiwg.github.io/libfabric/)|Open Fabrics Interfaces| |OS|Operating System| |PM|Persistent Memory| -|[PMDK](https://github.com/pmem/pmdk)|Persistent Memory Devevelopment Kit| +|[PMDK](https://github.com/daos-stack/pmdk)|Persistent Memory Devevelopment Kit| |[RAFT](https://raft.github.io/)|Raft is a consensus algorithm used to distribute state transitions among DAOS server nodes.| |RAS|Reliability, Availability & Serviceability| |RDB|Replicated Database, containing pool metadata and maintained across DAOS servers using the Raft algorithm.| diff --git a/src/vos/README.md b/src/vos/README.md index db3c83c8229..5a65e7feed1 100644 --- a/src/vos/README.md +++ b/src/vos/README.md @@ -58,7 +58,7 @@ Please refer to the Blob I/O (BIO) module for mor Special care is taken when developing and modifying the VOS layer because any software bug could corrupt data structures in persistent memory. The VOS, therefore, checksums its persistent data structures despite the presence of hardware ECC. -The VOS provides a lightweight I/O stack fully in user space, leveraging the PMDK open-source libraries developed to support this programming model. +The VOS provides a lightweight I/O stack fully in user space, leveraging the PMDK open-source libraries developed to support this programming model. @@ -73,7 +73,7 @@ It is worth noting that such transactions are different from the DAOS transactio Persistent memory transactions must guarantee consistency of VOS internal data structures when processing incoming requests, regardless of their epoch number. Transactions over persistent memory can be implemented in many different ways, e.g., undo logs, redo logs, a combination of both, or copy-on-write. 
-PMDK is an open source collection of libraries for using persistent memory, optimized specifically for NVRAM. +PMDK is an open source collection of libraries for using persistent memory, optimized specifically for NVRAM. Among these is the libpmemobj library, which implements relocatable persistent heaps called persistent memory pools. This includes memory allocation, transactions, and general facilities for persistent memory programming. The transactions are local to one thread (not multi-threaded) and rely on undo logs. diff --git a/third_party_programs.txt b/third_party_programs.txt index a98b70907d6..3fd62a5416a 100644 --- a/third_party_programs.txt +++ b/third_party_programs.txt @@ -117,9 +117,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. Copyright (c) 2009,2014 Google Inc. All rights reserved. pmdk (BSD 3-clause "New" or "Revised" License) - https://github.com/pmem/pmdk - https://github.com/pmem/pmdk/blob/master/LICENSE - Copyright 2014-2020, Intel Corporation + https://github.com/daos-stack/pmdk + https://github.com/daos-stack/pmdk/blob/master/LICENSE + Copyright 2014-2024, Intel Corporation Portable Hardware Locality (hwloc) (BSD 3-clause "New" or "Revised" License) https://www.open-mpi.org/projects/hwloc/ diff --git a/utils/build.config b/utils/build.config index 42b49fcd125..76f5c0f6fcb 100644 --- a/utils/build.config +++ b/utils/build.config @@ -4,7 +4,7 @@ component=daos [commit_versions] argobots=v1.2 fused=v1.0.0 -pmdk=2.1.2 +pmdk=2.1.3 isal=v2.31.1 isal_crypto=v2.25.0 spdk=v24.09 diff --git a/utils/rpms/daos.changelog b/utils/rpms/daos.changelog index ac92201275b..16f420643ac 100644 --- a/utils/rpms/daos.changelog +++ b/utils/rpms/daos.changelog @@ -1,4 +1,7 @@ %changelog +* Wed Feb 18 2026 Oksana Salyk 2.7.104-2 +- Update PMDK to release 2.1.3 + * Tue Feb 10 2026 Dalton Bohning 2.7.104-1 - Bump version to 2.7.104 diff --git a/utils/rpms/daos.sh b/utils/rpms/daos.sh index 1171a079746..2db52a22eba 100755 --- 
a/utils/rpms/daos.sh +++ b/utils/rpms/daos.sh @@ -184,7 +184,7 @@ EOF EXTRA_OPTS+=("--rpm-attr" "2755,root,daos_server:${bindir}/daos_server") DEPENDS=( "daos = ${VERSION}-${RELEASE}" "daos-spdk = ${daos_spdk_full}" ) - DEPENDS+=( "${pmemobj_lib} >= ${pmdk_full}" "${argobots_lib} >= ${argobots_full}" ) + DEPENDS+=( "${pmemobj_lib} = ${pmdk_full}" "${argobots_lib} >= ${argobots_full}" ) DEPENDS+=( "${isal_crypto_lib} >= ${isal_crypto_version}" "numactl" "pciutils" ) build_package "daos-server" diff --git a/utils/rpms/daos.spec b/utils/rpms/daos.spec index b4b8af94452..bbba1cf3406 100644 --- a/utils/rpms/daos.spec +++ b/utils/rpms/daos.spec @@ -24,7 +24,7 @@ Name: daos Version: 2.7.104 -Release: 1%{?relval}%{?dist} +Release: 2%{?relval}%{?dist} Summary: DAOS Storage Engine License: BSD-2-Clause-Patent @@ -61,7 +61,7 @@ BuildRequires: libjson-c-devel BuildRequires: boost-devel %endif %if %{with server} -BuildRequires: libpmemobj-devel >= 2.1.0 +BuildRequires: libpmemobj-devel >= 2.1.3 %endif BuildRequires: fused-devel %if (0%{?suse_version} >= 1500) @@ -164,10 +164,10 @@ Requires: ndctl # needed to set PMem configuration goals in BIOS through control-plane %if (0%{?suse_version} >= 1500) Requires: ipmctl >= 03.00.00.0423 -Requires: libpmemobj1 >= 2.1.0-1.suse1500 +Requires: libpmemobj1 >= 2.1.3 %else Requires: ipmctl >= 03.00.00.0468 -Requires: libpmemobj >= 2.1.0-1%{?dist} +Requires: libpmemobj >= 2.1.3 %endif Requires: mercury >= %{mercury_version} Requires(post): /sbin/ldconfig diff --git a/utils/rpms/package_info.sh b/utils/rpms/package_info.sh index ac6356c3c72..ef3338124d4 100644 --- a/utils/rpms/package_info.sh +++ b/utils/rpms/package_info.sh @@ -50,7 +50,7 @@ export mercury_full="${mercury_version}-${mercury_release}" export argobots_version="1.2" export argobots_release="4${distro_name}" export argobots_full="${argobots_version}-${argobots_release}" -export pmdk_version="2.1.2" +export pmdk_version="2.1.3" export pmdk_release="1${distro_name}" export 
pmdk_full="${pmdk_version}-${pmdk_release}" export isal_version="2.31.1" @@ -104,9 +104,9 @@ set_lib_name mercury_libfabric lib mercury-libfabric mercury-libfabric mercury-l export mercury_libfabric_lib set_lib_name pmemobj lib libpmemobj libpmemobj1 libpmemobj1 -set_lib_name pmemobj dev libpmemobj libpmemobj1 libpmemobj1 +set_lib_name pmemobj dev libpmemobj libpmemobj libpmemobj set_lib_name pmem lib libpmem libpmem1 libpmem1 -set_lib_name pmem dev libpmem libpmem libpmem1 +set_lib_name pmem dev libpmem libpmem libpmem set_lib_name pmempool lib libpmempool libpmempool1 libpmempool1 export pmem_lib export pmem_dev diff --git a/utils/rpms/pmdk.changelog b/utils/rpms/pmdk.changelog index d4cd893efeb..0b6f288aa1f 100644 --- a/utils/rpms/pmdk.changelog +++ b/utils/rpms/pmdk.changelog @@ -1,6 +1,16 @@ %changelog +* Mon Jan 19 2026 Oksana Salyk - 2.1.3-1 +- Expand the sds.at_create CTL to disable unnecessary bad-block checking when running without PMem, preventing the stack overflow (DAOS-18296). +- Fix an issue in the PMEMOBJ allocator with a potential to corrupt the allocator's metadata (DAOS-18195). + * Wed Nov 05 2025 Tomasz Gromadzki - 2.1.2-1 -- Update to release 2.1.2 +- Expand the sds.at_create CTL to also cover pmemobj_open() (DAOS-17449) + - Previously, this CTL affected only pmemobj_create(). + - Now, it affects both pmemobj_create() and pmemobj_open(). + - pmemobj_open() won't be able to open a pool with SDS enabled if the feature is currently + force-disabled. + - Conversely, pmemobj_open() does not issue a warning when attempting to open a pool with SDS disabled + while the feature is force-disabled. * Fri Oct 31 2025 Tomasz Gromadzki - 2.1.0-7 - Restore the RPM changelog, which has not been available since version 2.1.0-4. 
@@ -21,6 +31,7 @@ * Mon Aug 11 2025 Jeff Olivier - 2.1.0-4 - Switch to fpm build for RPMs +- New location of the PMDK repository (https://github.com/daos-stack/pmdk) * Wed Nov 06 2024 Tomasz Gromadzki - 2.1.0-3 - Apply patches to silence annoying error messages on: diff --git a/utils/rpms/pmdk.sh b/utils/rpms/pmdk.sh index 11dc0490cc5..d26dbeee2ac 100755 --- a/utils/rpms/pmdk.sh +++ b/utils/rpms/pmdk.sh @@ -1,5 +1,6 @@ #!/bin/bash # (C) Copyright 2025 Google LLC +# Copyright 2026 Hewlett Packard Enterprise Development LP set -eEuo pipefail root="$(realpath "$(dirname "${BASH_SOURCE[0]}")")" . "${root}/fpm_common.sh" @@ -15,7 +16,7 @@ LICENSE="BSD-3-Clause" ARCH=${isa} DESCRIPTION="The Persistent Memory Development Kit is a collection of libraries for using memory-mapped persistence, optimized specifically for persistent memory." -URL="https://github.com/pmem/pmdk" +URL="https://github.com/daos-stack/pmdk" RPM_CHANGELOG="pmdk.changelog" files=() From e2042f11e25cbf1766aadb0c3c3cc2b303930215 Mon Sep 17 00:00:00 2001 From: Dalton Bohning Date: Fri, 6 Mar 2026 09:24:44 -0800 Subject: [PATCH 246/253] DAOS-18428 test: verify auto recovery policy (#17353) Verify 7 scenarios of auto recovery policy 1. System Creation 2. Disabling and Enabling Self-Heal 3. Online System Maintenance 4. Offline System Maintenance 5. Normal System Restart 6. Unexpected System Restart 7. 
Problematic Pools Signed-off-by: Dalton Bohning --- .../ftest/rebuild/auto_recovery_policy.py | 454 ++++++++++++++++++ .../ftest/rebuild/auto_recovery_policy.yaml | 28 ++ src/tests/ftest/util/dmg_utils.py | 42 ++ src/tests/ftest/util/dmg_utils_base.py | 46 +- src/tests/ftest/util/test_utils_pool.py | 31 +- 5 files changed, 597 insertions(+), 4 deletions(-) create mode 100644 src/tests/ftest/rebuild/auto_recovery_policy.py create mode 100644 src/tests/ftest/rebuild/auto_recovery_policy.yaml diff --git a/src/tests/ftest/rebuild/auto_recovery_policy.py b/src/tests/ftest/rebuild/auto_recovery_policy.py new file mode 100644 index 00000000000..c9e335948d0 --- /dev/null +++ b/src/tests/ftest/rebuild/auto_recovery_policy.py @@ -0,0 +1,454 @@ +""" + (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP + + SPDX-License-Identifier: BSD-2-Clause-Patent +""" +import re +import time +from functools import partial + +from apricot import TestWithServers +from data_utils import assert_val_in_list +from general_utils import list_to_str + + +class RbldAutoRecoveryPolicy(TestWithServers): + """Rebuild test cases related to Auto Recovery Policies. + + :avocado: recursive + """ + + def test_rebuild_auto_recovery_policy(self): + """Jira ID: DAOS-17420. + + Test Description: Verify Rebuild Auto Recovery Policy + + Scenario 1: System Creation and default self_heal. + Scenario 2: Disabling and Enabling Self-Heal. + Scenario 3: Online System Maintenance. + Scenario 4: Offline System Maintenance. + Scenario 5: Normal System Restart. + Scenario 6: Unexpected System Restart. + Scenario 7: Problematic Pools. + + See each corresponding _verify_scenario_X method for detailed steps. 
+ + :avocado: tags=all,full_regression + :avocado: tags=vm + :avocado: tags=pool,rebuild,self_heal + :avocado: tags=RbldAutoRecoveryPolicy,test_rebuild_auto_recovery_policy + """ + self.log_step('Setup pool') + pool = self.get_pool(connect=False) + + # Run just the scenarios requested, or all by default + total_scenarios = 0 + scenarios_passed = 0 + scenarios_to_verify = set(self.params.get('scenarios_to_verify', '/run/test/*', ['all'])) + for method in filter(lambda x: '_verify_scenario_' in x, dir(self)): + total_scenarios += 1 + scenario_number = int(method.split('_')[-1]) + if scenario_number not in scenarios_to_verify and 'all' not in scenarios_to_verify: + self.log.warning('Skipping scenario %s', scenario_number) + continue + self.log.info('Running scenario %s', scenario_number) + scenario_method = getattr(self, method) + scenario_method(pool) + scenarios_passed += 1 + + self.log_step('Destroy pool') + pool.destroy() + + self.log_step(f'Test passed on {scenarios_passed}/{total_scenarios} scenarios') + + def _verify_scenario_1(self, pool): + """Scenario 1: System Creation and default self_heal. + + Verify the default self_heal properties at the system and pool level. + + See self.log_step() calls for test steps. 
+ + Args: + pool (TestPool): The pool to use + """ + dmg = self.get_dmg_command() + + self.log_step('Scenario 1 - Verify default system self_heal policy') + response = dmg.system_get_prop(properties='self_heal')['response'] + actual_value = response[0]['value'] + expected_value = 'exclude;pool_exclude;pool_rebuild' + if actual_value != expected_value: + self.fail( + f'Expected system self_heal policy to be {expected_value}, ' + f'but got {actual_value}') + + self.log_step('Scenario 1 - Verify default pool self_heal policy') + response = pool.get_prop(name='self_heal')['response'] + actual_value = response[0]['value'] + expected_value = 'exclude;rebuild' + if actual_value != expected_value: + self.fail( + f'Expected pool self_heal policy to be {expected_value}, ' + f'but got {actual_value}') + + def _verify_scenario_2(self, pool): + """Scenario 2: Disabling and Enabling Self-Heal. + + Verify disabling self_heal prevents exclusions and rebuilds. + Verify enabling self_heal allows exclusions and rebuilds. + + See self.log_step() calls for test steps. 
+ + Args: + pool (TestPool): The pool to use + """ + dmg = self.get_dmg_command() + + # Get 2 distinct sets of ranks to stop + all_ranks = list(self.server_managers[0].ranks.keys()) + ranks_x = sorted(self.random.sample(all_ranks, k=1)) + ranks_y = sorted(self.random.sample(list(set(all_ranks) - set(ranks_x)), k=1)) + + self.log_step('Scenario 2 - Disable system self_heal') + dmg.system_set_prop('self_heal:none') + + self.log_step('Scenario 2 - Stop a rank and verify it is not excluded') + dmg.system_stop(ranks=ranks_x) + self.server_managers[0].update_expected_states(ranks_x, 'stopped') + self._wait_detection_delay() + self._verify_rank_state(ranks_x, 'stopped') + + self.log_step( + 'Scenario 2 - Enable system self_heal and invoke dmg system self-heal eval') + dmg.system_set_prop('self_heal:exclude;pool_exclude;pool_rebuild') + dmg.system_self_heal_eval() + self.server_managers[0].update_expected_states(ranks_x, ['stopped', 'excluded']) + + self.log_step('Scenario 2 - Verify ranks are excluded and rebuilt in the pool') + self._verify_rank_state(ranks_x, 'excluded') + pool.wait_for_rebuild_to_start(interval=1) + pool.wait_for_rebuild_to_end(interval=3) + + self.log_step( + 'Scenario 2 - Stop another rank and verify it is excluded and rebuilt in the pool') + dmg.system_stop(ranks=ranks_y) + self.server_managers[0].update_expected_states(ranks_y, ['stopped', 'excluded']) + pool.wait_for_rebuild_to_start(interval=1) + pool.wait_for_rebuild_to_end(interval=3) + self._verify_rank_state(ranks_y, 'excluded') + + self.log_step( + 'Scenario 2 - Reintegrate stopped ranks to bring system back to original state') + stopped_ranks_str = list_to_str(ranks_x + ranks_y) + dmg.system_start(stopped_ranks_str) + dmg.system_reintegrate(stopped_ranks_str) + self.server_managers[0].update_expected_states(ranks_x + ranks_y, ['joined']) + pool.wait_for_rebuild_to_start(interval=1) + pool.wait_for_rebuild_to_end(interval=3) + self._verify_rank_state(all_ranks, 'joined') + + def 
_verify_scenario_3(self, pool): + """Scenario 3: Online System Maintenance. + + Verify self_heal can be set such that ranks are excluded but not rebuilt. + + See self.log_step() calls for test steps. + + Args: + pool (TestPool): The pool to use + """ + dmg = self.get_dmg_command() + + # Get a random rank to stop + all_ranks = list(self.server_managers[0].ranks.keys()) + ranks_x = sorted(self.random.sample(all_ranks, k=1)) + + self.log_step('Scenario 3 - Set system.self_heal.pool_rebuild = disabled') + dmg.system_set_prop('self_heal:exclude;pool_exclude') + dmg.system_get_prop(properties='self_heal') + + self.log_step('Scenario 3 - Stop a rank and verify it is excluded without rebuild') + dmg.system_stop(ranks=ranks_x) + self.server_managers[0].update_expected_states(ranks_x, ['stopped', 'excluded']) + self._wait_detection_delay() + self._verify_rank_state(ranks_x, 'excluded') + pool.verify_query({ + 'disabled_ranks': ranks_x, + 'rebuild': { + 'state': partial(assert_val_in_list, allowed_list=['done', 'idle'])}}) + # Targets should be down but not down_out + pool.verify_query_targets_state(ranks_x, 'down') + + self.log_step('Scenario 3 - Restart the rank and make sure it rejoins') + dmg.system_start(ranks=ranks_x) + self.server_managers[0].update_expected_states(ranks_x, ['joined']) + self._verify_rank_state(all_ranks, 'joined', tries=5, delay=3) + + self.log_step('Scenario 3 - Reintegrate the rank and wait for rebuild') + dmg.system_reintegrate(list_to_str(ranks_x)) + self.server_managers[0].update_expected_states(ranks_x, ['joined']) + pool.wait_for_rebuild_to_start(interval=1) + pool.wait_for_rebuild_to_end(interval=3) + + # The pool version changes after exclusion, + # but should not changed after resetting self_heal + self.log.info('Save current pool version') + pool_version = pool.query()['response']['version'] + + self.log_step('Scenario 3 - Reset system self_heal to default') + dmg.system_set_prop('self_heal:exclude;pool_exclude;pool_rebuild') + + 
self.log_step('Scenario 3 - Verify dmg system self-heal eval does not trigger rebuild') + dmg.system_self_heal_eval() + self._wait_detection_delay() + pool.verify_query({ + 'disabled_ranks': [], + 'rebuild': { + 'state': partial(assert_val_in_list, allowed_list=['done', 'idle'])}, + 'version': pool_version}) + + def _verify_scenario_4(self, pool): + """Scenario 4: Offline System Maintenance. + + Verify disabling self_heal prevents exclusions even when the ranks restart. + + See self.log_step() calls for test steps. + + Args: + pool (TestPool): The pool to use + """ + dmg = self.get_dmg_command() + + # Get a list of all ranks + all_ranks = list(self.server_managers[0].ranks.keys()) + + self.log_step('Scenario 4 - Disable system self_heal') + dmg.system_set_prop('self_heal:none') + + # We expect the pool version to stay the same through this scenario since + # there are no exclusions or rebuilds + self.log.info('Save current pool version') + pool_version = pool.query()['response']['version'] + + self.log_step( + 'Scenario 4 - Stop more ranks than the pool RF and verify there are no exclusions') + pool_rf = int(re.findall(r'rd_fac:([0-9]+)', pool.properties.value)[0]) + self.assertGreater( + len(all_ranks), pool_rf, 'Not enough ranks to stop more than pool RF') + ranks_over_rf = sorted(self.random.sample(all_ranks, k=pool_rf + 1)) + dmg.system_stop(ranks=list_to_str(ranks_over_rf)) + self.server_managers[0].update_expected_states(ranks_over_rf, ['stopped']) + self._wait_detection_delay() + self._verify_rank_state(ranks_over_rf, 'stopped') + + self.log_step('Scenario 4 - Restart the stopped ranks and make sure they rejoin') + dmg.system_start(ranks=list_to_str(ranks_over_rf)) + self.server_managers[0].update_expected_states(ranks_over_rf, ['joined']) + self._verify_rank_state(all_ranks, 'joined', tries=5, delay=3) + + self.log_step('Scenario 4 - Reset system self_heal to default') + dmg.system_set_prop('self_heal:exclude;pool_exclude;pool_rebuild') + + 
self.log_step('Scenario 4 - Verify dmg system self-heal eval does not trigger rebuild') + dmg.system_self_heal_eval() + self._wait_detection_delay() + pool.verify_query({ + 'disabled_ranks': [], + 'rebuild': { + 'state': partial(assert_val_in_list, allowed_list=['done', 'idle'])}, + 'version': pool_version}) + + def _verify_scenario_5(self, pool): + """Scenario 5: Normal System Restart. + + Verify disabling self_heal prevents exclusions even when the system restarts. + + See self.log_step() calls for test steps. + + Args: + pool (TestPool): The pool to use + """ + dmg = self.get_dmg_command() + + # Get a list of all ranks + all_ranks = list(self.server_managers[0].ranks.keys()) + + self.log_step('Scenario 5 - Disable system self_heal') + dmg.system_set_prop('self_heal:none') + + # We expect the pool version to stay the same through this scenario since + # there are no exclusions or rebuilds + self.log.info('Save current pool version') + pool_version = pool.query()['response']['version'] + + self.log_step('Scenario 5 - Stop the system and verify no ranks are excluded') + dmg.system_stop() + self.server_managers[0].update_expected_states(all_ranks, ['stopped']) + self._wait_detection_delay() + self._verify_rank_state(all_ranks, 'stopped') + + self.log_step('Scenario 5 - Restart the system and make sure all ranks rejoin') + dmg.system_start() + self.server_managers[0].update_expected_states(all_ranks, ['joined']) + self._verify_rank_state(all_ranks, 'joined', tries=5, delay=3) + + self.log_step('Scenario 5 - Reset system self_heal to default') + dmg.system_set_prop('self_heal:exclude;pool_exclude;pool_rebuild') + + self.log_step('Scenario 5 - Verify dmg system self-heal eval does not trigger rebuild') + dmg.system_self_heal_eval() + self._wait_detection_delay() + pool.verify_query({ + 'disabled_ranks': [], + 'rebuild': { + 'state': partial(assert_val_in_list, allowed_list=['done', 'idle'])}, + 'version': pool_version}) + + def _verify_scenario_6(self, pool): + 
"""Scenario 6: Unexpected System Restart. + + Verify disabling self_heal immediately after unexpected system restart prevents exclusions. + + See self.log_step() calls for test steps. + + Args: + pool (TestPool): The pool to use + """ + dmg = self.get_dmg_command() + + # Get a list of all ranks except 1 + all_ranks = list(self.server_managers[0].ranks.keys()) + all_ranks_minus_1 = sorted(self.random.sample(all_ranks, k=len(all_ranks) - 1)) + + self.log_step('Scenario 6 - Simulate restart with dmg system stop') + # We expect the pool version to stay the same through this scenario since + # there are no exclusions or rebuilds + self.log.info('Save current pool version') + pool_version = pool.query()['response']['version'] + dmg.system_stop() + self.server_managers[0].update_expected_states(all_ranks, ['stopped']) + + self.log_step('Scenario 6 - Start all but 1 rank and immediately disable self-heal') + dmg.system_start(ranks=list_to_str(all_ranks_minus_1)) + self.server_managers[0].update_expected_states(all_ranks_minus_1, ['joined']) + dmg.system_set_prop('self_heal:none') + + self.log_step('Scenario 6 - Verify all but 1 rank rejoins') + self._verify_rank_state(all_ranks_minus_1, 'joined', tries=5, delay=3) + + self.log_step('Scenario 6 - Restart the last rank and make sure it rejoins') + dmg.system_start() + self.server_managers[0].update_expected_states(all_ranks, ['joined']) + self._verify_rank_state(all_ranks, 'joined', tries=5, delay=3) + + self.log_step('Scenario 6 - Reset system self_heal to default') + dmg.system_set_prop('self_heal:exclude;pool_exclude;pool_rebuild') + + self.log_step('Scenario 6 - Verify dmg system self-heal eval does not trigger rebuild') + dmg.system_self_heal_eval() + self._wait_detection_delay() + pool.verify_query({ + 'disabled_ranks': [], + 'rebuild': { + 'state': partial(assert_val_in_list, allowed_list=['done', 'idle'])}, + 'version': pool_version}) + + def _verify_scenario_7(self, pool): + """Scenario 7: Problematic Pools. 
+ + Verify disabling self_heal on specific pools prevents exclusions and rebuilds for + those pools only. + + See self.log_step() calls for test steps. + + Args: + pool (TestPool): The pool to use + """ + dmg = self.get_dmg_command() + + # Get a random rank to stop + all_ranks = list(self.server_managers[0].ranks.keys()) + ranks_x = sorted(self.random.sample(all_ranks, k=1)) + + self.log_step('Scenario 7 - Create a second pool') + pool2 = self.get_pool(connect=False) + + self.log_step('Scenario 7 - Disable self_heal rebuild on just the second pool') + pool2.set_prop('self_heal:exclude') + pool2.query() + + self.log_step('Scenario 7 - Stop a rank and wait for the detection delay') + dmg.system_stop(ranks=ranks_x) + self.server_managers[0].update_expected_states(ranks_x, ['stopped', 'excluded']) + self._wait_detection_delay() + + self.log_step( + 'Scenario 7 - Verify the rank is excluded and rebuilds in first pool only') + self._verify_rank_state(ranks_x, 'excluded') + pool.wait_for_rebuild_to_start(interval=1) + pool.wait_for_rebuild_to_end(interval=3) + pool.verify_query({ + 'disabled_ranks': ranks_x, + 'rebuild': { + 'state': 'done'}}) + self.log_step( + 'Scenario 7 - Verify the rank is excluded and does not rebuild in second pool') + pool2.verify_query({ + 'disabled_ranks': ranks_x, + 'rebuild': { + 'state': partial(assert_val_in_list, allowed_list=['done', 'idle'])}}) + # Targets should be down but not down_out + pool2.verify_query_targets_state(ranks_x, 'down') + + self.log_step( + 'Scenario 7 - Reintegrate stopped ranks to bring system back to original state') + stopped_ranks_str = list_to_str(ranks_x) + dmg.system_start(stopped_ranks_str) + dmg.system_reintegrate(stopped_ranks_str) + self.server_managers[0].update_expected_states(ranks_x, ['joined']) + pool.wait_for_rebuild_to_start(interval=1) + pool.wait_for_rebuild_to_end(interval=3) + self._verify_rank_state(all_ranks, 'joined') + + self.log_step('Scenario 7 - Destroy second pool') + pool2.destroy() + + 
def _verify_rank_state(self, ranks, state, tries=1, delay=3): + """Verify the state of the given ranks. + + Args: + ranks (list): The list of ranks to verify. + state (str): The expected state of the ranks. + tries (int, optional): Number of attempts to verify the state. Defaults to 1. + delay (int, optional): Delay between attempts in seconds. Defaults to 3. + """ + for current_try in range(tries): + current_state = self.server_managers[0].get_current_state() + + # All ranks are in expected state + if set(current_state[rank]['state'] for rank in ranks) == {state}: + return + + # Retry + if current_try < tries - 1: + self.log.info( + 'Not all ranks are in expected state %s. Retrying in %s seconds...', + state, delay) + time.sleep(delay) + continue + + # Final attempt failed + for rank in ranks: + if current_state[rank]['state'] != state: + self.fail( + f'Expected rank {rank} to be in state {state}, ' + f'but current state is {current_state[rank]["state"]}') + + def _wait_detection_delay(self): + """Wait for the detection delay.""" + # The detection delay shall be a couple of SWIM periods (1s) + SWIM suspicion timeout (20s) + # + CRT_EVENT_DELAY (1s) + some margin of error (?) 
+ # This is difficult to calculate so set to 30 based on current environment + detection_delay = 30 + self.log.info('Waiting for detection delay of %s seconds', detection_delay) + time.sleep(detection_delay) diff --git a/src/tests/ftest/rebuild/auto_recovery_policy.yaml b/src/tests/ftest/rebuild/auto_recovery_policy.yaml new file mode 100644 index 00000000000..98499991195 --- /dev/null +++ b/src/tests/ftest/rebuild/auto_recovery_policy.yaml @@ -0,0 +1,28 @@ +hosts: + test_servers: 7 + test_clients: 1 + +timeout: 900 + +server_config: + name: daos_server + engines_per_host: 1 + engines: + 0: + targets: 4 + nr_xs_helpers: 0 + storage: + 0: + class: ram + scm_mount: /mnt/daos + system_ram_reserved: 1 + +pool: + size: 10G + properties: rd_fac:2 + pool_query_timeout: 30 + register_cleanup: False # if something goes wrong, this will likely timeout + +test: + scenarios_to_verify: + - all diff --git a/src/tests/ftest/util/dmg_utils.py b/src/tests/ftest/util/dmg_utils.py index 88d4b26784c..10e56a616a1 100644 --- a/src/tests/ftest/util/dmg_utils.py +++ b/src/tests/ftest/util/dmg_utils.py @@ -1157,6 +1157,21 @@ def system_exclude(self, ranks=None, rank_hosts=None): return self._get_json_result( ("system", "exclude"), ranks=ranks, rank_hosts=rank_hosts) + def system_get_prop(self, properties=None): + """Call dmg system get-prop. + + Args: + properties (str, optional): Comma separated properties to get. + + Raises: + CommandFailure: if the command fails. + + Returns: + dict: the dmg json command output converted to a python dictionary + + """ + return self._get_json_result(("system", "get-prop"), properties=properties) + def system_leader_query(self): """Call dmg system leader-query. @@ -1268,6 +1283,33 @@ def system_rebuild_stop(self, verbose=False, force=False): return self._get_json_result( ("system", "rebuild", "stop"), verbose=verbose, force=force) + def system_self_heal_eval(self): + """Call dmg system self-heal eval. + + Raises: + CommandFailure: if the command fails. 
+ + Returns: + dict: the dmg json command output converted to a python dictionary + + """ + return self._get_json_result(("system", "self-heal", "eval")) + + def system_set_prop(self, properties=None): + """Call dmg system set-prop. + + Args: + properties (str): properties in the form of key:val[,key:val...] + + Raises: + CommandFailure: if the command fails. + + Returns: + dict: the dmg json command output converted to a python dictionary + + """ + return self._get_json_result(("system", "set-prop"), properties=properties) + def system_start(self, ranks=None, ignore_admin_excluded=False): """Start the system. diff --git a/src/tests/ftest/util/dmg_utils_base.py b/src/tests/ftest/util/dmg_utils_base.py index be13a06c107..96ddc889625 100644 --- a/src/tests/ftest/util/dmg_utils_base.py +++ b/src/tests/ftest/util/dmg_utils_base.py @@ -1,6 +1,6 @@ """ (C) Copyright 2020-2024 Intel Corporation. - (C) Copyright 2025 Hewlett Packard Enterprise Development LP + (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -885,6 +885,8 @@ def get_sub_command_class(self): self.sub_command_class = self.EraseSubCommand() elif self.sub_command.value == "exclude": self.sub_command_class = self.ExcludeSubCommand() + elif self.sub_command.value == "get-prop": + self.sub_command_class = self.GetPropSubCommand() elif self.sub_command.value == "leader-query": self.sub_command_class = self.LeaderQuerySubCommand() elif self.sub_command.value == "list-pools": @@ -895,6 +897,10 @@ def get_sub_command_class(self): self.sub_command_class = self.RebuildSubCommand() elif self.sub_command.value == "reintegrate": self.sub_command_class = self.ReintegrateSubCommand() + elif self.sub_command.value == "self-heal": + self.sub_command_class = self.SelfHealSubCommand() + elif self.sub_command.value == "set-prop": + self.sub_command_class = self.SetPropSubCommand() elif self.sub_command.value == "start": self.sub_command_class = 
self.StartSubCommand() elif self.sub_command.value == "stop": @@ -945,6 +951,14 @@ def __init__(self): self.ranks = FormattedParameter("--ranks={}") self.rank_hosts = FormattedParameter("--rank-hosts={}") + class GetPropSubCommand(CommandWithParameters): + """Defines an object for the dmg system get-prop command.""" + + def __init__(self): + """Create a dmg system get-prop command object.""" + super().__init__("/run/dmg/system/get-prop/*", "get-prop") + self.properties = BasicParameter(None, position=1) + class LeaderQuerySubCommand(CommandWithParameters): """Defines an object for the dmg system leader-query command.""" @@ -1011,6 +1025,36 @@ def __init__(self): self.verbose = FormattedParameter("--verbose", False) self.force = FormattedParameter("--force", False) + class SelfHealSubCommand(CommandWithSubCommand): + """Defines an object for the dmg system self-heal command.""" + + def __init__(self): + """Create a dmg system self-heal command object.""" + super().__init__("/run/dmg/system/self-heal/*", "self-heal") + + def get_sub_command_class(self): + # pylint: disable=redefined-variable-type + """Get the dmg system sub command object.""" + if self.sub_command.value == "eval": + self.sub_command_class = self.EvalSubCommand() + else: + self.sub_command_class = None + + class EvalSubCommand(CommandWithParameters): + """Defines an object for the dmg system self-heal eval command.""" + + def __init__(self): + """Create a dmg system self-heal eval command object.""" + super().__init__("/run/dmg/system/self-heal/eval/*", "eval") + + class SetPropSubCommand(CommandWithParameters): + """Defines an object for the dmg system set-prop command.""" + + def __init__(self): + """Create a dmg system set-prop command object.""" + super().__init__("/run/dmg/system/set-prop/*", "set-prop") + self.properties = BasicParameter(None, position=1) + class StartSubCommand(CommandWithParameters): """Defines an object for the dmg system start command.""" diff --git 
a/src/tests/ftest/util/test_utils_pool.py b/src/tests/ftest/util/test_utils_pool.py index 591c4fe59ae..36539943abc 100644 --- a/src/tests/ftest/util/test_utils_pool.py +++ b/src/tests/ftest/util/test_utils_pool.py @@ -96,8 +96,9 @@ def add_pool(test, namespace=POOL_NAMESPACE, create=True, connect=True, dmg=None # Add a step to remove this pool when the test completes and ensure their is enough time for the # pool destroy to be attempted - accounting for a possible dmg command timeout - test.increment_timeout(POOL_TIMEOUT_INCREMENT) - test.register_cleanup(remove_pool, test=test, pool=pool) + if pool.register_cleanup.value is True: + test.increment_timeout(POOL_TIMEOUT_INCREMENT) + test.register_cleanup(remove_pool, test=test, pool=pool) return pool @@ -306,6 +307,8 @@ def __init__(self, context, dmg_command, label_generator=None, namespace=POOL_NA # Parameter to control running 'dmg storage query usage --show_usable' if pool create fails self.query_on_create_error = BasicParameter(None, False) + self.register_cleanup = BasicParameter(True, True) # call register_cleanup by default + self.pool = None self.info = None self.svc_ranks = None @@ -724,7 +727,7 @@ def set_prop(self, *args, **kwargs): dict: json output of dmg pool set-prop command """ - return self.dmg.pool_set_prop(pool=self.identifier, *args, **kwargs) + return self.dmg.pool_set_prop(self.identifier, *args, **kwargs) @fail_on(CommandFailure) def get_prop(self, *args, **kwargs): @@ -1623,3 +1626,25 @@ def verify_query(self, expected_response, use_cached_query=False): response = self.query_data['response'] assert_dict_subset(expected_response, response) + + def verify_query_targets_state(self, ranks, expected_target_state): + """Verify all targets are in the expected state with dmg pool query-targets. + + Args: + ranks (list): The list of ranks to verify. + expected_target_state (str): The expected target state. 
+ + Raises: + AssertionError: if the targets are not in the expected state + + """ + for rank in ranks: + self.log.info( + 'Verifying targets on rank %s are in state %s', rank, expected_target_state) + response = self.query_targets(rank=rank)['response'] + infos = response['Infos'] + for target, info in enumerate(infos): + if info['target_state'] != expected_target_state: + raise AssertionError( + f'Expected target {target} to be in state {expected_target_state}, ' + f'but current state is {info["target_state"]}') From bcd7fd96bd033528f9dafc5f79fc607307e87e7c Mon Sep 17 00:00:00 2001 From: Joseph Moore <26410038+jgmoore-or@users.noreply.github.com> Date: Mon, 9 Mar 2026 11:46:04 -0600 Subject: [PATCH 247/253] DAOS-18614 cart: Fix UCX provider init for re-init of daos client. (#17593) Signed-off-by: Joseph Moore --- src/cart/crt_init.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/cart/crt_init.c b/src/cart/crt_init.c index 626ace92221..1b926465ac3 100644 --- a/src/cart/crt_init.c +++ b/src/cart/crt_init.c @@ -451,8 +451,13 @@ crt_str_to_provider(const char *str_provider) if (len > strlen(CRT_UCX_STR) && strchr(str_provider, '+')) { D_STRNDUP(p, str_provider, len); if (!p) { + /* Return provider unknown if allocation fails. */ return prov; } else { + /* Store the default UCX provider string in the alt_str + * to allow it to be restored if finalize is called. 
+ */ + crt_na_dict[i].nad_alt_str = crt_na_dict[i].nad_str; crt_na_dict[i].nad_str = p; crt_na_dict[i].nad_str_alloc = true; } @@ -1003,8 +1008,12 @@ crt_finalize(void) } for (i = 0; crt_na_dict[i].nad_str != NULL; i++) - if (crt_na_dict[i].nad_str_alloc) + if (crt_na_dict[i].nad_str_alloc) { D_FREE(crt_na_dict[i].nad_str); + crt_na_dict[i].nad_str = crt_na_dict[i].nad_alt_str; + crt_na_dict[i].nad_alt_str = NULL; + crt_na_dict[i].nad_str_alloc = false; + } D_FREE(crt_gdata.cg_secondary_provs); D_FREE(crt_gdata.cg_prov_gdata_secondary); From 4e7feb2487be1109c50e3a98a20189ab0159d898 Mon Sep 17 00:00:00 2001 From: Tom Nabarro Date: Tue, 10 Mar 2026 12:38:50 +0000 Subject: [PATCH 248/253] DAOS-18606 control: Avoid NVMe driver unbind in VMD if blocklisted (#17612) Signed-off-by: Tom Nabarro --- src/control/server/storage/bdev/backend.go | 13 ++++++++++--- .../server/storage/bdev/backend_test.go | 19 ++++--------------- src/control/server/storage/bdev/runner.go | 3 +++ 3 files changed, 17 insertions(+), 18 deletions(-) diff --git a/src/control/server/storage/bdev/backend.go b/src/control/server/storage/bdev/backend.go index d1fd8e56f07..0921f56d3e8 100644 --- a/src/control/server/storage/bdev/backend.go +++ b/src/control/server/storage/bdev/backend.go @@ -1,6 +1,6 @@ // // (C) Copyright 2019-2023 Intel Corporation. -// (C) Copyright 2025 Hewlett Packard Enterprise Development LP +// (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP // (C) Copyright 2025 Google LLC // // SPDX-License-Identifier: BSD-2-Clause-Patent @@ -320,8 +320,15 @@ func (sb *spdkBackend) prepare(req storage.BdevPrepareRequest, vmdDetect vmdDete // // Applies block (not allow) list if VMD is configured so specific NVMe devices can // be reserved for other use (bdev_exclude). 
- if err := sb.script.Unbind(&req); err != nil { - return resp, errors.Wrap(err, "un-binding devices") + // + // NOTE DAOS-18606: There may be a bug in SPDK setup script that results in backing + // device addresses being unbound despite relevant VMD address + // being supplied in blocklist. As a workaround, skip unbind in VMD + // mode if blocklist populated. + if req.PCIBlockList == "" { + if err := sb.script.Unbind(&req); err != nil { + return resp, errors.Wrap(err, "un-binding devices") + } } } else { if err := sb.script.Reset(&req); err != nil { diff --git a/src/control/server/storage/bdev/backend_test.go b/src/control/server/storage/bdev/backend_test.go index 2a3d5d7dcad..24934d9db62 100644 --- a/src/control/server/storage/bdev/backend_test.go +++ b/src/control/server/storage/bdev/backend_test.go @@ -1,6 +1,6 @@ // // (C) Copyright 2018-2022 Intel Corporation. -// (C) Copyright 2025 Hewlett Packard Enterprise Development LP +// (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP // (C) Copyright 2025 Google LLC // // SPDX-License-Identifier: BSD-2-Clause-Patent @@ -1112,6 +1112,7 @@ func TestBackend_prepare_reset(t *testing.T) { { Env: []string{ fmt.Sprintf("PATH=%s", os.Getenv("PATH")), + fmt.Sprintf("%s=%s", pciAllowListEnv, mockAddrList(1, 2)), fmt.Sprintf("%s=%s", driverOverrideEnv, noDriver), }, }, @@ -1174,6 +1175,7 @@ func TestBackend_prepare_reset(t *testing.T) { Env: []string{ fmt.Sprintf("PATH=%s", os.Getenv("PATH")), fmt.Sprintf("%s=%s", driverOverrideEnv, noDriver), + fmt.Sprintf("%s=%s", pciAllowListEnv, mockAddrList(3)), }, }, { @@ -1198,13 +1200,6 @@ func TestBackend_prepare_reset(t *testing.T) { }, vmdDetectRet: mockAddrList(3, 5), expScriptCalls: []scriptCall{ - { - Env: []string{ - fmt.Sprintf("PATH=%s", os.Getenv("PATH")), - fmt.Sprintf("%s=%s", driverOverrideEnv, noDriver), - fmt.Sprintf("%s=%s", pciBlockListEnv, mockAddrList(4)), - }, - }, { Env: []string{ fmt.Sprintf("PATH=%s", os.Getenv("PATH")), @@ -1245,6 +1240,7 @@ func 
TestBackend_prepare_reset(t *testing.T) { }, }, }, + // Populated blocklist results in unbind operation being skipped. "prepare setup; vmd enabled; vmd devices allowed and blocked": { req: storage.BdevPrepareRequest{ HugepageCount: testNrHugepages, @@ -1255,13 +1251,6 @@ func TestBackend_prepare_reset(t *testing.T) { }, vmdDetectRet: mockAddrList(3, 2), expScriptCalls: []scriptCall{ - { - Env: []string{ - fmt.Sprintf("PATH=%s", os.Getenv("PATH")), - fmt.Sprintf("%s=%s", driverOverrideEnv, noDriver), - fmt.Sprintf("%s=%s", pciBlockListEnv, mockAddrList(4)), - }, - }, { Env: []string{ fmt.Sprintf("PATH=%s", os.Getenv("PATH")), diff --git a/src/control/server/storage/bdev/runner.go b/src/control/server/storage/bdev/runner.go index 79a2207adf8..350af544c17 100644 --- a/src/control/server/storage/bdev/runner.go +++ b/src/control/server/storage/bdev/runner.go @@ -1,5 +1,6 @@ // // (C) Copyright 2019-2022 Intel Corporation. +// (C) Copyright 2026 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -131,6 +132,7 @@ func (s *spdkSetupScript) Prepare(req *storage.BdevPrepareRequest) error { func (s *spdkSetupScript) Unbind(req *storage.BdevPrepareRequest) error { s.env = map[string]string{ "PATH": os.Getenv("PATH"), + pciAllowListEnv: req.PCIAllowList, pciBlockListEnv: req.PCIBlockList, driverOverrideEnv: noDriver, } @@ -144,6 +146,7 @@ func (s *spdkSetupScript) Unbind(req *storage.BdevPrepareRequest) error { // is not set, otherwise PCI devices can be specified by passing in a allow list of PCI addresses. // // NOTE: will make the controller reappear in /dev. +// TODO DAOS-18606: Should allowlist not be sent so all non-blocked devices get bound back to kernel? 
func (s *spdkSetupScript) Reset(req *storage.BdevPrepareRequest) error { s.env = map[string]string{ "PATH": os.Getenv("PATH"), From c79c123ad5a9911edc3dd90368dfd0f1ad3fb51c Mon Sep 17 00:00:00 2001 From: Makito Kano Date: Tue, 10 Mar 2026 22:15:25 +0900 Subject: [PATCH 249/253] DAOS-18389 test: recovery/ddb.py test_recovery_ddb_rm MD-on-SSD support (#17631) Move the test from ddb_pmem.py to ddb.py and add MD-on-SSD support. Add position=1 to -w (self.write_mode) so that it's added immediately after ddb. Signed-off-by: Makito Kano --- src/tests/ftest/recovery/ddb.py | 183 +++++++++++++++++++++++++++ src/tests/ftest/recovery/ddb_pmem.py | 148 ---------------------- src/tests/ftest/util/ddb_utils.py | 16 +-- 3 files changed, 191 insertions(+), 156 deletions(-) diff --git a/src/tests/ftest/recovery/ddb.py b/src/tests/ftest/recovery/ddb.py index 00c5e7c8266..21ac91230b4 100644 --- a/src/tests/ftest/recovery/ddb.py +++ b/src/tests/ftest/recovery/ddb.py @@ -301,3 +301,186 @@ def test_recovery_ddb_ls(self): container.open() report_errors(test=self, errors=errors) + + def test_recovery_ddb_rm(self): + """Test ddb rm. + + 1. Create a pool and a container. Insert objects, dkeys, and akeys. + 2. Stop the server to use ddb. + 3. Find the vos file name. e.g., /mnt/daos0//vos-0. + 4. Call ddb rm to remove the akey. + 5. Restart the server to use the API. + 6. Reset the object, container, and pool to use the API after server restart. + 7. Call list_akey() in pydaos API to verify that the akey was removed. + 8. Stop the server to use ddb. + 9. Call ddb rm to remove the dkey. + 10. Restart the server to use the API. + 11. Reset the object, container, and pool to use the API after server restart. + 12. Call list_dkey() in pydaos API to verify that the dkey was removed. + 13. Stop the server to use ddb. + 14. Call ddb rm to remove the object. + 15. Restart the server to use daos command. + 16. Reset the container and pool so that cleanup works. + 17. 
Call "daos container list-objects " to verify that the object was + removed. + + :avocado: tags=all,full_regression + :avocado: tags=hw,medium + :avocado: tags=recovery + :avocado: tags=DdbTest,ddb_cmd,test_recovery_ddb_rm + """ + # This is where we load pool for MD-on-SSD. It's called tmpfs_mount in ddb prov_mem + # documentation, but use daos_load_path here for clarity. + daos_load_path = "/mnt/daos_load" + md_on_ssd = self.server_managers[0].manager.job.using_control_metadata + if md_on_ssd: + self.log_step("MD-on-SSD: Create a directory to load pool data under /mnt.") + self.run_cmd_check_result(command=f"mkdir {daos_load_path}") + + self.log_step("Create a pool and a container.") + pool = self.get_pool(connect=True) + container = self.get_container(pool) + + if md_on_ssd: + vos_path = '""' + else: + # Find the vos file name. e.g., /mnt/daos0//vos-0. + vos_paths = self.server_managers[0].get_vos_files(pool) + if not vos_paths: + self.fail("vos file wasn't found!") + vos_path = vos_paths[0] + + ddb_command = DdbCommand( + server_host=self.server_managers[0].hosts[0:1], path=self.bin, vos_path=vos_path) + + self.log_step("Insert one object with one dkey and one akey with API.") + obj_dataset = insert_objects( + context=self.context, container=container, object_count=1, dkey_count=1, akey_count=2, + base_dkey=self.random_dkey, base_akey=self.random_akey, base_data=self.random_data) + ioreqs = obj_dataset[0] + dkeys_inserted = obj_dataset[1] + akeys_inserted = obj_dataset[2] + + # For debugging/reference, check that the dkey and the akey we just inserted are returned + # from the API. + akeys_api = ioreqs[0].list_akey(dkey=dkeys_inserted[0]) + self.log.info("akeys from API (before) = %s", akeys_api) + dkeys_api = ioreqs[0].list_dkey() + self.log.info("dkeys from API (before) = %s", dkeys_api) + + # For debugging/reference, check that the object was inserted using daos command. 
+ list_obj_out = self.get_daos_command().container_list_objects( + pool=pool.identifier, cont=container.uuid) + self.log.info("Object list (before) = %s", list_obj_out["response"]) + + self.log_step("Stop the server to use ddb.") + dmg_command = self.get_dmg_command() + dmg_command.system_stop() + + db_path = None + if md_on_ssd: + self.log_step(f"MD-on-SSD: Load pool dir to {daos_load_path}") + db_path = os.path.join( + self.server_managers[0].manager.job.yaml.metadata_params.path.value, "daos_control", + "engine0") + ddb_command.prov_mem(db_path=db_path, tmpfs_mount=daos_load_path) + + self.log_step("Call ddb rm to remove the akey.") + if md_on_ssd: + # "ddb rm" command for MD-on-SSD is quite different. + # PMEM: ddb /mnt/daos//vos-0 rm + # MD-on-SSD: ddb -w --db_path=/var/tmp/daos_testing/control_metadata/daos_control + # /engine0 --vos_path /mnt/daos_load//vos-0 rm + ddb_command.db_path.update(value=" ".join(["--db_path", db_path])) + ddb_command.vos_path.update( + value=os.path.join(daos_load_path, pool.uuid.lower(), "vos-0")) + cmd_result = ddb_command.remove_component(component_path="[0]/[0]/[0]/[0]") + self.log.info("rm akey stdout = %s", cmd_result.joined_stdout) + + self.log_step("Restart the server to use the API.") + dmg_command.system_start() + + self.log_step("Reset the object, container, and pool to use the API after server restart.") + ioreqs[0].obj.close() + container.close() + pool.disconnect() + pool.connect() + container.open() + ioreqs[0].obj.open() + + self.log_step("Call list_akey() in pydaos API to verify that the akey was removed.") + akeys_api = ioreqs[0].list_akey(dkey=dkeys_inserted[0]) + self.log.info("akeys from API (after) = %s", akeys_api) + errors = [] + expected_len = len(akeys_inserted) - 1 + actual_len = len(akeys_api) + if actual_len != expected_len: + msg = (f"Unexpected number of akeys after ddb rm! 
Expected = {expected_len}; " + f"Actual = {actual_len}") + errors.append(msg) + + self.log_step("Stop the server to use ddb.") + dmg_command.system_stop() + + self.log_step("Call ddb rm to remove the dkey.") + cmd_result = ddb_command.remove_component(component_path="[0]/[0]/[0]") + self.log.info("rm dkey stdout = %s", cmd_result.joined_stdout) + + self.log_step("Restart the server to use the API.") + dmg_command.system_start() + + self.log_step("Reset the object, container, and pool to use the API after server restart.") + ioreqs[0].obj.close() + container.close() + pool.disconnect() + pool.connect() + container.open() + ioreqs[0].obj.open() + + self.log_step("Call list_dkey() in pydaos API to verify that the dkey was removed.") + dkeys_api = ioreqs[0].list_dkey() + self.log.info("dkeys from API (after) = %s", dkeys_api) + expected_len = len(dkeys_inserted) - 1 + actual_len = len(dkeys_api) + if actual_len != expected_len: + msg = (f"Unexpected number of dkeys after ddb rm! Expected = {expected_len}; " + f"Actual = {actual_len}") + errors.append(msg) + + self.log_step("Stop the server to use ddb.") + dmg_command.system_stop() + + self.log_step("Call ddb rm to remove the object.") + cmd_result = ddb_command.remove_component(component_path="[0]/[0]") + self.log.info("rm object stdout = %s", cmd_result.joined_stdout) + + self.log_step("Restart the server to use daos command.") + dmg_command.system_start() + + self.log_step("Reset the container and pool so that cleanup works.") + container.close() + pool.disconnect() + pool.connect() + container.open() + + self.log_step("Call daos container list-objects to verify that the object was removed.") + list_obj_out = self.get_daos_command().container_list_objects( + pool=pool.identifier, cont=container.uuid) + obj_list = list_obj_out["response"] + self.log.info("Object list (after) = %s", obj_list) + expected_len = len(ioreqs) - 1 + if obj_list: + actual_len = len(obj_list) + else: + actual_len = 0 + if actual_len != 
expected_len: + msg = (f"Unexpected number of objects after ddb rm! Expected = {expected_len}; " + f"Actual = {actual_len}") + errors.append(msg) + + if md_on_ssd: + self.log_step(f"MD-on-SSD: Clean {daos_load_path}") + self.run_cmd_check_result(command=f"umount {daos_load_path}") + self.run_cmd_check_result(command=f"rm -rf {daos_load_path}") + + report_errors(test=self, errors=errors) diff --git a/src/tests/ftest/recovery/ddb_pmem.py b/src/tests/ftest/recovery/ddb_pmem.py index 509bcfeeba4..5c3f5f80df2 100644 --- a/src/tests/ftest/recovery/ddb_pmem.py +++ b/src/tests/ftest/recovery/ddb_pmem.py @@ -117,154 +117,6 @@ def __init__(self, *args, **kwargs): self.random_akey = get_random_string(10) self.random_data = get_random_string(10) - def test_recovery_ddb_rm(self): - """Test rm. - - 1. Create a pool and a container. Insert objects, dkeys, and akeys. - 2. Stop the server to use ddb. - 3. Find the vos file name. e.g., /mnt/daos0//vos-0. - 4. Call ddb rm to remove the akey. - 5. Restart the server to use the API. - 6. Reset the object, container, and pool to use the API after server restart. - 7. Call list_akey() in pydaos API to verify that the akey was removed. - 8. Stop the server to use ddb. - 9. Call ddb rm to remove the dkey. - 10. Restart the server to use the API. - 11. Reset the object, container, and pool to use the API after server restart. - 12. Call list_dkey() in pydaos API to verify that the dkey was removed. - 13. Stop the server to use ddb. - 14. Call ddb rm to remove the object. - 15. Restart the server to use daos command. - 16. Reset the container and pool so that cleanup works. - 17. Call "daos container list-objects " to verify that the object was - removed. - - :avocado: tags=all,full_regression - :avocado: tags=vm - :avocado: tags=recovery - :avocado: tags=DdbPMEMTest,ddb_cmd,test_recovery_ddb_rm - """ - self.log_step("Create a pool and a container. 
Insert objects, dkeys, and akeys.") - pool = self.get_pool(connect=True) - container = self.get_container(pool) - - # Insert one object with one dkey and one akey with API. - obj_dataset = insert_objects( - context=self.context, container=container, object_count=1, dkey_count=1, akey_count=2, - base_dkey=self.random_dkey, base_akey=self.random_akey, base_data=self.random_data) - ioreqs = obj_dataset[0] - dkeys_inserted = obj_dataset[1] - akeys_inserted = obj_dataset[2] - - # For debugging/reference, check that the dkey and the akey we just inserted are returned - # from the API. - akeys_api = ioreqs[0].list_akey(dkey=dkeys_inserted[0]) - self.log.info("akeys from API (before) = %s", akeys_api) - dkeys_api = ioreqs[0].list_dkey() - self.log.info("dkeys from API (before) = %s", dkeys_api) - - # For debugging/reference, check that the object was inserted using daos command. - list_obj_out = self.get_daos_command().container_list_objects( - pool=pool.identifier, cont=container.uuid) - self.log.info("Object list (before) = %s", list_obj_out["response"]) - - self.log_step("Stop the server to use ddb.") - dmg_command = self.get_dmg_command() - dmg_command.system_stop() - - self.log_step("Find the vos file name.") - vos_paths = self.server_managers[0].get_vos_files(pool) - if not vos_paths: - self.fail("vos file wasn't found!") - ddb_command = DdbCommand(self.server_managers[0].hosts[0:1], self.bin, vos_paths[0]) - - self.log_step("Call ddb rm to remove the akey.") - cmd_result = ddb_command.remove_component(component_path="[0]/[0]/[0]/[0]") - self.log.info("rm akey stdout = %s", cmd_result.joined_stdout) - - self.log_step("Restart the server to use the API.") - dmg_command.system_start() - - self.log_step("Reset the object, container, and pool to use the API after server restart.") - ioreqs[0].obj.close() - container.close() - pool.disconnect() - pool.connect() - container.open() - ioreqs[0].obj.open() - - self.log_step("Call list_akey() in pydaos API to verify that the 
akey was removed.") - akeys_api = ioreqs[0].list_akey(dkey=dkeys_inserted[0]) - self.log.info("akeys from API (after) = %s", akeys_api) - errors = [] - expected_len = len(akeys_inserted) - 1 - actual_len = len(akeys_api) - if actual_len != expected_len: - msg = (f"Unexpected number of akeys after ddb rm! Expected = {expected_len}; " - f"Actual = {actual_len}") - errors.append(msg) - - self.log_step("Stop the server to use ddb.") - dmg_command.system_stop() - - self.log_step("Call ddb rm to remove the dkey.") - cmd_result = ddb_command.remove_component(component_path="[0]/[0]/[0]") - self.log.info("rm dkey stdout = %s", cmd_result.joined_stdout) - - self.log_step("Restart the server to use the API.") - dmg_command.system_start() - - self.log_step("Reset the object, container, and pool to use the API after server restart.") - ioreqs[0].obj.close() - container.close() - pool.disconnect() - pool.connect() - container.open() - ioreqs[0].obj.open() - - self.log_step("Call list_dkey() in pydaos API to verify that the dkey was removed.") - dkeys_api = ioreqs[0].list_dkey() - self.log.info("dkeys from API (after) = %s", dkeys_api) - expected_len = len(dkeys_inserted) - 1 - actual_len = len(dkeys_api) - if actual_len != expected_len: - msg = (f"Unexpected number of dkeys after ddb rm! 
Expected = {expected_len}; " - f"Actual = {actual_len}") - errors.append(msg) - - self.log_step("Stop the server to use ddb.") - dmg_command.system_stop() - - self.log_step("Call ddb rm to remove the object.") - cmd_result = ddb_command.remove_component(component_path="[0]/[0]") - self.log.info("rm object stdout = %s", cmd_result.joined_stdout) - - self.log_step("Restart the server to use daos command.") - dmg_command.system_start() - - self.log_step("Reset the container and pool so that cleanup works.") - container.close() - pool.disconnect() - pool.connect() - container.open() - - self.log_step("Call daos container list-objects to verify that the object was removed.") - list_obj_out = self.get_daos_command().container_list_objects( - pool=pool.identifier, cont=container.uuid) - obj_list = list_obj_out["response"] - self.log.info("Object list (after) = %s", obj_list) - expected_len = len(ioreqs) - 1 - if obj_list: - actual_len = len(obj_list) - else: - actual_len = 0 - if actual_len != expected_len: - msg = (f"Unexpected number of objects after ddb rm! Expected = {expected_len}; " - f"Actual = {actual_len}") - errors.append(msg) - - report_errors(test=self, errors=errors) - def test_recovery_ddb_load(self): """Test ddb value_load. diff --git a/src/tests/ftest/util/ddb_utils.py b/src/tests/ftest/util/ddb_utils.py index a2736c82026..beea3092606 100644 --- a/src/tests/ftest/util/ddb_utils.py +++ b/src/tests/ftest/util/ddb_utils.py @@ -32,16 +32,16 @@ def __init__(self, server_host, path, verbose=True, timeout=None, sudo=True): self.host = server_host # Write mode that's necessary for the commands that alters the data such as load. - self.write_mode = FormattedParameter("-w", default=False) + self.write_mode = FormattedParameter("-w", default=False, position=1) # Path to the system database. Used for MD-on-SSD. - self.db_path = BasicParameter(None, position=1) + self.db_path = BasicParameter(None, position=2) # VOS file path. 
- self.vos_path = FormattedParameter("--vos_path {}", position=2) + self.vos_path = FormattedParameter("--vos_path {}", position=3) # Command to run on the VOS file that contains container, object info, etc. - self.single_command = BasicParameter(None, position=3) + self.single_command = BasicParameter(None, position=4) # Members needed for run(). self.verbose = verbose @@ -95,7 +95,7 @@ def __init__(self, server_host, path, vos_path): self.vos_path.update(vos_path, "vos_path") def list_component(self, component_path=None): - """Call ddb -R "ls " + """Call ddb ls ls is similar to the Linux ls command. It lists objects inside the container, dkeys inside the object, and so on. @@ -163,11 +163,11 @@ def value_load(self, component_path, load_file_path): return self.run() def remove_component(self, component_path): - """Call ddb -w -R "rm " + """Call ddb -w rm Args: - component_path (str): Component that comes after rm. e.g., [0]/[1] for first - container, second object. + component_path (str): Component that comes after rm. e.g., [0]/[1] for first container, + second object. Returns: CommandResult: groups of command results from the same hosts with the same return status From c9472de8dda6ae732b5e71b58cb8690060398286 Mon Sep 17 00:00:00 2001 From: Jan Michalski Date: Tue, 10 Mar 2026 17:01:40 +0000 Subject: [PATCH 250/253] DAOS-16964 ddb: read-only mode requires enabling Copy-on-Write for... (#17533) ... PMEMOBJ pools PMEMOBJ maintains its own metadata. Copy-on-Write prevents these changes from taking effect so read-only mode will be truly read-only. Also removing the `mlock()` workaround because: - PMEM + Copy-on-Write + `mlock()` leads to increased memory usage, since the entire pool is pulled into RAM when it is opened. Where the `mlock()` serves no role whatsoever. - `mlock()` has been unnecessary for quite some time. 
It was originally added to work around a cryptic issue observed when using libfabric with the verbs provider and performing direct RDMA writes into pool memory. Direct RDMA writes to pool memory are no longer used, so the workaround is obsolete. For details please see the ticket to get the complete paper trail. Signed-off-by: Jan Michalski --- src/placement/tests/place_obj_common.h | 4 +- src/placement/tests/placement_test.c | 2 + src/utils/ddb/ddb_commands.c | 5 +- src/utils/ddb/ddb_main.c | 3 +- src/utils/ddb/ddb_vos.c | 37 +++++++++- src/utils/ddb/ddb_vos.h | 3 +- src/utils/ddb/tests/ddb_commands_tests.c | 2 +- src/utils/ddb/tests/ddb_main_tests.c | 3 +- src/utils/ddb/tests/ddb_vos_tests.c | 94 ++++++++++++++++++++++-- src/vos/vos_internal.h | 4 +- src/vos/vos_pool.c | 70 ------------------ 11 files changed, 138 insertions(+), 89 deletions(-) diff --git a/src/placement/tests/place_obj_common.h b/src/placement/tests/place_obj_common.h index 537fbe963a2..34abd403b77 100644 --- a/src/placement/tests/place_obj_common.h +++ b/src/placement/tests/place_obj_common.h @@ -1,11 +1,9 @@ /** * (C) Copyright 2016-2023 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ -#define D_LOGFAC DD_FAC(tests) - #ifndef __PL_MAP_COMMON_H__ #define __PL_MAP_COMMON_H__ diff --git a/src/placement/tests/placement_test.c b/src/placement/tests/placement_test.c index 1649fdd7fa1..c86d0e10ec9 100644 --- a/src/placement/tests/placement_test.c +++ b/src/placement/tests/placement_test.c @@ -1,9 +1,11 @@ /** * (C) Copyright 2021-2023 Intel Corporation. 
+ * (C) Copyright 2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent * */ +#define D_LOGFAC DD_FAC(tests) #include #include "place_obj_common.h" diff --git a/src/utils/ddb/ddb_commands.c b/src/utils/ddb/ddb_commands.c index 04cdd4e41ce..77553fe2bd2 100644 --- a/src/utils/ddb/ddb_commands.c +++ b/src/utils/ddb/ddb_commands.c @@ -83,7 +83,7 @@ ddb_run_open(struct ddb_ctx *ctx, struct open_options *opt) DDB_POOL_SHOULD_CLOSE(ctx); ctx->dc_write_mode = opt->write_mode; - return dv_pool_open(opt->path, opt->db_path, &ctx->dc_poh, 0); + return dv_pool_open(opt->path, opt->db_path, &ctx->dc_poh, 0, ctx->dc_write_mode); } int @@ -1101,7 +1101,8 @@ ddb_run_feature(struct ddb_ctx *ctx, struct feature_options *opt) if (!opt->db_path || strnlen(opt->db_path, PATH_MAX) == 0) opt->db_path = ctx->dc_db_path; - rc = dv_pool_open(opt->path, opt->db_path, &ctx->dc_poh, VOS_POF_FOR_FEATURE_FLAG); + rc = dv_pool_open(opt->path, opt->db_path, &ctx->dc_poh, VOS_POF_FOR_FEATURE_FLAG, + ctx->dc_write_mode); if (rc) return rc; close = true; diff --git a/src/utils/ddb/ddb_main.c b/src/utils/ddb/ddb_main.c index 8b66a4b17d5..050ae9d9176 100644 --- a/src/utils/ddb/ddb_main.c +++ b/src/utils/ddb/ddb_main.c @@ -252,7 +252,8 @@ ddb_main(struct ddb_io_ft *io_ft, int argc, char *argv[]) if (!SUCCESS(rc)) D_GOTO(done, rc); if (open) { - rc = dv_pool_open(pa.pa_pool_path, pa.pa_db_path, &ctx.dc_poh, 0); + rc = + dv_pool_open(pa.pa_pool_path, pa.pa_db_path, &ctx.dc_poh, 0, ctx.dc_write_mode); if (!SUCCESS(rc)) D_GOTO(done, rc); } diff --git a/src/utils/ddb/ddb_vos.c b/src/utils/ddb/ddb_vos.c index 7d4409a36c2..4ee22fbe099 100644 --- a/src/utils/ddb/ddb_vos.c +++ b/src/utils/ddb/ddb_vos.c @@ -11,6 +11,7 @@ #include #include +#include #include #include #include @@ -27,9 +28,11 @@ anchors, cb, NULL, args, NULL) int -dv_pool_open(const char *path, const char *db_path, daos_handle_t *poh, uint32_t flags) +dv_pool_open(const char *path, const char *db_path, 
daos_handle_t *poh, uint32_t flags, + bool write_mode) { struct vos_file_parts path_parts = {0}; + int cow_val; int rc; /* @@ -47,11 +50,34 @@ dv_pool_open(const char *path, const char *db_path, daos_handle_t *poh, uint32_t strncpy(path_parts.vf_db_path, db_path, sizeof(path_parts.vf_db_path) - 1); } + /** + * When the user requests read‑only mode (write_mode == false), DDB itself will not attempt + * to modify the pool. However, PMEMOBJ performs several operations that do modify the pool + * during open and/or close, for example: + * - Internal bookkeeping required to ensure resilience in case of an ADR failure (SDS). + * - ULOG replay, which restores the pool to a consistent state. + * These mechanisms cannot be disabled because they are essential for PMEMOBJ to maintain + * the consistency of the pool. + * + * However, since none of these changes need to be persisted when the pool is opened in + * read‑only mode (write_mode == false), we can work around this by mapping the pool using + * copy‑on‑write. Copy‑on‑write allows pages to be read normally, but when a page is + * modified, a new private copy is allocated. As a result, any changes made to + * the mapped memory do not propagate to the persistent medium. + */ + if (!write_mode) { + cow_val = 1; + rc = pmemobj_ctl_set(NULL, "copy_on_write.at_open", &cow_val); + if (rc != 0) { + return daos_errno2der(errno); + } + } + rc = vos_self_init(path_parts.vf_db_path, true, path_parts.vf_target_idx); if (!SUCCESS(rc)) { D_ERROR("Failed to initialize VOS with path '%s': "DF_RC"\n", path_parts.vf_db_path, DP_RC(rc)); - return rc; + goto exit; } rc = vos_pool_open(path, path_parts.vf_pool_uuid, flags, poh); @@ -60,6 +86,13 @@ dv_pool_open(const char *path, const char *db_path, daos_handle_t *poh, uint32_t vos_self_fini(); } +exit: + if (!write_mode) { + /** Restore the default value. 
*/ + cow_val = 0; + pmemobj_ctl_set(NULL, "copy_on_write.at_open", &cow_val); + } + return rc; } diff --git a/src/utils/ddb/ddb_vos.h b/src/utils/ddb/ddb_vos.h index 7d1da8900fc..675c4ceb33b 100644 --- a/src/utils/ddb/ddb_vos.h +++ b/src/utils/ddb/ddb_vos.h @@ -52,7 +52,8 @@ struct ddb_array { /* Open and close a pool for a ddb_ctx */ int - dv_pool_open(const char *path, const char *db_path, daos_handle_t *poh, uint32_t flags); + dv_pool_open(const char *path, const char *db_path, daos_handle_t *poh, uint32_t flags, + bool write_mode); int dv_pool_close(daos_handle_t poh); int dv_pool_destroy(const char *path, const char *db_path); diff --git a/src/utils/ddb/tests/ddb_commands_tests.c b/src/utils/ddb/tests/ddb_commands_tests.c index 35f6ff7299e..81bc9673dfe 100644 --- a/src/utils/ddb/tests/ddb_commands_tests.c +++ b/src/utils/ddb/tests/ddb_commands_tests.c @@ -583,7 +583,7 @@ dcv_suit_setup(void **state) /* test setup creates the pool, but doesn't open it ... leave it open for these tests */ tctx = *state; - assert_success(dv_pool_open(tctx->dvt_pmem_file, NULL, &tctx->dvt_poh, 0)); + assert_success(dv_pool_open(tctx->dvt_pmem_file, NULL, &tctx->dvt_poh, 0, true)); g_ctx.dc_poh = tctx->dvt_poh; diff --git a/src/utils/ddb/tests/ddb_main_tests.c b/src/utils/ddb/tests/ddb_main_tests.c index 533f64fb422..35a100cf063 100644 --- a/src/utils/ddb/tests/ddb_main_tests.c +++ b/src/utils/ddb/tests/ddb_main_tests.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2022-2024 Intel Corporation. + * (C) Copyright 2026 Hewlett Packard Enterprise Development LP * (C) Copyright 2025 Vdura Inc. * * SPDX-License-Identifier: BSD-2-Clause-Patent @@ -242,7 +243,7 @@ ddb_main_suit_setup(void **state) /* test setup creates the pool, but doesn't open it ... 
leave it open for these tests */ tctx = *state; - assert_success(dv_pool_open(tctx->dvt_pmem_file, NULL, &tctx->dvt_poh, 0)); + assert_success(dv_pool_open(tctx->dvt_pmem_file, NULL, &tctx->dvt_poh, 0, true)); return 0; } diff --git a/src/utils/ddb/tests/ddb_vos_tests.c b/src/utils/ddb/tests/ddb_vos_tests.c index 2812a775763..f5eb354642e 100644 --- a/src/utils/ddb/tests/ddb_vos_tests.c +++ b/src/utils/ddb/tests/ddb_vos_tests.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2022-2024 Intel Corporation. + * (C) Copyright 2026 Hewlett Packard Enterprise Development LP * (C) Copyright 2025 Vdura Inc. * * SPDX-License-Identifier: BSD-2-Clause-Patent @@ -13,6 +14,8 @@ #include "ddb_cmocka.h" #include "ddb_test_driver.h" +#include "../../placement/tests/place_obj_common.h" + /* * The tests in this file depend on a VOS instance with a bunch of data written. The tests will * verify that different parts of the VOS tree can be navigated/iterated. The way the @@ -182,13 +185,13 @@ open_pool_test(void **state) daos_handle_t poh; struct dt_vos_pool_ctx *tctx = *state; - assert_rc_equal(-DER_INVAL, dv_pool_open("/bad/path", NULL, &poh, 0)); + assert_rc_equal(-DER_INVAL, dv_pool_open("/bad/path", NULL, &poh, 0, false)); - assert_success(dv_pool_open(tctx->dvt_pmem_file, NULL, &poh, 0)); + assert_success(dv_pool_open(tctx->dvt_pmem_file, NULL, &poh, 0, false)); assert_success(dv_pool_close(poh)); /* should be able to open again after closing */ - assert_success(dv_pool_open(tctx->dvt_pmem_file, NULL, &poh, 0)); + assert_success(dv_pool_open(tctx->dvt_pmem_file, NULL, &poh, 0, false)); assert_success(dv_pool_close(poh)); } @@ -1087,7 +1090,7 @@ dv_test_setup(void **state) active_entry_handler_called = 0; committed_entry_handler_called = 0; - assert_success(dv_pool_open(tctx->dvt_pmem_file, NULL, &tctx->dvt_poh, 0)); + assert_success(dv_pool_open(tctx->dvt_pmem_file, NULL, &tctx->dvt_poh, 0, true)); return 0; } @@ -1108,7 +1111,8 @@ pool_flags_tests(void **state) uint64_t compat_flags; uint64_t 
incompat_flags; - assert_success(dv_pool_open(tctx->dvt_pmem_file, NULL, &poh, VOS_POF_FOR_FEATURE_FLAG)); + assert_success( + dv_pool_open(tctx->dvt_pmem_file, NULL, &poh, VOS_POF_FOR_FEATURE_FLAG, true)); assert_success(dv_pool_get_flags(poh, &compat_flags, &incompat_flags)); assert(compat_flags == 0); assert(incompat_flags == 0); @@ -1120,6 +1124,84 @@ pool_flags_tests(void **state) assert_success(dv_pool_close(poh)); } +#define SHA256_DIGEST_LEN 64 + +struct file_state { + struct stat stat; + char digest[SHA256_DIGEST_LEN]; +}; + +#define FILE_STATE_PRE 0 +#define FILE_STATE_POST 1 + +/** + * Use sha256sum utility to get the sha256 digest of the file. + * + * \note sha256sum was used to avoid introducing libcrypto dependency. + */ +static void +sha256sum(const char *file, char digest[SHA256_DIGEST_LEN]) +{ + char cmd[1024]; + snprintf(cmd, sizeof(cmd), "sha256sum \"%s\"", file); + + FILE *fp = popen(cmd, "r"); + assert_non_null(fp); + + /** sha256sum prints: <64 hex chars> */ + assert_int_equal(fscanf(fp, "%" STR(SHA256_DIGEST_LEN) "s", digest), 1); + + pclose(fp); +} + +/** + * Simple sequence of operations: + * - stat + sha256sum + * - open + * - update a single value + * - close + * - stat + sha256sum + * + * \param[in] tctx Test context to get the pool name and access to the pool handle. + * \param[out] fs [0] state of the pool file at the beginning and [1] at the end. + * \param[in] write_mode Whether to open the pool in the write mode. 
+ */ +static void +helper_stat_open_modify_close_stat(struct dt_vos_pool_ctx *tctx, struct file_state fs[2], + bool write_mode) +{ + const char *path = tctx->dvt_pmem_file; + + assert_int_equal(stat(path, &fs[FILE_STATE_PRE].stat), 0); + sha256sum(path, fs[FILE_STATE_PRE].digest); + + assert_success(dv_pool_open(path, NULL, &tctx->dvt_poh, 0, write_mode)); + update_value_to_modify_tests((void **)&tctx); + assert_success(dv_pool_close(tctx->dvt_poh)); + + assert_int_equal(stat(path, &fs[FILE_STATE_POST].stat), 0); + sha256sum(path, fs[FILE_STATE_POST].digest); +} + +static void +read_only_vs_write_mode_test(void **state) +{ + struct dt_vos_pool_ctx *tctx = *state; + struct file_state fs[2]; + + /** In read‑only mode, the pool contents remain unchanged, and its mtime stays the same. */ + helper_stat_open_modify_close_stat(tctx, fs, false /** read-only */); + assert_int_equal(fs[FILE_STATE_PRE].stat.st_mtime, fs[FILE_STATE_POST].stat.st_mtime); + assert_memory_equal(fs[FILE_STATE_PRE].digest, fs[FILE_STATE_PRE].digest, + SHA256_DIGEST_LEN); + + /** In write mode, the pool contents will change and its mtime will increase. */ + helper_stat_open_modify_close_stat(tctx, fs, true /** read-write */); + assert_true(fs[FILE_STATE_PRE].stat.st_mtime < fs[FILE_STATE_POST].stat.st_mtime); + assert_memory_not_equal(fs[FILE_STATE_PRE].digest, fs[FILE_STATE_POST].digest, + SHA256_DIGEST_LEN); +} + /* * All these tests use the same VOS tree that is created at suit_setup. Therefore, tests * that modify the state of the tree (delete, add, etc) should be run after all others. 
@@ -1149,6 +1231,8 @@ const struct CMUnitTest dv_test_cases[] = { TEST(dtx_abort_active_table), TEST(path_verify), {"pool_flag_update", pool_flags_tests, NULL, NULL}, + {"read_only_vs_write_mode", read_only_vs_write_mode_test, NULL, + NULL}, /* don't want this test to run with setup */ }; int diff --git a/src/vos/vos_internal.h b/src/vos/vos_internal.h index 40927b22d28..f7a00dd0918 100644 --- a/src/vos/vos_internal.h +++ b/src/vos/vos_internal.h @@ -316,9 +316,7 @@ struct vos_pool { /** memory attribute of the @vp_umm */ struct umem_attr vp_uma; /** memory class instance of the pool */ - struct umem_instance vp_umm; - /** Size of pool file */ - uint64_t vp_size; + struct umem_instance vp_umm; /** Features enabled for this pool */ uint64_t vp_feats; /** btr handle for the container table */ diff --git a/src/vos/vos_pool.c b/src/vos/vos_pool.c index 7d3f95142da..a6e06d6e026 100644 --- a/src/vos/vos_pool.c +++ b/src/vos/vos_pool.c @@ -1162,16 +1162,6 @@ pool_hop_free(struct d_ulink *hlink) if (daos_handle_is_valid(pool->vp_cont_th)) dbtree_close(pool->vp_cont_th); - if (pool->vp_size != 0) { - rc = munlock((void *)pool->vp_umm.umm_base, pool->vp_size); - if (rc != 0) - D_WARN("Failed to unlock pool memory at "DF_X64": errno=%d (%s)\n", - pool->vp_umm.umm_base, errno, strerror(errno)); - else - D_DEBUG(DB_MGMT, "Unlocked VOS pool memory: "DF_U64" bytes at "DF_X64"\n", - pool->vp_size, pool->vp_umm.umm_base); - } - if (pool->vp_uma.uma_pool) vos_pmemobj_close(pool->vp_uma.uma_pool); @@ -1608,65 +1598,6 @@ vos_pool_destroy(const char *path, uuid_t uuid) return vos_pool_destroy_ex(path, uuid, 0); } -enum { - /** Memory locking flag not initialized */ - LM_FLAG_UNINIT, - /** Memory locking disabled */ - LM_FLAG_DISABLED, - /** Memory locking enabled */ - LM_FLAG_ENABLED -}; - -static void -lock_pool_memory(struct vos_pool *pool) -{ - static int lock_mem = LM_FLAG_UNINIT; - struct rlimit rlim; - size_t lock_bytes; - int rc; - - if (lock_mem == LM_FLAG_UNINIT) { - rc = 
getrlimit(RLIMIT_MEMLOCK, &rlim); - if (rc != 0) { - D_WARN("getrlimit() failed; errno=%d (%s)\n", errno, strerror(errno)); - lock_mem = LM_FLAG_DISABLED; - return; - } - - if (rlim.rlim_cur != RLIM_INFINITY || rlim.rlim_max != RLIM_INFINITY) { - D_WARN("Infinite rlimit not detected, not locking VOS pool memory\n"); - lock_mem = LM_FLAG_DISABLED; - return; - } - - lock_mem = LM_FLAG_ENABLED; - } - - if (lock_mem == LM_FLAG_DISABLED) - return; - - /* - * Mlock may take several tens of seconds to complete when memory - * is tight, so mlock is skipped in current MD-on-SSD scenario. - */ - if (bio_nvme_configured(SMD_DEV_TYPE_META)) - return; - - lock_bytes = pool->vp_pool_df->pd_scm_sz; - rc = mlock((void *)pool->vp_umm.umm_base, lock_bytes); - if (rc != 0) { - D_WARN("Could not lock memory for VOS pool "DF_U64" bytes at "DF_X64 - "; errno=%d (%s)\n", lock_bytes, pool->vp_umm.umm_base, - errno, strerror(errno)); - return; - } - - /* Only save the size if the locking was successful */ - pool->vp_size = lock_bytes; - D_DEBUG(DB_MGMT, "Locking VOS pool in memory "DF_U64" bytes at "DF_X64"\n", pool->vp_size, - pool->vp_umm.umm_base); -} - static int pool_open_prep(uuid_t uuid, unsigned int flags, struct vos_pool **p_pool) { @@ -1816,7 +1747,6 @@ pool_open_post(struct umem_pool **p_ph, struct vos_pool_df *pool_df, unsigned in vos_space_sys_init(pool); /* Ensure GC is triggered after server restart */ gc_add_pool(pool); - lock_pool_memory(pool); out: DL_CDEBUG(rc != 0, DLOG_ERR, DB_MGMT, rc, From ead45179811319d0269f8558b0426923537ee988 Mon Sep 17 00:00:00 2001 From: Cedric Koch-Hofer <94527853+knard38@users.noreply.github.com> Date: Tue, 10 Mar 2026 18:04:28 +0100 Subject: [PATCH 251/253] DAOS-18645 ddb: fix incompatible linked library (#17634) Remove from ddb, link dependency with `libdaos_common.so` Signed-off-by: Cedric Koch-Hofer --- src/control/SConscript | 14 ++++++++++++++ src/control/cmd/daos/util.go | 5 ++++- src/control/lib/daos/api/api.go | 5 ++++- 
src/control/lib/daos/api/container.go | 4 +++- src/control/lib/daos/api/handle.go | 4 +++- src/control/lib/daos/api/libdaos.go | 5 ++++- src/control/lib/daos/api/system.go | 5 ++++- src/control/lib/daos/container.go | 4 +++- src/control/lib/daos/container_property.go | 6 ++++-- src/control/lib/daos/pool_property.go | 6 ++++-- 10 files changed, 47 insertions(+), 11 deletions(-) diff --git a/src/control/SConscript b/src/control/SConscript index 44720122a18..aa40e619d30 100644 --- a/src/control/SConscript +++ b/src/control/SConscript @@ -24,9 +24,23 @@ def get_build_tags(benv): tags.append("pprof") else: tags.append("release") + if is_server_build(benv): + print("Building server go binary: adding 'server' build tag") + tags.append("server") return f"-tags {','.join(tags)}" +def is_server_build(benv): + """Check if the go-lang binary being built is a server binary.""" + env = benv.get("ENV") + if env is None: + return False + ld_flags = env.get("CGO_LDFLAGS") + if ld_flags is None: + return False + return "-ldaos_common_pmem" in ld_flags + + def is_release_build(benv): "Check whether this build is for release." return benv.get("BUILD_TYPE") == "release" diff --git a/src/control/cmd/daos/util.go b/src/control/cmd/daos/util.go index ce57e4e3923..f3961f288a5 100644 --- a/src/control/cmd/daos/util.go +++ b/src/control/cmd/daos/util.go @@ -1,5 +1,6 @@ // // (C) Copyright 2021-2024 Intel Corporation. +// (C) Copyright 2026 Hewlett Packard Enterprise Development LP // (C) Copyright 2025 Google LLC // // SPDX-License-Identifier: BSD-2-Clause-Patent @@ -29,7 +30,9 @@ import ( // NB: There should only be one set of CFLAGS/LDFLAGS definitions // for the whole package! 
#cgo CFLAGS: -I${SRCDIR}/../../../utils -#cgo LDFLAGS: -lgurt -lcart -ldaos -ldaos_common -lduns -ldfs -luuid -ldaos_cmd_hdlrs +#cgo LDFLAGS: -lgurt -lcart -ldaos -lduns -ldfs -luuid -ldaos_cmd_hdlrs +#cgo !server LDFLAGS: -ldaos_common +#cgo server LDFLAGS: -ldaos_common_pmem #include "util.h" diff --git a/src/control/lib/daos/api/api.go b/src/control/lib/daos/api/api.go index add21b6da57..16664243522 100644 --- a/src/control/lib/daos/api/api.go +++ b/src/control/lib/daos/api/api.go @@ -1,5 +1,6 @@ // // (C) Copyright 2024 Intel Corporation. +// (C) Copyright 2026 Hewlett Packard Enterprise Development LP // (C) Copyright 2025 Google LLC // // SPDX-License-Identifier: BSD-2-Clause-Patent @@ -16,7 +17,9 @@ import ( /* #include -#cgo LDFLAGS: -lcart -lgurt -ldaos -ldaos_common +#cgo LDFLAGS: -lcart -lgurt -ldaos +#cgo !server LDFLAGS: -ldaos_common +#cgo server LDFLAGS: -ldaos_common_pmem */ import "C" diff --git a/src/control/lib/daos/api/container.go b/src/control/lib/daos/api/container.go index d43c1a40a5b..abb3e3b2291 100644 --- a/src/control/lib/daos/api/container.go +++ b/src/control/lib/daos/api/container.go @@ -1,5 +1,6 @@ // // (C) Copyright 2025 Google LLC +// (C) Copyright 2026 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -27,7 +28,8 @@ import ( #include "util.h" -#cgo LDFLAGS: -ldaos_common +#cgo !server LDFLAGS: -ldaos_common +#cgo server LDFLAGS: -ldaos_common_pmem */ import "C" diff --git a/src/control/lib/daos/api/handle.go b/src/control/lib/daos/api/handle.go index 34981c71ec6..34f4deb48f7 100644 --- a/src/control/lib/daos/api/handle.go +++ b/src/control/lib/daos/api/handle.go @@ -1,5 +1,6 @@ // // (C) Copyright 2025 Google LLC +// (C) Copyright 2026 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -19,7 +20,8 @@ import ( /* #include -#cgo LDFLAGS: -ldaos_common +#cgo !server LDFLAGS: -ldaos_common +#cgo server LDFLAGS: -ldaos_common_pmem */ 
import "C" diff --git a/src/control/lib/daos/api/libdaos.go b/src/control/lib/daos/api/libdaos.go index 58cf619f760..61d0e59db1e 100644 --- a/src/control/lib/daos/api/libdaos.go +++ b/src/control/lib/daos/api/libdaos.go @@ -1,5 +1,6 @@ // // (C) Copyright 2024 Intel Corporation. +// (C) Copyright 2026 Hewlett Packard Enterprise Development LP // (C) Copyright 2025 Google LLC // // SPDX-License-Identifier: BSD-2-Clause-Patent @@ -15,7 +16,9 @@ package api #include #include -#cgo LDFLAGS: -lcart -lgurt -ldaos -ldaos_common +#cgo LDFLAGS: -lcart -lgurt -ldaos +#cgo !server LDFLAGS: -ldaos_common +#cgo server LDFLAGS: -ldaos_common_pmem */ import "C" import "unsafe" diff --git a/src/control/lib/daos/api/system.go b/src/control/lib/daos/api/system.go index 73001363a25..6a631740bdf 100644 --- a/src/control/lib/daos/api/system.go +++ b/src/control/lib/daos/api/system.go @@ -1,5 +1,6 @@ // // (C) Copyright 2024 Intel Corporation. +// (C) Copyright 2026 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -19,7 +20,9 @@ import ( /* #include -#cgo LDFLAGS: -lcart -lgurt -ldaos -ldaos_common +#cgo LDFLAGS: -lcart -lgurt -ldaos +#cgo !server LDFLAGS: -ldaos_common +#cgo server LDFLAGS: -ldaos_common_pmem */ import "C" diff --git a/src/control/lib/daos/container.go b/src/control/lib/daos/container.go index a5c1bae6c42..fc3b5ece6f2 100644 --- a/src/control/lib/daos/container.go +++ b/src/control/lib/daos/container.go @@ -1,5 +1,6 @@ // // (C) Copyright 2024 Intel Corporation. 
+// (C) Copyright 2026 Hewlett Packard Enterprise Development LP // (C) Copyright 2025 Google LLC // // SPDX-License-Identifier: BSD-2-Clause-Patent @@ -22,7 +23,8 @@ import ( #include #include -#cgo LDFLAGS: -ldaos_common +#cgo !server LDFLAGS: -ldaos_common +#cgo server LDFLAGS: -ldaos_common_pmem */ import "C" diff --git a/src/control/lib/daos/container_property.go b/src/control/lib/daos/container_property.go index 057dc262f34..c4530223182 100644 --- a/src/control/lib/daos/container_property.go +++ b/src/control/lib/daos/container_property.go @@ -1,6 +1,6 @@ // // (C) Copyright 2021-2023 Intel Corporation. -// (C) Copyright 2025 Hewlett Packard Enterprise Development LP +// (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP // (C) Copyright 2025 Google LLC // // SPDX-License-Identifier: BSD-2-Clause-Patent @@ -46,7 +46,9 @@ daos_prop_co_status_val(uint32_t status, uint32_t flag, uint32_t ver) return DAOS_PROP_CO_STATUS_VAL(status, flag, ver); } -#cgo LDFLAGS: -ldaos_common -lgurt -lcart +#cgo LDFLAGS: -lgurt -lcart +#cgo !server LDFLAGS: -ldaos_common +#cgo server LDFLAGS: -ldaos_common_pmem */ import "C" diff --git a/src/control/lib/daos/pool_property.go b/src/control/lib/daos/pool_property.go index 36c7ae08e00..59d642e1dba 100644 --- a/src/control/lib/daos/pool_property.go +++ b/src/control/lib/daos/pool_property.go @@ -1,7 +1,7 @@ // // (C) Copyright 2021-2023 Intel Corporation. 
// (C) Copyright 2025 Google LLC -// (C) Copyright 2025 Hewlett Packard Enterprise Development LP +// (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -30,7 +30,9 @@ import ( #include #include -#cgo LDFLAGS: -ldaos_common -lgurt -lcart +#cgo LDFLAGS: -lgurt -lcart +#cgo !server LDFLAGS: -ldaos_common +#cgo server LDFLAGS: -ldaos_common_pmem */ import "C" From c2f11e68e6717799d365f71e11f86e76243cc4d9 Mon Sep 17 00:00:00 2001 From: Jan Michalski Date: Tue, 10 Mar 2026 19:37:20 +0000 Subject: [PATCH 252/253] DAOS-18597 bio: allow SPDK reinitialization (#17614) Explicitly reset the RPC state so SPDK can be reinitialized multiple times in the same process. Ref: spdk/spdk@fba209c7324a11b9230533144c02e7a66bc738ea Ref: daos-stack/daos#16774 Signed-off-by: Jan Michalski --- src/bio/bio_xstream.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/bio/bio_xstream.c b/src/bio/bio_xstream.c index 3fe1dbdb319..043ba121ece 100644 --- a/src/bio/bio_xstream.c +++ b/src/bio/bio_xstream.c @@ -1710,6 +1710,19 @@ bio_xsctxt_init_by_config(struct common_cp_arg *cp_arg) return -DER_NOMEM; } + /** + * Initially, this was called internally spdk_subsystem_load_config() -> ... -> + * spdk_rpc_initialize(). However, since commit + * https://github.com/spdk/spdk/commit/fba209c7324a11b9230533144c02e7a66bc738ea (>=v24.01) + * SPDK_RPC_STARTUP has become the initial value of the underlying global variable and it + * is no longer reset automatically. This makes no difference for applications that + * initialize SPDK only once during the lifetime of the process. But some BIO module + * consumers—such as DDB—expect to be able to initialize, finalize, and then reinitialize + * SPDK multiple times within the same process, for example when inspecting multiple pools + * sequentially. For those use cases, the RPC state must now be reset explicitly. 
+ */ + spdk_rpc_set_state(SPDK_RPC_STARTUP); + D_ALLOC_PTR(init_arg); if (init_arg == NULL) { free(json_data); From 10f10c5f4d1fff7163b7b42741278ba03c53dbf1 Mon Sep 17 00:00:00 2001 From: Ryon Jensen Date: Wed, 11 Mar 2026 17:59:11 -0600 Subject: [PATCH 253/253] squashed changes --- Jenkinsfile | 743 ++---------------- ci/parse_ci_envs.sh | 6 +- .../post_provision_config_nodes_LEAP.sh | 19 + src/tests/ftest/container/list.yaml | 2 +- src/tests/ftest/launch.py | 28 +- src/tests/ftest/pool/destroy.yaml | 2 +- 6 files changed, 120 insertions(+), 680 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index d1b4d4abc1e..da1b6f9672c 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -18,7 +18,7 @@ // To use a test branch (i.e. PR) until it lands to master // I.e. for testing library changes -//@Library(value='pipeline-lib@your_branch') _ +@Library(value='pipeline-lib@ryon-jensen/sles-15sp7') _ /* groovylint-disable-next-line CompileStatic */ job_status_internal = [:] @@ -455,69 +455,36 @@ pipeline { } } } - stage('Check PR') { - when { changeRequest() } - parallel { - stage('Branch name check') { - when { changeRequest() } - steps { - script { - if (env.CHANGE_ID.toInteger() > 9742 && !env.CHANGE_BRANCH.contains('/')) { - error('Your PR branch name does not follow the rules. Please rename it ' + - 'according to the rules described here: ' + - 'https://daosio.atlassian.net/l/cp/UP1sPTvc#branch_names. 
' + - 'Once you have renamed your branch locally to match the ' + - 'format, close this PR and open a new one using the newly renamed ' + - 'local branch.') - } - } - } - } - } // parallel - } // stage('Check PR') - stage('Cancel Previous Builds') { - when { - beforeAgent true - expression { !paramsValue('CI_CANCEL_PREV_BUILD_SKIP', false) && !skipStage() } - } - steps { - cancelPreviousBuilds() - } - } - stage('Pre-build') { - when { - beforeAgent true - expression { !skipStage() } - } - parallel { - stage('Python Bandit check') { - when { - beforeAgent true - expression { !skipStage() } - } - agent { - dockerfile { - filename 'utils/docker/Dockerfile.code_scanning' - label 'docker_runner' - additionalBuildArgs dockerBuildArgs(add_repos: false) + - ' --build-arg FVERSION=37' - } - } - steps { - job_step_update(pythonBanditCheck()) - } - post { - always { - // Bandit will have empty results if it does not - // find any issues. - junit testResults: 'bandit.xml', - allowEmptyResults: true - job_status_update() - } - } - } // stage('Python Bandit check') - } - } +// stage('Check PR') { +// when { changeRequest() } +// parallel { +// stage('Branch name check') { +// when { changeRequest() } +// steps { +// script { +// if (env.CHANGE_ID.toInteger() > 9742 && !env.CHANGE_BRANCH.contains('/')) { +// error('Your PR branch name does not follow the rules. Please rename it ' + +// 'according to the rules described here: ' + +// 'https://daosio.atlassian.net/l/cp/UP1sPTvc#branch_names. 
' + +// 'Once you have renamed your branch locally to match the ' + +// 'format, close this PR and open a new one using the newly renamed ' + +// 'local branch.') +// } +// } +// } +// } +// } // parallel +// } // stage('Check PR') +// stage('Cancel Previous Builds') { +// when { +// beforeAgent true +// expression { !paramsValue('CI_CANCEL_PREV_BUILD_SKIP', false) && !skipStage() } +// } +// steps { +// cancelPreviousBuilds() +// } +// } + stage('Build') { /* Don't use failFast here as whilst it avoids using extra resources * and gives faster results for PRs it's also on for master where we @@ -529,111 +496,6 @@ pipeline { expression { !skip_build_stage() } } parallel { - stage('Build on EL 8.8') { - when { - beforeAgent true - expression { !skip_build_stage('el8') } - } - agent { - dockerfile { - filename 'utils/docker/Dockerfile.el.8' - label 'docker_runner' - additionalBuildArgs dockerBuildArgs(repo_type: 'stable', - deps_build: false, - parallel_build: true) + - " -t ${sanitized_JOB_NAME()}-el8 " + - ' --build-arg DAOS_PACKAGES_BUILD=no ' + - ' --build-arg DAOS_KEEP_SRC=yes ' + - ' --build-arg REPOS="' + prRepos() + '"' - } - } - steps { - script { - sh label: 'Install RPMs', - script: './ci/rpm/install_deps.sh el8 "' + env.DAOS_RELVAL + '"' - sh label: 'Build deps', - script: './ci/rpm/build_deps.sh' - job_step_update( - sconsBuild(parallel_build: true, - stash_files: 'ci/test_files_to_stash.txt', - build_deps: 'no', - stash_opt: true, - scons_args: sconsArgs() + - ' PREFIX=/opt/daos TARGET_TYPE=release')) - sh label: 'Generate RPMs', - script: './ci/rpm/gen_rpms.sh el8 "' + env.DAOS_RELVAL + '"' - } - } - post { - success { - uploadNewRPMs('el8', 'success') - } - unsuccessful { - sh '''if [ -f config.log ]; then - mv config.log config.log-el8-gcc - fi''' - archiveArtifacts artifacts: 'config.log-el8-gcc', - allowEmptyArchive: true - } - cleanup { - uploadNewRPMs('el8', 'cleanup') - job_status_update() - } - } - } - stage('Build on EL 9.6') { - when { - 
beforeAgent true - expression { !skip_build_stage('el9') } - } - agent { - dockerfile { - filename 'utils/docker/Dockerfile.el.9' - label 'docker_runner' - additionalBuildArgs dockerBuildArgs(repo_type: 'stable', - deps_build: false, - parallel_build: true) + - " -t ${sanitized_JOB_NAME()}-el9 " + - ' --build-arg DAOS_PACKAGES_BUILD=no ' + - ' --build-arg DAOS_KEEP_SRC=yes ' + - ' --build-arg REPOS="' + prRepos() + '"' + - ' --build-arg POINT_RELEASE=.6 ' - } - } - steps { - script { - sh label: 'Install RPMs', - script: './ci/rpm/install_deps.sh el9 "' + env.DAOS_RELVAL + '"' - sh label: 'Build deps', - script: './ci/rpm/build_deps.sh' - job_step_update( - sconsBuild(parallel_build: true, - stash_files: 'ci/test_files_to_stash.txt', - build_deps: 'no', - stash_opt: true, - scons_args: sconsArgs() + - ' PREFIX=/opt/daos TARGET_TYPE=release')) - sh label: 'Generate RPMs', - script: './ci/rpm/gen_rpms.sh el9 "' + env.DAOS_RELVAL + '"' - } - } - post { - success { - uploadNewRPMs('el9', 'success') - } - unsuccessful { - sh '''if [ -f config.log ]; then - mv config.log config.log-el9-gcc - fi''' - archiveArtifacts artifacts: 'config.log-el9-gcc', - allowEmptyArchive: true - } - cleanup { - uploadNewRPMs('el9', 'cleanup') - job_status_update() - } - } - } stage('Build on Leap 15.5') { when { beforeAgent true @@ -684,188 +546,36 @@ pipeline { } } } - stage('Build on Leap 15.5 with Intel-C and TARGET_PREFIX') { - when { - beforeAgent true - expression { !skip_build_stage('leap15', 'icc') } - } - agent { - dockerfile { - filename 'utils/docker/Dockerfile.leap.15' - label 'docker_runner' - additionalBuildArgs dockerBuildArgs(repo_type: 'stable', - parallel_build: true, - deps_build: true) + - " -t ${sanitized_JOB_NAME()}-leap15-icc" + - ' --build-arg DAOS_PACKAGES_BUILD=no ' + - ' --build-arg COMPILER=icc' + - ' --build-arg POINT_RELEASE=.5 ' - } - } - steps { - job_step_update( - sconsBuild(parallel_build: true, - scons_args: sconsFaultsArgs() + - ' PREFIX=/opt/daos 
TARGET_TYPE=release', - build_deps: 'no')) - } - post { - unsuccessful { - sh '''if [ -f config.log ]; then - mv config.log config.log-leap15-intelc - fi''' - archiveArtifacts artifacts: 'config.log-leap15-intelc', - allowEmptyArchive: true - } - cleanup { - job_status_update() - } - } - } - } - } - stage('Unit Tests') { - when { - beforeAgent true - expression { !skipStage() } - } - parallel { - stage('Unit Test on EL 8.8') { - when { - beforeAgent true - expression { !skipStage() } - } - agent { - label cachedCommitPragma(pragma: 'VM1-label', def_val: params.CI_UNIT_VM1_LABEL) - } - steps { - job_step_update( - unitTest(timeout_time: 60, - unstash_opt: true, - inst_repos: daosRepos(), - inst_rpms: unitPackages())) - } - post { - always { - unitTestPost artifacts: ['unit_test_logs/'] - job_status_update() - } - } - } - stage('Unit Test bdev on EL 8.8') { - when { - beforeAgent true - expression { !skipStage() } - } - agent { - label params.CI_UNIT_VM1_NVME_LABEL - } - steps { - job_step_update( - unitTest(timeout_time: 60, - unstash_opt: true, - inst_repos: daosRepos(), - inst_rpms: unitPackages())) - } - post { - always { - unitTestPost artifacts: ['unit_test_bdev_logs/'] - job_status_update() - } - } - } - stage('NLT on EL 8.8') { - when { - beforeAgent true - expression { params.CI_NLT_TEST && !skipStage() } - } - agent { - label params.CI_NLT_1_LABEL - } - steps { - job_step_update( - unitTest(timeout_time: 60, - inst_repos: daosRepos(), - test_script: 'ci/unit/test_nlt.sh', - unstash_opt: true, - unstash_tests: false, - inst_rpms: unitPackages())) - // recordCoverage(tools: [[parser: 'COBERTURA', pattern:'nltir.xml']], - // skipPublishingChecks: true, - // id: 'tlc', name: 'Fault Injection Interim Report') - stash(name:'nltr', includes:'nltr.json', allowEmpty: true) - } - post { - always { - unitTestPost artifacts: ['nlt_logs/'], - testResults: 'nlt-junit.xml', - always_script: 'ci/unit/test_nlt_post.sh', - valgrind_stash: 'el8-gcc-nlt-memcheck' - 
recordIssues enabledForFailure: true, - failOnError: false, - ignoreQualityGate: true, - name: 'NLT server leaks', - qualityGates: [[threshold: 1, type: 'TOTAL', unstable: true]], - tool: issues(pattern: 'nlt-server-leaks.json', - name: 'NLT server results', - id: 'NLT_server'), - scm: 'daos-stack/daos' - job_status_update() - } - } - } - stage('Unit Test with memcheck on EL 8.8') { - when { - beforeAgent true - expression { !skipStage() } - } - agent { - label cachedCommitPragma(pragma: 'VM1-label', def_val: params.CI_UNIT_VM1_LABEL) - } - steps { - job_step_update( - unitTest(timeout_time: 160, - unstash_opt: true, - ignore_failure: true, - inst_repos: daosRepos(), - inst_rpms: unitPackages())) - } - post { - always { - unitTestPost artifacts: ['unit_test_memcheck_logs.tar.gz', - 'unit_test_memcheck_logs/**/*.log'], - valgrind_stash: 'el8-gcc-unit-memcheck' - job_status_update() - } - } - } // stage('Unit Test with memcheck on EL 8.8') - stage('Unit Test bdev with memcheck on EL 8.8') { - when { - beforeAgent true - expression { !skipStage() } - } - agent { - label params.CI_UNIT_VM1_NVME_LABEL - } - steps { - job_step_update( - unitTest(timeout_time: 180, - unstash_opt: true, - ignore_failure: true, - inst_repos: daosRepos(), - inst_rpms: unitPackages())) - } - post { - always { - unitTestPost artifacts: ['unit_test_memcheck_bdev_logs.tar.gz', - 'unit_test_memcheck_bdev_logs/**/*.log'], - valgrind_stash: 'el8-gcc-unit-memcheck-bdev' - job_status_update() - } - } - } // stage('Unit Test bdev with memcheck on EL 8') } } +// stage('Fetch RPMs from previous build') { +// steps { +// copyArtifacts( +// projectName: env.JOB_NAME, +// selector: specific('2'), +// filter: 'artifacts/leap15/daos/**, artifacts/leap15/deps/**', +// fingerprintArtifacts: true +// ) +// copyArtifacts( +// projectName: env.JOB_NAME, +// selector: specific('2'), +// filter: 'artifacts/leap15/repodata/**', +// fingerprintArtifacts: true +// ) +// sh ''' +// set -euxo pipefail +// ls -lah 
artifacts/leap15/daos || true +// ls -lah artifacts/leap15/deps || true +// ls -lah artifacts/leap15/repodata || true +// ''' +// +// archiveArtifacts artifacts: 'artifacts/leap15/**', +// fingerprint: true, +// allowEmptyArchive: true +// +// } +// } + stage('Test') { when { beforeAgent true @@ -874,80 +584,11 @@ pipeline { expression { !paramsValue('CI_FUNCTIONAL_TEST_SKIP', false) } } parallel { - stage('Functional on EL 8.8 with Valgrind') { - when { - beforeAgent true - expression { !skipStage() } - } - agent { - label vm9_label('EL8') - } - steps { - job_step_update( - functionalTest( - inst_repos: daosRepos(), - inst_rpms: functionalPackages(1, next_version(), 'tests-internal') + - ' mercury-libfabric', - test_function: 'runTestFunctionalV2')) - } - post { - always { - functionalTestPostV2() - job_status_update() - } - } - } // stage('Functional on EL 8.8 with Valgrind') - stage('Functional on EL 8.8') { - when { - beforeAgent true - expression { !skipStage() } - } - agent { - label vm9_label('EL8') - } - steps { - job_step_update( - functionalTest( - inst_repos: daosRepos(), - inst_rpms: functionalPackages(1, next_version(), 'tests-internal') + - ' mercury-libfabric', - test_function: 'runTestFunctionalV2')) - } - post { - always { - functionalTestPostV2() - job_status_update() - } - } - } // stage('Functional on EL 8.8') - stage('Functional on EL 9') { - when { - beforeAgent true - expression { !skipStage() } - } - agent { - label vm9_label('EL9') - } - steps { - job_step_update( - functionalTest( - inst_repos: daosRepos(), - inst_rpms: functionalPackages(1, next_version(), 'tests-internal') + - ' mercury-libfabric', - test_function: 'runTestFunctionalV2')) - } - post { - always { - functionalTestPostV2() - job_status_update() - } - } - } // stage('Functional on EL 9') - stage('Functional on Leap 15.6') { - when { - beforeAgent true - expression { !skipStage() } - } + stage('Functional on SLES 15.7') { +// when { +// beforeAgent true +// expression { 
!skipStage() } +// } agent { label vm9_label('Leap15') } @@ -958,7 +599,7 @@ pipeline { inst_rpms: functionalPackages(1, next_version(), 'tests-internal') + ' mercury-libfabric', test_function: 'runTestFunctionalV2', - image_version: 'leap15.6')) + image_version: 'sles15.7')) } post { always { @@ -966,112 +607,12 @@ pipeline { job_status_update() } } // post - } // stage('Functional on Leap 15.6') - stage('Functional on Ubuntu 20.04') { - when { - beforeAgent true - expression { !skipStage() } - } - agent { - label vm9_label('Ubuntu') - } - steps { - job_step_update( - functionalTest( - inst_repos: daosRepos(), - inst_rpms: functionalPackages(1, next_version(), 'tests-internal') + - ' mercury-libfabric', - test_function: 'runTestFunctionalV2')) - } - post { - always { - functionalTestPostV2() - job_status_update() - } - } // post - } // stage('Functional on Ubuntu 20.04') - stage('Fault injection testing on EL 8.8') { - when { - beforeAgent true - expression { !skipStage() } - } - agent { - dockerfile { - filename 'utils/docker/Dockerfile.el.8' - label 'docker_runner' - additionalBuildArgs dockerBuildArgs(repo_type: 'stable', - parallel_build: true, - deps_build: true) - args '--tmpfs /mnt/daos_0' - } - } - steps { - job_step_update( - sconsBuild(parallel_build: true, - scons_args: 'PREFIX=/opt/daos TARGET_TYPE=release BUILD_TYPE=debug', - build_deps: 'no')) - job_step_update(nlt_test()) - // recordCoverage(tools: [[parser: 'COBERTURA', pattern:'nltr.xml']], - // skipPublishingChecks: true, - // id: 'fir', name: 'Fault Injection Report') - } - post { - always { - discoverGitReferenceBuild referenceJob: 'daos-stack/daos/master', - scm: 'daos-stack/daos', - requiredResult: hudson.model.Result.UNSTABLE - recordIssues enabledForFailure: true, - /* ignore warning/errors from PMDK logging system */ - filters: [excludeFile('pmdk/.+')], - failOnError: false, - ignoreQualityGate: true, - qualityGates: [[threshold: 1, type: 'TOTAL_ERROR'], - [threshold: 1, type: 
'TOTAL_HIGH'], - [threshold: 1, type: 'NEW_NORMAL', unstable: true], - [threshold: 1, type: 'NEW_LOW', unstable: true]], - tools: [issues(pattern: 'nlt-errors.json', - name: 'Fault injection issues', - id: 'Fault_Injection'), - issues(pattern: 'nlt-client-leaks.json', - name: 'Fault injection leaks', - id: 'NLT_client')], - scm: 'daos-stack/daos' - junit testResults: 'nlt-junit.xml' - stash name: 'fault-inject-valgrind', - includes: '*.memcheck.xml', - allowEmpty: true - archiveArtifacts artifacts: 'nlt_logs/el8.fault-injection/', - allowEmptyArchive: true - job_status_update() - } - } - } // stage('Fault injection testing on EL 8.8') - stage('Test RPMs on EL 8.6') { - when { - beforeAgent true - expression { params.CI_TEST_EL8_RPMs && !skipStage() } - } - agent { - label params.CI_UNIT_VM1_LABEL - } - steps { - job_step_update( - testRpm(inst_repos: daosRepos(), - daos_pkg_version: daosPackagesVersion(next_version()), - inst_rpms: 'mercury-libfabric') - ) - } - post { - always { - rpm_test_post(env.STAGE_NAME, env.NODELIST) - } - } - } // stage('Test RPMs on EL 8.6') - stage('Test RPMs on Leap 15.5') { - when { - beforeAgent true - expression { params.CI_TEST_LEAP15_RPMs && !skipStage() } - } + } // stage('Functional on SLES 15.7') + stage('Test RPMs on Leap 15.6') { +// when { +// beforeAgent true +// expression { params.CI_TEST_LEAP15_RPMs && !skipStage() } +// } agent { label params.CI_UNIT_VM1_LABEL } @@ -1081,8 +622,8 @@ pipeline { * additionally for this use-case, can't override ftest_arg with this :-( script { - 'Test RPMs on Leap 15.5': getFunctionalTestStage( - name: 'Test RPMs on Leap 15.5', + 'Test RPMs on Leap 15.6': getFunctionalTestStage( + name: 'Test RPMs on Leap 15.6', pragma_suffix: '', label: params.CI_UNIT_VM1_LABEL, next_version: next_version(), @@ -1119,143 +660,9 @@ pipeline { rpm_test_post(env.STAGE_NAME, env.NODELIST) } } - } // stage('Test RPMs on Leap 15.5') + } // stage('Test RPMs on Leap 15.6') } // parallel } // stage('Test') - 
stage('Test Storage Prep on EL 8.8') { - when { - beforeAgent true - expression { params.CI_STORAGE_PREP_LABEL != '' } - } - agent { - label params.CI_STORAGE_PREP_LABEL - } - steps { - job_step_update( - storagePrepTest( - inst_repos: daosRepos(), - inst_rpms: functionalPackages(1, next_version(), 'tests-internal'))) - } - post { - cleanup { - job_status_update() - } - } - } // stage('Test Storage Prep') - stage('Test Hardware') { - when { - beforeAgent true - expression { !paramsValue('CI_FUNCTIONAL_HARDWARE_TEST_SKIP', false) && !skipStage() } - } - steps { - script { - parallel( - 'Functional Hardware Medium': getFunctionalTestStage( - name: 'Functional Hardware Medium', - pragma_suffix: '-hw-medium', - label: params.FUNCTIONAL_HARDWARE_MEDIUM_LABEL, - next_version: next_version(), - stage_tags: 'hw,medium,-provider', - default_tags: startedByTimer() ? 'pr daily_regression' : 'pr', - nvme: 'auto', - run_if_pr: false, - run_if_landing: false, - job_status: job_status_internal - ), - 'Functional Hardware Medium MD on SSD': getFunctionalTestStage( - name: 'Functional Hardware Medium MD on SSD', - pragma_suffix: '-hw-medium-md-on-ssd', - label: params.FUNCTIONAL_HARDWARE_MEDIUM_LABEL, - next_version: next_version(), - stage_tags: 'hw,medium,-provider', - default_tags: startedByTimer() ? 'pr daily_regression' : 'pr', - nvme: 'auto_md_on_ssd', - run_if_pr: true, - run_if_landing: false, - job_status: job_status_internal - ), - 'Functional Hardware Medium VMD': getFunctionalTestStage( - name: 'Functional Hardware Medium VMD', - pragma_suffix: '-hw-medium-vmd', - label: params.FUNCTIONAL_HARDWARE_MEDIUM_VMD_LABEL, - next_version: next_version(), - stage_tags: 'hw_vmd,medium', - /* groovylint-disable-next-line UnnecessaryGetter */ - default_tags: startedByTimer() ? 
'pr daily_regression' : 'pr', - nvme: 'auto', - run_if_pr: false, - run_if_landing: false, - job_status: job_status_internal - ), - 'Functional Hardware Medium Verbs Provider': getFunctionalTestStage( - name: 'Functional Hardware Medium Verbs Provider', - pragma_suffix: '-hw-medium-verbs-provider', - label: params.FUNCTIONAL_HARDWARE_MEDIUM_VERBS_PROVIDER_LABEL, - next_version: next_version(), - stage_tags: 'hw,medium,provider', - default_tags: startedByTimer() ? 'pr daily_regression' : 'pr', - default_nvme: 'auto', - provider: 'ofi+verbs;ofi_rxm', - run_if_pr: false, - run_if_landing: false, - job_status: job_status_internal, - image_version: 'el9.7' - ), - 'Functional Hardware Medium Verbs Provider MD on SSD': getFunctionalTestStage( - name: 'Functional Hardware Medium Verbs Provider MD on SSD', - pragma_suffix: '-hw-medium-verbs-provider-md-on-ssd', - label: params.FUNCTIONAL_HARDWARE_MEDIUM_VERBS_PROVIDER_LABEL, - next_version: next_version(), - stage_tags: 'hw,medium,provider', - default_tags: startedByTimer() ? 'pr daily_regression' : 'pr', - default_nvme: 'auto_md_on_ssd', - provider: 'ofi+verbs;ofi_rxm', - run_if_pr: true, - run_if_landing: false, - job_status: job_status_internal, - image_version: 'el9.7' - ), - 'Functional Hardware Medium UCX Provider': getFunctionalTestStage( - name: 'Functional Hardware Medium UCX Provider', - pragma_suffix: '-hw-medium-ucx-provider', - label: params.FUNCTIONAL_HARDWARE_MEDIUM_UCX_PROVIDER_LABEL, - next_version: next_version(), - stage_tags: 'hw,medium,provider', - default_tags: startedByTimer() ? 
'pr daily_regression' : 'pr', - default_nvme: 'auto', - provider: cachedCommitPragma('Test-provider-ucx', 'ucx+ud_x'), - run_if_pr: false, - run_if_landing: false, - job_status: job_status_internal - ), - 'Functional Hardware Large': getFunctionalTestStage( - name: 'Functional Hardware Large', - pragma_suffix: '-hw-large', - label: params.FUNCTIONAL_HARDWARE_LARGE_LABEL, - next_version: next_version(), - stage_tags: 'hw,large', - default_tags: startedByTimer() ? 'pr daily_regression' : 'pr', - default_nvme: 'auto', - run_if_pr: false, - run_if_landing: false, - job_status: job_status_internal - ), - 'Functional Hardware Large MD on SSD': getFunctionalTestStage( - name: 'Functional Hardware Large MD on SSD', - pragma_suffix: '-hw-large-md-on-ssd', - label: params.FUNCTIONAL_HARDWARE_LARGE_LABEL, - next_version: next_version(), - stage_tags: 'hw,large', - default_tags: startedByTimer() ? 'pr daily_regression' : 'pr', - default_nvme: 'auto_md_on_ssd', - run_if_pr: true, - run_if_landing: false, - job_status: job_status_internal - ), - ) - } - } - } // stage('Test Hardware') } // stages post { always { @@ -1269,4 +676,4 @@ pipeline { notifyBrokenBranch branches: target_branch } } // post -} +} \ No newline at end of file diff --git a/ci/parse_ci_envs.sh b/ci/parse_ci_envs.sh index 84cb0183f91..2d15d81b470 100755 --- a/ci/parse_ci_envs.sh +++ b/ci/parse_ci_envs.sh @@ -22,8 +22,12 @@ if [ -n "${STAGE_NAME:?}" ]; then : "${TARGET:=centos9}" : "${REPO_SPEC:=el-9}" ;; + *SLES\ 15.7*|*sles15.7*) + : "${CHROOT_NAME:=sles-15-sp7-x86_64}" + : "${TARGET:=sles15.7}" + ;; *Leap\ 15.6*|*leap15.6*|*opensuse15.6*|*sles15.6*) - : "${CHROOT_NAME:=opensuse-leap-15.5-x86_64}" + : "${CHROOT_NAME:=opensuse-leap-15.6-x86_64}" : "${TARGET:=leap15.6}" ;; *Leap\ 15.5*|*leap15.5*|*opensuse15.5*|*sles15.5*) diff --git a/ci/provisioning/post_provision_config_nodes_LEAP.sh b/ci/provisioning/post_provision_config_nodes_LEAP.sh index 4fbc3739f0f..f5603d9d2fa 100644 --- 
a/ci/provisioning/post_provision_config_nodes_LEAP.sh +++ b/ci/provisioning/post_provision_config_nodes_LEAP.sh @@ -23,4 +23,23 @@ distro_custom() { sed -e '/MODULEPATH=/s/$/:\/usr\/share\/modules/' \ /etc/profile.d/lmod.sh; \ fi + + # Fix for no_pmix_multi_ctx tests on SLES/Leap 15.x + if [[ "${VERSION_ID:-}" == 15.* ]]; then + zypper rm -y -u mercury mercury-debuginfo || true + zypper rm -y -u libfabric libfabric1 libfabric-debuginfo || true + zypper clean --all + ldconfig +# zypper mr -e daos-stack-daos-sl-15-stable-local-artifactory || true +# zypper mr -p 90 daos-stack-daos-sl-15-stable-local-artifactory || true + zypper mr -p 90 daos-stack-deps-sl-15-stable-local-artifactory || true + + zypper in -y -f libfabric1 mercury-libfabric mercury + +# if [[ "${ID:-}" == "sles" ]]; then +# zypper in -y -f libfabric1 mercury-libfabric mercury daos-server daos-client daos-client-tests openmpi3 openmpi3-devel +# else +# zypper in -y -f libfabric1 mercury-libfabric mercury +# fi + fi } diff --git a/src/tests/ftest/container/list.yaml b/src/tests/ftest/container/list.yaml index 77ca02a2ca3..a21f8014a98 100644 --- a/src/tests/ftest/container/list.yaml +++ b/src/tests/ftest/container/list.yaml @@ -2,7 +2,7 @@ hosts: test_servers: 1 test_clients: 1 -timeout: 360 +timeout: 720 server_config: name: daos_server diff --git a/src/tests/ftest/launch.py b/src/tests/ftest/launch.py index a62abe0a508..e79f5fbfd61 100755 --- a/src/tests/ftest/launch.py +++ b/src/tests/ftest/launch.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 """ (C) Copyright 2018-2024 Intel Corporation. 
- (C) Copyright 2025 Hewlett Packard Enterprise Development LP + (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -310,15 +310,22 @@ def _run(self, args): message = f"Error detecting tests that match tags: {' '.join(args.tags)}" return self.get_exit_status(1, message, "Setup", sys.exc_info()) + logger.info("Finished detecting tests") + # Verify at least one test was requested if not group.tests: message = f"No tests found for tags: {' '.join(args.tags)}" return self.get_exit_status(1, message, "Setup", sys.exc_info()) + logger.info("BANG: 1") + # Done if just listing tests matching the tags if args.list and not args.modify: + logger.info(f"args.list: {args.list}, args.modify: {args.modify}") return self.get_exit_status(0, "Listing tests complete") + logger.info("BANG: 2") + # Setup the fuse configuration try: setup_fuse_config(logger, args.test_servers | args.test_clients) @@ -326,7 +333,7 @@ def _run(self, args): # Warn but don't fail message = "Issue detected setting up the fuse configuration" setup_result.warn_test(logger, "Setup", message, sys.exc_info()) - + logger.info("BANG: 3") # Setup override systemctl files try: clients = args.test_clients if args.test_clients else args.test_servers @@ -335,7 +342,7 @@ def _run(self, args): except LaunchException: message = "Issue detected setting up the systemctl configuration" return self.get_exit_status(1, message, "Setup", sys.exc_info()) - + logger.info("BANG: 4") # Get the core file pattern information core_files = {} if args.process_cores: @@ -347,7 +354,7 @@ def _run(self, args): return self.get_exit_status(1, message, "Setup", sys.exc_info()) else: logger.debug("Not collecting core files") - + logger.info("BANG: 5") # Determine if bullseye code coverage collection is enabled code_coverage = CodeCoverage(test_env) # pylint: disable=unsupported-binary-operation @@ -364,14 +371,14 @@ def _run(self, args): except StorageException: message = "Error 
detecting storage information for test yaml files" status |= self.get_exit_status(1, message, "Setup", sys.exc_info()) - + logger.info("BANG: 6") if args.modify: return self.get_exit_status(0, "Modifying test yaml files complete") - + logger.info("BANG: 7") # Configure slurm if any tests use partitions test_status = group.setup_slurm( logger, self.slurm_setup, self.slurm_install, self.user, self.result) - + logger.info("BANG: 8") # Split the timer for the test result to account for any non-test execution steps as not # to double report the test time accounted for in each individual test result setup_result.end() @@ -382,7 +389,7 @@ def _run(self, args): not args.disable_stop_daos, args.archive, args.rename, args.jenkinslog, core_files, args.logs_threshold, args.user_create, code_coverage, self.job_results_dir, self.logdir, args.clear_mounts, cleanup_files) - + logger.info("BANG: 9") # Convert the test status to a launch.py status status |= summarize_run(logger, self.mode, test_status) @@ -392,6 +399,8 @@ def _run(self, args): # Restart the timer for the test result to account for any non-test execution steps setup_result.start() + logger.info("BANG: 10") + # Return the appropriate return code and mark the test result to account for any non-test # execution steps complete return self.get_exit_status(status, "Executing tests complete") @@ -731,6 +740,7 @@ def main(): args = parser.parse_args() # Override arguments via the mode + logger.info(f"args.mode: {args.mode}") if args.mode == "ci": args.archive = True args.include_localhost = True @@ -764,4 +774,4 @@ def main(): logger.addHandler(get_console_handler("%(message)s", logging.DEBUG)) main() else: - logger = logging.getLogger() + logger = logging.getLogger() \ No newline at end of file diff --git a/src/tests/ftest/pool/destroy.yaml b/src/tests/ftest/pool/destroy.yaml index f2da22876bd..6e42554177c 100644 --- a/src/tests/ftest/pool/destroy.yaml +++ b/src/tests/ftest/pool/destroy.yaml @@ -16,7 +16,7 @@ server_config: 
class: ram scm_mount: /mnt/daos system_ram_reserved: 1 -timeout: 360 +timeout: 720 pool: scm_size: 1G container: