From 88e64510f25129ec5b885eec29cebe13f8f4caf9 Mon Sep 17 00:00:00 2001 From: yaakov-stein Date: Tue, 10 Feb 2026 14:10:20 -0800 Subject: [PATCH 1/3] lib: helper: add FNV-1a hash function A simple data hashing function. C stdlib lacks one. --- src/libbpfilter/helper.c | 16 +++++++++++++ src/libbpfilter/include/bpfilter/helper.h | 20 ++++++++++++++++ tests/unit/libbpfilter/helper.c | 29 +++++++++++++++++++++++ 3 files changed, 65 insertions(+) diff --git a/src/libbpfilter/helper.c b/src/libbpfilter/helper.c index 64ae9d230..b925ee8f4 100644 --- a/src/libbpfilter/helper.c +++ b/src/libbpfilter/helper.c @@ -195,3 +195,19 @@ char *bf_trim(char *str) return bf_rtrim(bf_ltrim(str)); } + +uint64_t bf_fnv1a(const void *data, size_t len, uint64_t hash) +{ + const uint8_t *bytes; + + assert(data); + + bytes = data; + + for (size_t i = 0; i < len; ++i) { + hash ^= bytes[i]; + hash *= BF_FNV1A_PRIME; + } + + return hash; +} diff --git a/src/libbpfilter/include/bpfilter/helper.h b/src/libbpfilter/include/bpfilter/helper.h index fa2ef7de5..697429064 100644 --- a/src/libbpfilter/include/bpfilter/helper.h +++ b/src/libbpfilter/include/bpfilter/helper.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -394,3 +395,22 @@ int bf_read_file(const char *path, void **buf, size_t *len); * @return 0 on success, negative errno value on error. */ int bf_write_file(const char *path, const void *buf, size_t len); + +/// FNV-1a 64-bit offset basis. +/// @see https://en.wikipedia.org/wiki/Fowler-Noll-Vo_hash_function +#define BF_FNV1A_INIT 0xcbf29ce484222325ULL +/// FNV-1a 64-bit prime. +#define BF_FNV1A_PRIME 0x100000001b3ULL + +/** + * @brief Compute a FNV-1a 64-bit hash. + * + * Pass `BF_FNV1A_INIT` as `hash` for the initial call. To hash + * multiple fields, chain calls by passing the previous return value. + * + * @param data Data to hash. Can't be NULL. + * @param len Number of bytes to hash. + * @param hash Initial or chained hash value. + * @return Updated hash value. + */ +uint64_t bf_fnv1a(const void *data, size_t len, uint64_t hash); diff --git a/tests/unit/libbpfilter/helper.c b/tests/unit/libbpfilter/helper.c index 8897e9269..9db578bae 100644 --- a/tests/unit/libbpfilter/helper.c +++ b/tests/unit/libbpfilter/helper.c @@ -356,12 +356,41 @@ static void overwrite_existing_file(void **state) assert_memory_equal(read_buf, second_data, strlen(second_data)); } +static void fnv1a_hash(void **state) +{ + uint32_t val_a = 42; + uint32_t val_b = 99; + uint8_t byte_a = 0x61; + uint64_t hash_a; + uint64_t hash_b; + uint64_t hash_ab; + uint64_t hash_ba; + + (void)state; + + // Zero length returns the initial hash unchanged + assert_true(bf_fnv1a(&val_a, 0, BF_FNV1A_INIT) == BF_FNV1A_INIT); + assert_true(bf_fnv1a(&byte_a, 1, BF_FNV1A_INIT) == 0xaf63dc4c8601ec8cULL); + + hash_a = bf_fnv1a(&val_a, sizeof(val_a), BF_FNV1A_INIT); + hash_b = bf_fnv1a(&val_b, sizeof(val_b), BF_FNV1A_INIT); + + assert_int_equal(hash_a, bf_fnv1a(&val_a, sizeof(val_a), BF_FNV1A_INIT)); + assert_int_not_equal(hash_a, hash_b); + + // Chaining: order matters for sequential hashing + hash_ab = bf_fnv1a(&val_b, sizeof(val_b), hash_a); + hash_ba = bf_fnv1a(&val_a, sizeof(val_a), hash_b); + assert_int_not_equal(hash_ab, hash_ba); +} + int main(void) { const struct CMUnitTest tests[] = { cmocka_unit_test(close_fd), cmocka_unit_test(string_copy), cmocka_unit_test(realloc_mem), + cmocka_unit_test(fnv1a_hash), cmocka_unit_test(trim_left), cmocka_unit_test(trim_right), cmocka_unit_test(trim_both), From e91f1f8c52f08dbc03ef47e7006da10982319ecf Mon Sep 17 00:00:00 2001 From: Pawel Zmarzly Date: Tue, 17 Feb 2026 01:31:37 +0000 Subject: [PATCH 2/3] lib: core: add bf_hashset --- .clang-format | 2 +- src/libbpfilter/CMakeLists.txt | 2 + src/libbpfilter/core/hashset.c | 337 ++++++++++++++ .../include/bpfilter/core/hashset.h | 226 +++++++++ tests/unit/CMakeLists.txt | 1 + tests/unit/libbpfilter/core/hashset.c | 431 ++++++++++++++++++ 6 files changed, 998 insertions(+), 1 deletion(-) create mode 100644 src/libbpfilter/core/hashset.c create mode 100644 src/libbpfilter/include/bpfilter/core/hashset.h create mode 100644 tests/unit/libbpfilter/core/hashset.c diff --git a/.clang-format b/.clang-format index b5cbbd0ad..be30d0822 100644 --- a/.clang-format +++ b/.clang-format @@ -59,7 +59,7 @@ EmptyLineAfterAccessModifier: Never EmptyLineBeforeAccessModifier: Always ExperimentalAutoDetectBinPacking: false FixNamespaceComments: true -ForEachMacros: ['bf_list_foreach', 'bf_list_foreach_rev', 'bf_rpack_array_foreach'] +ForEachMacros: ['bf_hashset_foreach', 'bf_list_foreach', 'bf_list_foreach_rev', 'bf_rpack_array_foreach'] IncludeBlocks: Regroup IncludeCategories: # net/if.h needs to be included BEFORE linux/if.h to avoid conflicts diff --git a/src/libbpfilter/CMakeLists.txt b/src/libbpfilter/CMakeLists.txt index 47d98e674..33cff658b 100644 --- a/src/libbpfilter/CMakeLists.txt +++ b/src/libbpfilter/CMakeLists.txt @@ -22,6 +22,7 @@ set(libbpfilter_srcs ${CMAKE_CURRENT_SOURCE_DIR}/include/bpfilter/hook.h ${CMAKE_CURRENT_SOURCE_DIR}/include/bpfilter/if.h ${CMAKE_CURRENT_SOURCE_DIR}/include/bpfilter/io.h + ${CMAKE_CURRENT_SOURCE_DIR}/include/bpfilter/core/hashset.h ${CMAKE_CURRENT_SOURCE_DIR}/include/bpfilter/core/list.h ${CMAKE_CURRENT_SOURCE_DIR}/include/bpfilter/logger.h ${CMAKE_CURRENT_SOURCE_DIR}/include/bpfilter/matcher.h @@ -45,6 +46,7 @@ set(libbpfilter_srcs ${CMAKE_CURRENT_SOURCE_DIR}/hook.c ${CMAKE_CURRENT_SOURCE_DIR}/if.c ${CMAKE_CURRENT_SOURCE_DIR}/io.c + ${CMAKE_CURRENT_SOURCE_DIR}/core/hashset.c ${CMAKE_CURRENT_SOURCE_DIR}/core/list.c ${CMAKE_CURRENT_SOURCE_DIR}/logger.c ${CMAKE_CURRENT_SOURCE_DIR}/matcher.c diff --git a/src/libbpfilter/core/hashset.c b/src/libbpfilter/core/hashset.c new file mode 100644 index 000000000..c6de1e3f3 --- /dev/null +++ b/src/libbpfilter/core/hashset.c @@ -0,0 +1,337 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + */ + +#include "bpfilter/core/hashset.h" + +#include +#include + +#include "bpfilter/helper.h" +#include "bpfilter/logger.h" + +#define _BF_HASHSET_TOMBSTONE ((bf_hashset_elem *)1) +#define _BF_HASHSET_INIT_CAP 16UL +/* Maximum load factor before growing. Lowering this number reduces collisions + * but causes higher memory usage. */ +#define _BF_HASHSET_MAX_LOAD_NUM 5 +#define _BF_HASHSET_MAX_LOAD_DEN 10 +/* Largest power-of-two element count (2^60) that still leaves headroom for + * load-factor arithmetic (slots_in_use * 10, cap * 5) without overflowing + * size_t. */ +#define _BF_HASHSET_MAX_CAP (SIZE_MAX / 16 + 1) + +static inline size_t _bf_round_next_power_of_2(size_t value) +{ + if (value == 0) + return 1; + + value--; + value |= value >> 1; + value |= value >> 2; + value |= value >> 4; + value |= value >> 8; + value |= value >> 16; +#if SIZE_MAX > 0xFFFFFFFFU + value |= value >> 32; +#endif + + return ++value; +} + +static inline bool _bf_hashset_slot_is_tombstone(const bf_hashset_elem *slot) +{ + return slot == _BF_HASHSET_TOMBSTONE; +} + +// Caller must ensure set->cap > 0 to avoid division by zero. +static size_t _bf_hashset_index(const bf_hashset *set, const void *data) +{ + assert(set); + assert(data); + + return set->ops.hash(data, set->ctx) % set->cap; +} + +static int _bf_hashset_resize(bf_hashset *set, size_t new_cap) +{ + bf_hashset_elem **new_slots; + + assert(set); + + // We must have enough space for all elements. + if (new_cap < set->len) + return -EINVAL; + + new_slots = (bf_hashset_elem **)calloc(new_cap, sizeof(*new_slots)); + if (!new_slots) + return -ENOMEM; + + bf_hashset_foreach (set, elem) { + size_t idx = set->ops.hash(elem->data, set->ctx) % new_cap; + while (new_slots[idx]) + idx = (idx + 1) % new_cap; + new_slots[idx] = elem; + } + + freep((void *)&set->slots); + set->slots = new_slots; + set->cap = new_cap; + set->slots_in_use = set->len; + + return 0; +} + +static int _bf_hashset_grow(bf_hashset *set) +{ + size_t new_cap; + + assert(set); + + if (set->cap >= _BF_HASHSET_MAX_CAP) + return -ENOMEM; + new_cap = set->cap ? set->cap * 2 : _BF_HASHSET_INIT_CAP; + + return _bf_hashset_resize(set, new_cap); +} + +static bool _bf_hashset_needs_grow(const bf_hashset *set) +{ + assert(set); + + return set->slots_in_use * _BF_HASHSET_MAX_LOAD_DEN >= + set->cap * _BF_HASHSET_MAX_LOAD_NUM; +} + +static bool _bf_hashset_find(const bf_hashset *set, const void *data, + size_t *found_index, size_t *free_index) +{ + size_t idx; + size_t first_free = SIZE_MAX; + + assert(set); + assert(data); + + if (set->cap == 0) + return false; + + idx = _bf_hashset_index(set, data); + + for (size_t i = 0; i < set->cap; ++i) { + bf_hashset_elem *slot = set->slots[idx]; + + if (!slot) { + if (free_index) + *free_index = first_free != SIZE_MAX ? first_free : idx; + return false; + } + + if (_bf_hashset_slot_is_tombstone(slot)) { + if (first_free == SIZE_MAX) + first_free = idx; + } else if (set->ops.equal(slot->data, data, set->ctx)) { + if (found_index) + *found_index = idx; + return true; + } + + idx = (idx + 1) % set->cap; + } + + if (free_index && first_free != SIZE_MAX) + *free_index = first_free; + + return false; +} + +int bf_hashset_new(bf_hashset **set, const bf_hashset_ops *ops, void *ctx) +{ + _free_bf_hashset_ bf_hashset *_set = NULL; + + assert(set); + assert(ops); + assert(ops->hash); + assert(ops->equal); + + _set = calloc(1, sizeof(*_set)); + if (!_set) + return -ENOMEM; + + bf_hashset_init(_set, ops, ctx); + + *set = TAKE_PTR(_set); + + return 0; +} + +void bf_hashset_free(bf_hashset **set) +{ + assert(set); + + if (!*set) + return; + + bf_hashset_clean(*set); + freep((void *)set); +} + +void bf_hashset_init(bf_hashset *set, const bf_hashset_ops *ops, void *ctx) +{ + assert(set); + assert(ops); + assert(ops->hash); + assert(ops->equal); + + *set = bf_hashset_default(ops, ctx); +} + +void bf_hashset_clean(bf_hashset *set) +{ + assert(set); + + bf_hashset_foreach (set, elem) { + if (set->ops.free) + set->ops.free(&elem->data, set->ctx); + freep((void *)&elem); + } + + freep((void *)&set->slots); + set->cap = 0; + set->len = 0; + set->slots_in_use = 0; + set->head = NULL; + set->tail = NULL; +} + +size_t bf_hashset_size(const bf_hashset *set) +{ + assert(set); + return set->len; +} + +size_t bf_hashset_cap(const bf_hashset *set) +{ + assert(set); + return set->cap; +} + +bool bf_hashset_is_empty(const bf_hashset *set) +{ + assert(set); + return set->len == 0; +} + +int bf_hashset_reserve(bf_hashset *set, size_t count) +{ + size_t needed, new_cap; + + assert(set); + + if (count == 0) + return 0; + + if (count > _BF_HASHSET_MAX_CAP) + return -ENOMEM; + + needed = count * _BF_HASHSET_MAX_LOAD_DEN / _BF_HASHSET_MAX_LOAD_NUM; + if (needed <= set->cap) + return 0; + + new_cap = _bf_round_next_power_of_2(bf_max(needed, _BF_HASHSET_INIT_CAP)); + if (new_cap > _BF_HASHSET_MAX_CAP) + return -ENOMEM; + + return _bf_hashset_resize(set, new_cap); +} + +int bf_hashset_add(bf_hashset *set, void *data) +{ + bf_hashset_elem *elem; + size_t free_idx = SIZE_MAX; + bool was_tombstone; + int r; + + assert(set); + assert(data); + + if (_bf_hashset_find(set, data, NULL, &free_idx)) + return -EEXIST; + + if (set->len >= _BF_HASHSET_MAX_CAP) + return bf_err_r(-ENOMEM, "hashset reached maximum capacity"); + + if (_bf_hashset_needs_grow(set)) { + r = _bf_hashset_grow(set); + if (r) + return r; + // Find new free_idx for this element. + if (_bf_hashset_find(set, data, NULL, &free_idx)) + return -EEXIST; + } + + elem = malloc(sizeof(*elem)); + if (!elem) + return -ENOMEM; + + elem->data = data; + + was_tombstone = _bf_hashset_slot_is_tombstone(set->slots[free_idx]); + set->slots[free_idx] = elem; + + elem->prev = set->tail; + elem->next = NULL; + if (set->tail) + set->tail->next = elem; + else + set->head = elem; + set->tail = elem; + + ++set->len; + + if (!was_tombstone) + ++set->slots_in_use; + + return 0; +} + +bool bf_hashset_contains(const bf_hashset *set, const void *data) +{ + assert(set); + assert(data); + + return _bf_hashset_find(set, data, NULL, NULL); +} + +int bf_hashset_delete(bf_hashset *set, const void *data) +{ + bf_hashset_elem *elem; + size_t found_idx = SIZE_MAX; + + assert(set); + assert(data); + + if (!_bf_hashset_find(set, data, &found_idx, NULL)) + return -ENOENT; + + elem = set->slots[found_idx]; + + if (set->ops.free) + set->ops.free(&elem->data, set->ctx); + + if (elem->prev) + elem->prev->next = elem->next; + else + set->head = elem->next; + + if (elem->next) + elem->next->prev = elem->prev; + else + set->tail = elem->prev; + + set->slots[found_idx] = _BF_HASHSET_TOMBSTONE; + freep((void *)&elem); + --set->len; + + return 0; +} diff --git a/src/libbpfilter/include/bpfilter/core/hashset.h b/src/libbpfilter/include/bpfilter/core/hashset.h new file mode 100644 index 000000000..84c2e4b68 --- /dev/null +++ b/src/libbpfilter/include/bpfilter/core/hashset.h @@ -0,0 +1,226 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + */ + +#pragma once + +#include +#include +#include + +/** + * @file hashset.h + * + * Open-addressing hashset with linear probing that preserves insertion order. + * Backed by an array of `bf_hashset_elem` slots; each slot holds a data + * pointer plus prev/next pointers that thread a doubly-linked list through + * the live elements. Iteration via `bf_hashset_foreach` yields elements + * in insertion order. Uses user-provided callbacks (with an opaque context + * pointer) for hashing, comparison, and cleanup. + */ + +typedef uint64_t (*bf_hashset_ops_hash)(const void *data, void *ctx); +typedef bool (*bf_hashset_ops_equal)(const void *lhs, const void *rhs, + void *ctx); +typedef void (*bf_hashset_ops_free)(void **data, void *ctx); + +/** + * @brief Callbacks for hashset element operations. + */ +typedef struct +{ + /// Hash function for an element. Must be non-NULL. + bf_hashset_ops_hash hash; + /// Equality comparison for two elements. Must be non-NULL. + bf_hashset_ops_equal equal; + /// Free callback for an element. If NULL, elements won't be freed. + bf_hashset_ops_free free; +} bf_hashset_ops; + +/** + * @brief Element node stored in a hashset, maintaining an insertion-order + * linked list. + * + * @warning From the user's perspective, the inside of this structure should + * not be directly accessed. Directly modifying any of the fields should be + * considered undefined behavior. + */ +typedef struct bf_hashset_elem +{ + /// User-provided data pointer. + void *data; + /// Previous element in insertion order, or NULL if first. + struct bf_hashset_elem *prev; + /// Next element in insertion order, or NULL if last. + struct bf_hashset_elem *next; +} bf_hashset_elem; + +/** + * @brief Open-addressing hashset with linear probing. Preserves insertion + * order. + * + * @warning From the user's perspective, the inside of this structure should + * not be directly accessed. Directly modifying any of the fields should be + * considered undefined behavior. + */ +typedef struct bf_hashset +{ + /// Backing array of slot pointers. + bf_hashset_elem **slots; + /// Number of allocated slots. + size_t cap; + /// Number of live elements (not counting tombstones). + size_t len; + /// Number of occupied + tombstone slots (used for load factor). + size_t slots_in_use; + /// Callbacks for hashing, comparing, and freeing elements. + bf_hashset_ops ops; + /// Opaque context pointer passed to every callback. + void *ctx; + /// First element in insertion order, or NULL if empty. + bf_hashset_elem *head; + /// Last element in insertion order, or NULL if empty. + bf_hashset_elem *tail; +} bf_hashset; + +/** + * @brief Compound literal for stack-initialising an empty hashset. + * + * @param ops_ptr Pointer to a `bf_hashset_ops` struct. Must be non-NULL. + * @param ctx_ptr Opaque context pointer. Can be NULL. + * @return An initialised, empty `bf_hashset` value. + */ +#define bf_hashset_default(ops_ptr, ctx_ptr) \ + ((bf_hashset) {.ops = *(ops_ptr), .ctx = (ctx_ptr)}) + +#define _free_bf_hashset_ __attribute__((cleanup(bf_hashset_free))) +#define _clean_bf_hashset_ __attribute__((cleanup(bf_hashset_clean))) + +/** + * @brief Iterate over all elements in a hashset in insertion order. + * + * Unsafe to add elements during iteration (can cause rehashing). + * Safe to `bf_hashset_delete` the current element during iteration. + * + * @param set Pointer to the hashset. Must be non-NULL. + * @param elem_var Name of the `bf_hashset_elem *` variable to hold each + * element. Access the stored data via `elem_var->data`. + */ +#define bf_hashset_foreach(set, elem_var) \ + for (bf_hashset_elem * (elem_var) = (set)->head, \ + *__next = (set)->head ? (set)->head->next : NULL; \ + (elem_var); \ + (elem_var) = __next, __next = __next ? __next->next : NULL) + +/** + * @brief Allocate and initialise a new hashset. + * + * @param set Set to allocate and initialise. Can't be NULL. + * @param ops Callbacks for hashing, comparing, and freeing elements. + * `hash` and `equal` must be non-NULL. Can't be NULL. + * @param ctx Opaque context pointer passed to every callback. Can be NULL. + * @return 0 on success, or a negative errno value on failure. + */ +int bf_hashset_new(bf_hashset **set, const bf_hashset_ops *ops, void *ctx); + +/** + * @brief Free a hashset. + * + * @param set Pointer to the hashset pointer. Must be non-NULL. + */ +void bf_hashset_free(bf_hashset **set); + +/** + * @brief Initialise a hashset in place. + * + * @param set Set to initialise. Can't be NULL. + * @param ops Callbacks for hashing, comparing, and freeing elements. + * `hash` and `equal` must be non-NULL. Can't be NULL. + * @param ctx Opaque context pointer passed to every callback. Can be NULL. + */ +void bf_hashset_init(bf_hashset *set, const bf_hashset_ops *ops, void *ctx); + +/** + * @brief Clean up a hashset, freeing all elements and the backing buffer. + * + * After this call the hashset is empty and can be reused or discarded. + * + * @param set Pointer to the hashset. Must be non-NULL. + */ +void bf_hashset_clean(bf_hashset *set); + +/** + * @brief Get the number of elements in the hashset. + * + * @param set Initialised hashset. Must be non-NULL. + * @return Number of elements stored. + */ +size_t bf_hashset_size(const bf_hashset *set); + +/** + * @brief Get the current number of slots. + * + * @param set Initialised hashset. Must be non-NULL. + * @return Current slot count. + */ +size_t bf_hashset_cap(const bf_hashset *set); + +/** + * @brief Check if the hashset is empty. + * + * @param set Initialised hashset. Must be non-NULL. + * @return True if the hashset has no elements. + */ +bool bf_hashset_is_empty(const bf_hashset *set); + +/** + * @brief Pre-allocate capacity for at least `count` elements. + * + * Ensures the backing array is large enough to hold `count` elements without + * exceeding the load factor. If the current capacity is already sufficient, + * this is a no-op. Existing elements are preserved. + * + * @param set Initialised hashset. Must be non-NULL. + * @param count Expected number of elements. + * @return 0 on success, or a negative errno value on failure. + */ +int bf_hashset_reserve(bf_hashset *set, size_t count); + +/** + * @brief Insert an element into the hashset. + * + * If the element already exists, no duplicate is added and `-EEXIST` is + * returned; ownership of `data` is not transferred in that case. + * The hashset grows automatically when the load factor is exceeded. + * On successful insertion, the hashset takes ownership of `data` and + * appends it to the end of the iteration order. + * + * @param set Initialised hashset. Can't be NULL. + * @param data Pointer to store. Can't be NULL. + * @return 0 on success, or a negative errno value on failure. + */ +int bf_hashset_add(bf_hashset *set, void *data); + +/** + * @brief Check whether an element exists in the hashset. + * + * @param set Initialised hashset. Must be non-NULL. + * @param data Element to look up. Must be non-NULL. + * @return True if a matching element is present. + */ +bool bf_hashset_contains(const bf_hashset *set, const void *data); + +/** + * @brief Delete an element from the hashset. + * + * The slot is marked as a tombstone and the element is unlinked from the + * iteration order. The element is freed using the `free` callback if one + * was provided. + * + * @param set Initialised hashset. Must be non-NULL. + * @param data Element to delete. Must be non-NULL. + * @return 0 on success, -ENOENT if the element is not found, or a negative + * errno value on failure. + */ +int bf_hashset_delete(bf_hashset *set, const void *data); diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt index fbbb898d5..eb2070ebd 100644 --- a/tests/unit/CMakeLists.txt +++ b/tests/unit/CMakeLists.txt @@ -82,6 +82,7 @@ bf_add_c_test(unit libbpfilter/helper.c) bf_add_c_test(unit libbpfilter/hook.c) bf_add_c_test(unit libbpfilter/if.c) bf_add_c_test(unit libbpfilter/io.c) +bf_add_c_test(unit libbpfilter/core/hashset.c) bf_add_c_test(unit libbpfilter/core/list.c) bf_add_c_test(unit libbpfilter/logger.c) bf_add_c_test(unit libbpfilter/matcher.c) diff --git a/tests/unit/libbpfilter/core/hashset.c b/tests/unit/libbpfilter/core/hashset.c new file mode 100644 index 000000000..b783043bb --- /dev/null +++ b/tests/unit/libbpfilter/core/hashset.c @@ -0,0 +1,431 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + */ + +#include +#include + +#include "test.h" + +static uint64_t _bf_uint32_hash(const void *data, void *ctx) +{ + (void)ctx; + return bf_fnv1a(data, sizeof(uint32_t), BF_FNV1A_INIT); +} + +static bool _bf_uint32_equal(const void *lhs, const void *rhs, void *ctx) +{ + (void)ctx; + return *(const uint32_t *)lhs == *(const uint32_t *)rhs; +} + +static void _bf_uint32_free(void **data, void *ctx) +{ + (void)ctx; + freep((void *)data); +} + +static const bf_hashset_ops _bf_uint32_ops = { + .hash = _bf_uint32_hash, + .equal = _bf_uint32_equal, + .free = _bf_uint32_free, +}; + +/* Hash function that always returns 0, forcing every element into the same + * bucket so that all insertions collide and exercise linear probing. */ +static uint64_t _bf_uint32_collide_hash(const void *data, void *ctx) +{ + (void)data; + (void)ctx; + return 0; +} + +static const bf_hashset_ops _bf_uint32_collide_ops = { + .hash = _bf_uint32_collide_hash, + .equal = _bf_uint32_equal, + .free = _bf_uint32_free, +}; + +static uint32_t *_make_u32(uint32_t val) +{ + uint32_t *p = malloc(sizeof(*p)); + assert_non_null(p); + *p = val; + return p; +} + +static void new_and_free(void **state) +{ + _free_bf_hashset_ bf_hashset *set = NULL; + + (void)state; + + assert_ok(bf_hashset_new(&set, &_bf_uint32_ops, NULL)); + assert_non_null(set); + assert_true(bf_hashset_is_empty(set)); + bf_hashset_free(&set); + assert_null(set); + + // Auto-free via cleanup attribute + assert_ok(bf_hashset_new(&set, &_bf_uint32_ops, NULL)); +} + +static void init_and_clean(void **state) +{ + _clean_bf_hashset_ bf_hashset set; + + (void)state; + + bf_hashset_init(&set, &_bf_uint32_ops, NULL); + assert_true(bf_hashset_is_empty(&set)); + assert_int_equal(bf_hashset_size(&set), 0); + assert_int_equal(bf_hashset_cap(&set), 0); +} + +static void add_and_contains(void **state) +{ + _clean_bf_hashset_ bf_hashset set; + uint32_t key; + + (void)state; + + bf_hashset_init(&set, &_bf_uint32_ops, NULL); + + key = 42; + assert_false(bf_hashset_contains(&set, &key)); + assert_ok(bf_hashset_add(&set, _make_u32(42))); + assert_int_equal(bf_hashset_size(&set), 1); + assert_true(bf_hashset_contains(&set, &key)); +} + +static void add_multiple(void **state) +{ + _clean_bf_hashset_ bf_hashset set; + + (void)state; + + bf_hashset_init(&set, &_bf_uint32_ops, NULL); + + for (uint32_t i = 0; i < 5; ++i) + assert_ok(bf_hashset_add(&set, _make_u32(i * 100))); + + assert_int_equal(bf_hashset_size(&set), 5); + + for (uint32_t i = 0; i < 5; ++i) { + uint32_t key = i * 100; + assert_true(bf_hashset_contains(&set, &key)); + } +} + +static void add_duplicate(void **state) +{ + _clean_bf_hashset_ bf_hashset set; + uint32_t *dup; + + (void)state; + + bf_hashset_init(&set, &_bf_uint32_ops, NULL); + + assert_ok(bf_hashset_add(&set, _make_u32(42))); + + // Duplicate; hashset returns -EEXIST and doesn't take ownership + dup = _make_u32(42); + assert_int_equal(bf_hashset_add(&set, dup), -EEXIST); + assert_int_equal(bf_hashset_size(&set), 1); + free(dup); +} + +static void remove_elem(void **state) +{ + _clean_bf_hashset_ bf_hashset set; + uint32_t key; + + (void)state; + + bf_hashset_init(&set, &_bf_uint32_ops, NULL); + + assert_ok(bf_hashset_add(&set, _make_u32(10))); + assert_ok(bf_hashset_add(&set, _make_u32(20))); + assert_int_equal(bf_hashset_size(&set), 2); + + key = 10; + assert_ok(bf_hashset_delete(&set, &key)); + assert_int_equal(bf_hashset_size(&set), 1); + assert_false(bf_hashset_contains(&set, &key)); + + key = 20; + assert_true(bf_hashset_contains(&set, &key)); + + key = 99; + assert_int_equal(bf_hashset_delete(&set, &key), -ENOENT); + assert_int_equal(bf_hashset_size(&set), 1); +} + +static void remove_and_readd(void **state) +{ + _clean_bf_hashset_ bf_hashset set; + uint32_t key; + + (void)state; + + bf_hashset_init(&set, &_bf_uint32_ops, NULL); + + assert_ok(bf_hashset_add(&set, _make_u32(42))); + + key = 42; + assert_ok(bf_hashset_delete(&set, &key)); + assert_false(bf_hashset_contains(&set, &key)); + + assert_ok(bf_hashset_add(&set, _make_u32(42))); + assert_int_equal(bf_hashset_size(&set), 1); + assert_true(bf_hashset_contains(&set, &key)); +} + +static void foreach(void **state) +{ + _clean_bf_hashset_ bf_hashset set; + uint32_t expected[] = {100, 200, 300, 400, 500}; + size_t idx; + size_t count; + + (void)state; + + bf_hashset_init(&set, &_bf_uint32_ops, NULL); + + // foreach on empty set does nothing + count = 0; + bf_hashset_foreach (&set, elem) { + (void)elem; + ++count; + } + assert_int_equal(count, 0); + + // Insertion order is preserved + for (uint32_t i = 0; i < 5; ++i) + assert_ok(bf_hashset_add(&set, _make_u32(expected[i]))); + + idx = 0; + bf_hashset_foreach (&set, elem) { + assert_int_equal(*(uint32_t *)elem->data, expected[idx]); + ++idx; + } + assert_int_equal(idx, 5); +} + +static void foreach_after_removal(void **state) +{ + _clean_bf_hashset_ bf_hashset set; + uint32_t expected[] = {1, 2, 4, 5}; + uint32_t key; + size_t idx = 0; + + (void)state; + + bf_hashset_init(&set, &_bf_uint32_ops, NULL); + + for (uint32_t i = 1; i <= 5; ++i) + assert_ok(bf_hashset_add(&set, _make_u32(i))); + + key = 3; + assert_ok(bf_hashset_delete(&set, &key)); + + // Removed element is skipped, order preserved + bf_hashset_foreach (&set, elem) { + assert_int_equal(*(uint32_t *)elem->data, expected[idx]); + ++idx; + } + assert_int_equal(idx, 4); +} + +static void foreach_break(void **state) +{ + _clean_bf_hashset_ bf_hashset set; + size_t count; + + (void)state; + + bf_hashset_init(&set, &_bf_uint32_ops, NULL); + + for (uint32_t i = 0; i < 3; ++i) + assert_ok(bf_hashset_add(&set, _make_u32(i + 1))); + + count = 0; + bf_hashset_foreach (&set, elem) { + (void)elem; + ++count; + break; + } + assert_int_equal(count, 1); +} + +static void foreach_remove(void **state) +{ + _clean_bf_hashset_ bf_hashset set; + size_t count = 0; + + (void)state; + + bf_hashset_init(&set, &_bf_uint32_ops, NULL); + + for (uint32_t i = 1; i <= 5; ++i) + assert_ok(bf_hashset_add(&set, _make_u32(i))); + + // Remove every element during iteration + bf_hashset_foreach (&set, elem) { + assert_ok(bf_hashset_delete(&set, elem->data)); + ++count; + } + + assert_int_equal(count, 5); + assert_true(bf_hashset_is_empty(&set)); +} + +static void reserve(void **state) +{ + _clean_bf_hashset_ bf_hashset set; + + (void)state; + + bf_hashset_init(&set, &_bf_uint32_ops, NULL); + + assert_ok(bf_hashset_reserve(&set, 0)); + assert_int_equal(bf_hashset_cap(&set), 0); + + assert_ok(bf_hashset_reserve(&set, 100)); + assert_true(bf_hashset_cap(&set) >= 200); + + size_t cap_after = bf_hashset_cap(&set); + for (uint32_t i = 0; i < 100; ++i) + assert_ok(bf_hashset_add(&set, _make_u32(i))); + assert_int_equal(bf_hashset_cap(&set), cap_after); + assert_int_equal(bf_hashset_size(&set), 100); + + assert_ok(bf_hashset_reserve(&set, 10)); + assert_int_equal(bf_hashset_cap(&set), cap_after); +} + +static void grow(void **state) +{ + _clean_bf_hashset_ bf_hashset set; + size_t idx; + + (void)state; + + bf_hashset_init(&set, &_bf_uint32_ops, NULL); + assert_true(bf_hashset_cap(&set) <= 16); + + for (uint32_t i = 0; i < 20; ++i) + assert_ok(bf_hashset_add(&set, _make_u32(i))); + + assert_int_equal(bf_hashset_size(&set), 20); + assert_true(bf_hashset_cap(&set) > 16); + + for (uint32_t i = 0; i < 20; ++i) { + uint32_t key = i; + assert_true(bf_hashset_contains(&set, &key)); + } + + // Insertion order is preserved across grow + idx = 0; + bf_hashset_foreach (&set, elem) { + assert_int_equal(*(uint32_t *)elem->data, idx); + ++idx; + } + assert_int_equal(idx, 20); +} + +static void collisions(void **state) +{ + _clean_bf_hashset_ bf_hashset set; + uint32_t key; + size_t idx; + + (void)state; + + bf_hashset_init(&set, &_bf_uint32_collide_ops, NULL); + + for (uint32_t i = 1; i <= 6; ++i) + assert_ok(bf_hashset_add(&set, _make_u32(i * 10))); + + assert_int_equal(bf_hashset_size(&set), 6); + + for (uint32_t i = 1; i <= 6; ++i) { + uint32_t key = i * 10; + assert_true(bf_hashset_contains(&set, &key)); + } + + // Insertion order preserved despite all hashes colliding + idx = 0; + uint32_t expected_before[] = {10, 20, 30, 40, 50, 60}; + bf_hashset_foreach (&set, elem) { + assert_int_equal(*(uint32_t *)elem->data, expected_before[idx]); + ++idx; + } + assert_int_equal(idx, 6); + + // Delete from the middle of the probe chain to create tombstones + key = 20; + assert_ok(bf_hashset_delete(&set, &key)); + key = 40; + assert_ok(bf_hashset_delete(&set, &key)); + + assert_int_equal(bf_hashset_size(&set), 4); + + // Lookups must still find elements past the tombstones + key = 10; + assert_true(bf_hashset_contains(&set, &key)); + key = 30; + assert_true(bf_hashset_contains(&set, &key)); + key = 50; + assert_true(bf_hashset_contains(&set, &key)); + key = 60; + assert_true(bf_hashset_contains(&set, &key)); + + // Deleted elements must not be found + key = 20; + assert_false(bf_hashset_contains(&set, &key)); + key = 40; + assert_false(bf_hashset_contains(&set, &key)); + + // Re-add into tombstone slots + assert_ok(bf_hashset_add(&set, _make_u32(20))); + assert_ok(bf_hashset_add(&set, _make_u32(40))); + assert_int_equal(bf_hashset_size(&set), 6); + + for (uint32_t i = 1; i <= 6; ++i) { + key = i * 10; + assert_true(bf_hashset_contains(&set, &key)); + } + + // Insertion order: 10,30,50,60 (survivors) then 20,40 (re-added) + uint32_t expected_after[] = {10, 30, 50, 60, 20, 40}; + idx = 0; + bf_hashset_foreach (&set, elem) { + assert_int_equal(*(uint32_t *)elem->data, expected_after[idx]); + ++idx; + } + assert_int_equal(idx, 6); +} + +int main(void) +{ + const struct CMUnitTest tests[] = { + cmocka_unit_test(new_and_free), + cmocka_unit_test(init_and_clean), + cmocka_unit_test(add_and_contains), + cmocka_unit_test(add_multiple), + cmocka_unit_test(add_duplicate), + cmocka_unit_test(remove_elem), + cmocka_unit_test(remove_and_readd), + cmocka_unit_test(foreach), + cmocka_unit_test(foreach_after_removal), + cmocka_unit_test(foreach_break), + cmocka_unit_test(foreach_remove), + cmocka_unit_test(reserve), + cmocka_unit_test(grow), + cmocka_unit_test(collisions), + }; + + return cmocka_run_group_tests(tests, NULL, NULL); +} From 3fd44fb8d64b62ac55ec063315374656c533e677 Mon Sep 17 00:00:00 2001 From: Pawel Zmarzly Date: Tue, 17 Feb 2026 01:31:37 +0000 Subject: [PATCH 3/3] lib: set: use bf_hashset for elems field --- src/bfcli/print.c | 10 +-- src/libbpfilter/cgen/prog/map.c | 2 +- src/libbpfilter/cgen/program.c | 8 +- src/libbpfilter/include/bpfilter/set.h | 13 ++- src/libbpfilter/set.c | 112 +++++++++++++------------ tests/harness/test.c | 16 ++-- tests/harness/test.h | 2 +- tests/unit/libbpfilter/set.c | 74 ++++++++++++---- 8 files changed, 143 insertions(+), 94 deletions(-) diff --git a/src/bfcli/print.c b/src/bfcli/print.c index b920dc21d..6860cecf7 100644 --- a/src/bfcli/print.c +++ b/src/bfcli/print.c @@ -171,16 +171,15 @@ void bfc_chain_dump(struct bf_chain *chain, struct bf_hookopts *hookopts, } (void)fprintf(stdout, ") in {\n"); - bf_list_foreach (&set->elems, elem_node) { + bf_hashset_foreach (&set->elems, node) { uint32_t payload_idx = 0; - void *payload = bf_list_node_get_data(elem_node); (void)fprintf(stdout, " "); for (size_t i = 0; i < set->n_comps; ++i) { const struct bf_matcher_meta *meta = bf_matcher_get_meta(set->key[i]); - meta->ops[BF_MATCHER_IN].print(payload + payload_idx); + meta->ops[BF_MATCHER_IN].print(node->data + payload_idx); payload_idx += meta->ops[BF_MATCHER_IN].ref_payload_size; if (i != set->n_comps - 1) @@ -220,16 +219,15 @@ void bfc_chain_dump(struct bf_chain *chain, struct bf_hookopts *hookopts, } else { (void)fprintf(stdout, ") in {\n"); - bf_list_foreach (&set->elems, elem_node) { + bf_hashset_foreach (&set->elems, node) { uint32_t payload_idx = 0; - void *payload = bf_list_node_get_data(elem_node); (void)fprintf(stdout, " "); for (size_t i = 0; i < set->n_comps; ++i) { const struct bf_matcher_meta *meta = bf_matcher_get_meta(set->key[i]); - meta->ops[BF_MATCHER_IN].print(payload + + meta->ops[BF_MATCHER_IN].print(node->data + payload_idx); payload_idx += meta->ops[BF_MATCHER_IN].ref_payload_size; diff --git a/src/libbpfilter/cgen/prog/map.c b/src/libbpfilter/cgen/prog/map.c index 42e9d238e..dab5a1d4a 100644 --- a/src/libbpfilter/cgen/prog/map.c +++ b/src/libbpfilter/cgen/prog/map.c @@ -220,7 +220,7 @@ int bf_map_new_from_set(struct bf_map **map, const char *name, return _bf_map_new(map, name, BF_MAP_TYPE_SET, set->use_trie ? BF_BPF_MAP_TYPE_LPM_TRIE : BF_BPF_MAP_TYPE_HASH, - set->elem_size, 1, bf_list_size(&set->elems)); + set->elem_size, 1, bf_hashset_size(&set->elems)); } int bf_map_new_from_pack(struct bf_map **map, int dir_fd, bf_rpack_node_t node) diff --git a/src/libbpfilter/cgen/program.c b/src/libbpfilter/cgen/program.c index 84f8d7b5f..fde5578c1 100644 --- a/src/libbpfilter/cgen/program.c +++ b/src/libbpfilter/cgen/program.c @@ -724,7 +724,7 @@ static int _bf_program_load_sets_maps(struct bf_program *new_prog) _free_bf_map_ struct bf_map *map = NULL; _cleanup_free_ uint8_t *values = NULL; _cleanup_free_ uint8_t *keys = NULL; - size_t nelems = bf_list_size(&set->elems); + size_t nelems = bf_hashset_size(&set->elems); size_t idx = 0; if (!nelems) { @@ -748,10 +748,8 @@ static int _bf_program_load_sets_maps(struct bf_program *new_prog) if (!keys) return bf_err_r(errno, "failed to allocate map keys"); - bf_list_foreach (&set->elems, elem_node) { - void *elem = bf_list_node_get_data(elem_node); - - memcpy(keys + (idx * set->elem_size), elem, set->elem_size); + bf_hashset_foreach (&set->elems, elem) { + memcpy(keys + (idx * set->elem_size), elem->data, set->elem_size); values[idx] = 1; ++idx; } diff --git a/src/libbpfilter/include/bpfilter/set.h b/src/libbpfilter/include/bpfilter/set.h index d0d35d0b1..d5701c94b 100644 --- a/src/libbpfilter/include/bpfilter/set.h +++ b/src/libbpfilter/include/bpfilter/set.h @@ -8,7 +8,7 @@ #include #include -#include +#include #include #include #include @@ -54,7 +54,7 @@ struct bf_set size_t n_comps; /** Elements of the set. */ - bf_list elems; + bf_hashset elems; /** Size of a single element. All elements have the same size, it's derived * from the key. */ @@ -126,6 +126,15 @@ void bf_set_dump(const struct bf_set *set, prefix_t *prefix); */ bool bf_set_is_empty(const struct bf_set *set); +/** + * @brief Pre-allocate capacity for at least `count` elements. + * + * @param set Initialised set. Can't be NULL. + * @param count Expected number of elements. + * @return 0 on success, or a negative errno value on failure. + */ +int bf_set_reserve(struct bf_set *set, size_t count); + int bf_set_add_elem(struct bf_set *set, const void *elem); /** diff --git a/src/libbpfilter/set.c b/src/libbpfilter/set.c index 212c4aa99..314236950 100644 --- a/src/libbpfilter/set.c +++ b/src/libbpfilter/set.c @@ -11,7 +11,7 @@ #include #include -#include "bpfilter/core/list.h" +#include "bpfilter/core/hashset.h" #include "bpfilter/dump.h" #include "bpfilter/helper.h" #include "bpfilter/logger.h" @@ -22,6 +22,28 @@ (BF_FLAGS(BF_MATCHER_IP4_SNET, BF_MATCHER_IP4_DNET, BF_MATCHER_IP6_SNET, \ BF_MATCHER_IP6_DNET)) +static uint64_t _bf_set_elem_hash(const void *data, void *ctx) +{ + return bf_fnv1a(data, *(const size_t *)ctx, BF_FNV1A_INIT); +} + +static bool _bf_set_elem_equal(const void *lhs, const void *rhs, void *ctx) +{ + return memcmp(lhs, rhs, *(const size_t *)ctx) == 0; +} + +static void _bf_set_elem_free(void **data, void *ctx) +{ + (void)ctx; + freep((void *)data); +} + +static const bf_hashset_ops _bf_set_elem_ops = { + .hash = _bf_set_elem_hash, + .equal = _bf_set_elem_equal, + .free = _bf_set_elem_free, +}; + int bf_set_new(struct bf_set **set, const char *name, enum bf_matcher_type *key, size_t n_comps) { @@ -40,7 +62,7 @@ int bf_set_new(struct bf_set **set, const char *name, enum bf_matcher_type *key, BF_SET_MAX_N_COMPS); } - _set = malloc(sizeof(*_set)); + _set = calloc(1, sizeof(*_set)); if (!_set) return -ENOMEM; @@ -54,7 +76,7 @@ int bf_set_new(struct bf_set **set, const char *name, enum bf_matcher_type *key, memcpy(&(_set)->key, key, n_comps * sizeof(enum bf_matcher_type)); _set->n_comps = n_comps; _set->elem_size = 0; - _set->elems = bf_list_default(freep, NULL); + bf_hashset_init(&_set->elems, &_bf_set_elem_ops, &_set->elem_size); for (size_t i = 0; i < n_comps; ++i) { const struct bf_matcher_ops *ops; @@ -194,9 +216,12 @@ int bf_set_add_elem_raw(struct bf_set *set, const char *raw_elem) raw_elem); } - r = bf_list_add_tail(&set->elems, elem); + r = bf_hashset_add(&set->elems, elem); + if (r == -EEXIST) + return 0; if (r) return bf_err_r(r, "failed to insert element into set"); + TAKE_PTR(elem); return 0; @@ -325,7 +350,7 @@ void bf_set_free(struct bf_set **set) if (!*set) return; - bf_list_clean(&(*set)->elems); + bf_hashset_clean(&(*set)->elems); freep((void *)&(*set)->name); freep((void *)set); } @@ -346,8 +371,8 @@ int bf_set_pack(const struct bf_set *set, bf_wpack_t *pack) bf_wpack_close_array(pack); bf_wpack_open_array(pack, "elements"); - bf_list_foreach (&set->elems, elem_node) - bf_wpack_bin(pack, bf_list_node_get_data(elem_node), set->elem_size); + bf_hashset_foreach (&set->elems, elem) + bf_wpack_bin(pack, elem->data, set->elem_size); bf_wpack_close_array(pack); return bf_wpack_is_valid(pack) ? 0 : -EINVAL; @@ -355,6 +380,8 @@ int bf_set_pack(const struct bf_set *set, bf_wpack_t *pack) void bf_set_dump(const struct bf_set *set, prefix_t *prefix) { + size_t dump_idx = 0; + assert(set); assert(prefix); @@ -373,17 +400,17 @@ void bf_set_dump(const struct bf_set *set, prefix_t *prefix) bf_dump_prefix_pop(prefix); DUMP(prefix, "elem_size: %lu", set->elem_size); - DUMP(bf_dump_prefix_last(prefix), "elems: bf_list[%lu]", - bf_list_size(&set->elems)); + DUMP(bf_dump_prefix_last(prefix), "elems: bf_hashset[%lu]", + bf_hashset_size(&set->elems)); bf_dump_prefix_push(prefix); - bf_list_foreach (&set->elems, elem_node) { - if (bf_list_is_tail(&set->elems, elem_node)) + bf_hashset_foreach (&set->elems, elem) { + if (++dump_idx == bf_hashset_size(&set->elems)) bf_dump_prefix_last(prefix); - DUMP(prefix, "void * @ %p", bf_list_node_get_data(elem_node)); + DUMP(prefix, "void * @ %p", elem->data); bf_dump_prefix_push(prefix); - bf_dump_hex(prefix, bf_list_node_get_data(elem_node), set->elem_size); + bf_dump_hex(prefix, elem->data, set->elem_size); bf_dump_prefix_pop(prefix); } bf_dump_prefix_pop(prefix); @@ -405,8 +432,10 @@ int bf_set_add_elem(struct bf_set *set, const void *elem) memcpy(_elem, elem, set->elem_size); - r = bf_list_add_tail(&set->elems, _elem); - if (r < 0) + r = bf_hashset_add(&set->elems, _elem); + if (r == -EEXIST) + return 0; + if (r) return r; TAKE_PTR(_elem); @@ -418,7 +447,14 @@ bool bf_set_is_empty(const struct bf_set *set) { assert(set); - return bf_list_is_empty(&set->elems); + return bf_hashset_is_empty(&set->elems); +} + +int bf_set_reserve(struct bf_set *set, size_t count) +{ + assert(set); + + return bf_hashset_reserve(&set->elems, count); } /** @@ -459,30 +495,10 @@ int bf_set_add_many(struct bf_set *dest, struct bf_set **to_add) if (r) return r; - // @todo This has O(n * m) complexity. We could get to O(n log n + m) by - // turning the linked list into an array and sorting it, but we should - // just replace underlying bf_list with true hashset and enjoy O(m). - bf_list_foreach (&(*to_add)->elems, elem_node) { - void *elem_to_add = bf_list_node_get_data(elem_node); - bool found = false; - - bf_list_foreach (&dest->elems, dest_elem_node) { - const void *dest_elem = bf_list_node_get_data(dest_elem_node); - - if (memcmp(dest_elem, elem_to_add, dest->elem_size) == 0) { - found = true; - break; - } - } - - if (!found) { - r = bf_list_add_tail(&dest->elems, - bf_list_node_get_data(elem_node)); - if (r) - return bf_err_r(r, "failed to add element to set"); - // Take ownership of data to stop to_add cleanup from freeing it. - bf_list_node_take_data(elem_node); - } + bf_hashset_foreach (&(*to_add)->elems, elem) { + r = bf_set_add_elem(dest, elem->data); + if (r) + return bf_err_r(r, "failed to add element to set"); } bf_set_free(to_add); @@ -502,18 +518,10 @@ int bf_set_remove_many(struct bf_set *dest, struct bf_set **to_remove) if (r) return r; - // @todo This has O(n * m) complexity. Could be O(m) if we used hashsets. - bf_list_foreach (&(*to_remove)->elems, elem_node) { - const void *elem_to_remove = bf_list_node_get_data(elem_node); - - bf_list_foreach (&dest->elems, dest_elem_node) { - const void *dest_elem = bf_list_node_get_data(dest_elem_node); - - if (memcmp(dest_elem, elem_to_remove, dest->elem_size) == 0) { - bf_list_delete(&dest->elems, dest_elem_node); - break; - } - } + bf_hashset_foreach (&(*to_remove)->elems, elem) { + r = bf_hashset_delete(&dest->elems, elem->data); + if (r && r != -ENOENT) + return bf_err_r(r, "failed to remove element from set"); } bf_set_free(to_remove); diff --git a/tests/harness/test.c b/tests/harness/test.c index e88088b7e..4c1a85c30 100644 --- a/tests/harness/test.c +++ b/tests/harness/test.c @@ -251,21 +251,17 @@ bool bft_counter_eq(const struct bf_counter *lhs, const struct bf_counter *rhs) return lhs->count == rhs->count && lhs->size == rhs->size; } -bool bft_set_eq(const struct bf_set *lhs, const struct bf_set *rhs) +bool bft_set_eq_ordered(const struct bf_set *lhs, const struct bf_set *rhs) { - const struct bf_list_node *n0, *n1; - - if (bf_list_size(&lhs->elems) != bf_list_size(&rhs->elems)) + if (bf_hashset_size(&lhs->elems) != bf_hashset_size(&rhs->elems)) return false; if (lhs->elem_size != rhs->elem_size) return false; - n0 = bf_list_get_head(&lhs->elems); - n1 = bf_list_get_head(&rhs->elems); - for (; n0 || n1; n0 = bf_list_node_next(n0), n1 = bf_list_node_next(n1)) { - if (0 != memcmp(bf_list_node_get_data(n0), bf_list_node_get_data(n1), - lhs->elem_size)) + for (bf_hashset_elem *n0 = lhs->elems.head, *n1 = rhs->elems.head; n0 || n1; + n0 = n0 ? n0->next : NULL, n1 = n1 ? n1->next : NULL) { + if (!n0 || !n1 || memcmp(n0->data, n1->data, lhs->elem_size) != 0) return false; } @@ -290,7 +286,7 @@ bool bft_chain_equal(const struct bf_chain *chain0, bft_list_eq(&chain0->rules, &chain1->rules, (bft_list_eq_cb)bft_rule_equal) && bft_list_eq(&chain0->sets, &chain1->sets, - (bft_list_eq_cb)bft_set_eq); + (bft_list_eq_cb)bft_set_eq_ordered); } bool bft_rule_equal(const struct bf_rule *rule0, const struct bf_rule *rule1) diff --git a/tests/harness/test.h b/tests/harness/test.h index ff5cfd223..8bde270b4 100644 --- a/tests/harness/test.h +++ b/tests/harness/test.h @@ -81,7 +81,7 @@ struct bf_counter; */ bool bft_list_eq(const bf_list *lhs, const bf_list *rhs, bft_list_eq_cb cb); -bool bft_set_eq(const struct bf_set *lhs, const struct bf_set *rhs); +bool bft_set_eq_ordered(const struct bf_set *lhs, const struct bf_set *rhs); bool bft_counter_eq(const struct bf_counter *lhs, const struct bf_counter *rhs); bool bft_chain_equal(const struct bf_chain *chain0, const struct bf_chain *chain1); diff --git a/tests/unit/libbpfilter/set.c b/tests/unit/libbpfilter/set.c index d582b01ce..d53da2f4c 100644 --- a/tests/unit/libbpfilter/set.c +++ b/tests/unit/libbpfilter/set.c @@ -102,7 +102,7 @@ static void add_elem(void **state) assert_ok(bf_set_new(&set, "test", key, ARRAY_SIZE(key))); assert_ok(bf_set_add_elem(set, &elem)); - assert_int_equal(bf_list_size(&set->elems), 1); + assert_int_equal(bf_hashset_size(&set->elems), 1); } static void add_multiple_elems(void **state) @@ -122,7 +122,44 @@ static void add_multiple_elems(void **state) assert_ok(bf_set_add_elem(set, elem)); } - assert_int_equal(bf_list_size(&set->elems), 5); + assert_int_equal(bf_hashset_size(&set->elems), 5); +} + +static void add_duplicate(void **state) +{ + _free_bf_set_ struct bf_set *set = NULL; + + enum bf_matcher_type key[] = {BF_MATCHER_IP4_SADDR}; + + uint32_t elem = 0x01020304; + + (void)state; + + assert_ok(bf_set_new(&set, "test", key, ARRAY_SIZE(key))); + assert_ok(bf_set_add_elem(set, &elem)); + assert_ok(bf_set_add_elem(set, &elem)); + assert_int_equal(bf_hashset_size(&set->elems), 1); +} + +static void reserve(void **state) +{ + _free_bf_set_ struct bf_set *set = NULL; + + enum bf_matcher_type key[] = {BF_MATCHER_IP4_SADDR}; + + (void)state; + + assert_ok(bf_set_new(&set, "test", key, ARRAY_SIZE(key))); + assert_ok(bf_set_reserve(set, 50)); + assert_true(bf_hashset_cap(&set->elems) >= 100); + + size_t cap_after = bf_hashset_cap(&set->elems); + for (uint32_t i = 0; i < 50; ++i) { + uint32_t addr = 0x0A000001 + i; + assert_ok(bf_set_add_elem(set, &addr)); + } + assert_int_equal(bf_hashset_cap(&set->elems), cap_after); + assert_int_equal(bf_hashset_size(&set->elems), 50); } static void pack_and_unpack(void **state) @@ -150,7 +187,7 @@ static void pack_and_unpack(void **state) assert_ok(bf_rpack_kv_obj(bf_rpack_root(rpack), "set", &node)); assert_ok(bf_set_new_from_pack(&destination, node)); - assert_true(bft_set_eq(source, destination)); + assert_true(bft_set_eq_ordered(source, destination)); } static void pack_and_unpack_empty(void **state) @@ -180,8 +217,8 @@ static void pack_and_unpack_empty(void **state) assert_ok(bf_rpack_kv_obj(bf_rpack_root(rpack), "set", &node)); assert_ok(bf_set_new_from_pack(&destination, node)); - assert_true(bft_set_eq(source, destination)); - assert_int_equal(bf_list_size(&destination->elems), 0); + assert_true(bft_set_eq_ordered(source, destination)); + assert_int_equal(bf_hashset_size(&destination->elems), 0); } static void dump(void **state) @@ -224,7 +261,7 @@ static void new_from_raw(void **state) assert_string_equal(set->name, "test_raw"); assert_int_equal(set->n_comps, 1); assert_int_equal(set->key[0], BF_MATCHER_IP4_SADDR); - assert_int_equal(bf_list_size(&set->elems), 2); + assert_int_equal(bf_hashset_size(&set->elems), 2); } static void new_from_raw_multiple_keys(void **state) @@ -240,7 +277,7 @@ static void new_from_raw_multiple_keys(void **state) assert_int_equal(set->n_comps, 2); assert_int_equal(set->key[0], BF_MATCHER_IP4_DADDR); assert_int_equal(set->key[1], BF_MATCHER_TCP_SPORT); - assert_int_equal(bf_list_size(&set->elems), 2); + assert_int_equal(bf_hashset_size(&set->elems), 2); } static void new_from_raw_invalid(void **state) @@ -280,10 +317,10 @@ static void add_many_basic(void **state) assert_ok(bf_set_add_many(dest, &to_add)); - assert_int_equal(bf_list_size(&dest->elems), 3); - assert_int_equal(*(uint32_t *)bf_list_get_at(&dest->elems, 0), elem1); - assert_int_equal(*(uint32_t *)bf_list_get_at(&dest->elems, 1), elem2); - assert_int_equal(*(uint32_t *)bf_list_get_at(&dest->elems, 2), elem3); + assert_int_equal(bf_hashset_size(&dest->elems), 3); + assert_true(bf_hashset_contains(&dest->elems, &elem1)); + assert_true(bf_hashset_contains(&dest->elems, &elem2)); + assert_true(bf_hashset_contains(&dest->elems, &elem3)); assert_null(to_add); } @@ -347,9 +384,10 @@ static void remove_many_basic(void **state) assert_ok(bf_set_remove_many(dest, &to_remove)); - assert_int_equal(bf_list_size(&dest->elems), 2); - assert_int_equal(*(uint32_t *)bf_list_get_at(&dest->elems, 0), elem1); - assert_int_equal(*(uint32_t *)bf_list_get_at(&dest->elems, 1), elem3); + assert_int_equal(bf_hashset_size(&dest->elems), 2); + assert_true(bf_hashset_contains(&dest->elems, &elem1)); + assert_false(bf_hashset_contains(&dest->elems, &elem2)); + assert_true(bf_hashset_contains(&dest->elems, &elem3)); assert_null(to_remove); } @@ -377,9 +415,9 @@ static void remove_many_disjoint_sets(void **state) assert_ok(bf_set_add_elem(to_remove, &elem4)); assert_ok(bf_set_remove_many(dest, &to_remove)); - assert_int_equal(bf_list_size(&dest->elems), 2); - assert_int_equal(*(uint32_t *)bf_list_get_at(&dest->elems, 0), elem1); - assert_int_equal(*(uint32_t *)bf_list_get_at(&dest->elems, 1), elem2); + assert_int_equal(bf_hashset_size(&dest->elems), 2); + assert_true(bf_hashset_contains(&dest->elems, &elem1)); + assert_true(bf_hashset_contains(&dest->elems, &elem2)); assert_null(to_remove); } @@ -429,6 +467,8 @@ int main(void) cmocka_unit_test(new_with_invalid_network_combination), cmocka_unit_test(add_elem), cmocka_unit_test(add_multiple_elems), + cmocka_unit_test(add_duplicate), + cmocka_unit_test(reserve), cmocka_unit_test(pack_and_unpack), cmocka_unit_test(pack_and_unpack_empty), cmocka_unit_test(dump),