diff --git a/include/array_arena.hpp b/include/array_arena.hpp new file mode 100644 index 0000000..200fa80 --- /dev/null +++ b/include/array_arena.hpp @@ -0,0 +1,428 @@ +#ifndef ARRAY_ARENA_HPP +#define ARRAY_ARENA_HPP + +#include + +#include +#include +#include +#include +#include + +#include "array_ref.hpp" +#include "free_list_arena.hpp" +#include "string_ref.hpp" +#include "value_type.hpp" + +namespace tundradb { + +/** + * Arena for managing variable-length array data. + * + * Memory layout per array allocation: + * ┌──────────────────────────────────────────────────────────────────────┐ + * │ ArrayHeader (24B) │ element data (elem_size x capacity) │ + * │ [ref_count|flags|length| │ [elem0][elem1][...][padding] │ + * │ capacity|arena*] │ │ + * └──────────────────────────────────────────────────────────────────────┘ + * ↑ + * ArrayRef::data_ points here + * + * The header stores a pointer to the owning ArrayArena so that + * ArrayRef::release() can deallocate directly (no global registry). + * + * Thread safety: + * FreeListArena is NOT thread-safe, so all allocations/deallocations + * are protected by arena_mutex_. + */ +class ArrayArena { + public: + /** + * @param initial_size Initial arena size (default: 2 MB) + */ + explicit ArrayArena(size_t initial_size = 2 * 1024 * 1024) + : arena_(std::make_unique(initial_size, 16)) {} + + // Non-copyable (contains mutex) + ArrayArena(const ArrayArena&) = delete; + ArrayArena& operator=(const ArrayArena&) = delete; + + /** + * Allocate a new array with the given element type and capacity. + * Initializes header (ref_count = 0, length = 0, arena = this) + * and zeroes element data. + * The returned ArrayRef constructor bumps ref_count to 1. + * + * @param elem_type Element ValueType (e.g. INT32, DOUBLE, STRING) + * @param capacity Number of element slots to allocate + * @return Ok(ArrayRef) with ref_count = 1, or Error with reason on failure + */ + arrow::Result allocate(ValueType elem_type, uint32_t capacity) { + // capacity 0 is valid: return empty (null) ArrayRef for empty arrays + if (capacity == 0) { + return ArrayRef{}; + } + + const size_t elem_sz = get_type_size(elem_type); + const size_t data_bytes = elem_sz * capacity; + const size_t alloc_size = ArrayRef::HEADER_SIZE + data_bytes; + + std::lock_guard lock(arena_mutex_); + void* raw = arena_->allocate(alloc_size); + if (!raw) { + return arrow::Status::OutOfMemory( + "ArrayArena::allocate: arena allocation failed (requested ", + alloc_size, " bytes)"); + } + + init_header(raw, capacity); + + char* data = static_cast(raw) + ArrayRef::HEADER_SIZE; + zero_init_elements(data, elem_type, capacity); + + return ArrayRef{data, elem_type}; + } + + /** + * Allocate and populate an array from existing data. + * + * @param elem_type Element ValueType + * @param elements Pointer to element data to copy + * @param count Number of elements + * @param capacity Capacity (must be >= count; if 0, uses count) + * @return Ok(ArrayRef) with ref_count = 1 and length = count, or Error + */ + arrow::Result allocate_with_data(ValueType elem_type, + const void* elements, + uint32_t count, + uint32_t capacity = 0) { + if (capacity < count) capacity = count; + // capacity 0 is valid (count 0): return empty ArrayRef + if (capacity == 0) { + return ArrayRef{}; + } + + const size_t elem_sz = get_type_size(elem_type); + const size_t data_bytes = elem_sz * capacity; + const size_t alloc_size = ArrayRef::HEADER_SIZE + data_bytes; + + std::lock_guard lock(arena_mutex_); + void* raw = arena_->allocate(alloc_size); + if (!raw) { + return arrow::Status::OutOfMemory( + "ArrayArena::allocate_with_data: arena allocation failed (requested ", + alloc_size, " bytes)"); + } + + auto* header = init_header(raw, capacity); + header->length = count; + + char* data = static_cast(raw) + ArrayRef::HEADER_SIZE; + copy_init_elements(data, static_cast(elements), elem_type, + count); + if (capacity > count) { + zero_init_elements(data + elem_sz * count, elem_type, capacity - count); + } + + return ArrayRef{data, elem_type}; + } + + /** + * Append one element to an array. Two strategies: + * + * 1. If there's spare capacity (length < capacity), write in-place. + * -> O(1), no allocation. + * + * 2. If full, allocate a new array with 2 x capacity, copy old elements, + * append the new one, and mark the old array for deletion. + * -> Amortized O(1). + * + * IMPORTANT: This mutates the array through `ref`. In versioned storage, + * the caller MUST first copy() the ArrayRef to avoid corrupting older + * versions that share the same underlying data (copy-on-write). + * + * @param ref ArrayRef to append to (updated in-place if reallocated) + * @param element Pointer to the element data to append + * @return Ok on success; Error with reason if ref is null or allocation fails + */ + arrow::Status append(ArrayRef& ref, const void* element) { + if (ref.is_null()) { + return arrow::Status::Invalid( + "ArrayArena::append: ArrayRef is null (cannot append to null ref)"); + } + + auto* header = get_header(ref); + if (!header) { + return arrow::Status::Invalid( + "ArrayArena::append: invalid ArrayRef (header is null)"); + } + + if (header->length < header->capacity) { + char* dest = ref.mutable_element_ptr(header->length); + assign_element(dest, element, ref.elem_type()); + header->length++; + return arrow::Status::OK(); + } + + // Full - reallocate with 2 x capacity using allocate_with_data + // which properly handles ref-counted element types. + const uint32_t new_cap = header->capacity * 2; + const uint32_t old_len = header->length; + + ARROW_ASSIGN_OR_RAISE( + ArrayRef new_ref, + allocate_with_data(ref.elem_type(), ref.data(), old_len, new_cap)); + + assign_element(new_ref.mutable_element_ptr(old_len), element, + ref.elem_type()); + auto* new_header = get_header(new_ref); + new_header->length = old_len + 1; + + header->mark_for_deletion(); + + ref = std::move(new_ref); + return arrow::Status::OK(); + } + + /** + * Create a copy of an existing array (for copy-on-write / versioning). + * + * @param src Source ArrayRef to copy + * @param extra_capacity Additional element slots beyond the original + * capacity. Use this when the caller knows it will append soon, to avoid a + * second reallocation inside append(). + * @return Ok(new ArrayRef) with independent data, or Error with reason + */ + arrow::Result copy(const ArrayRef& src, + uint32_t extra_capacity = 0) { + if (src.is_null()) { + return arrow::Status::Invalid( + "ArrayArena::copy: source ArrayRef is null"); + } + + const auto* header = get_header_const(src); + if (!header) { + return arrow::Status::Invalid( + "ArrayArena::copy: invalid source ArrayRef (header is null)"); + } + + const uint32_t new_capacity = header->capacity + extra_capacity; + return allocate_with_data(src.elem_type(), src.data(), header->length, + new_capacity); + } + + /** + * Mark an array for deferred deletion. + * The actual deallocation happens when the last ArrayRef is destroyed + * and release() is called. + */ + void mark_for_deletion(const ArrayRef& ref) { + if (ref.is_null()) return; + if (auto* h = get_header_mut(ref)) { + h->mark_for_deletion(); + } + } + + /** + * Deallocate an array's memory back to the FreeListArena. + * Called by ArrayRef::release() when ref_count reaches 0 and the + * array is marked for deletion. + * + * For arrays of non-trivial types (STRING, ARRAY), element destructors + * are called first so that nested ref-counted objects are properly released. + * + * @param data Pointer to element data (NOT to header) + * @param elem_type Element type (NA = skip element cleanup) + */ + void release_array(char* data, ValueType elem_type = ValueType::NA) { + if (!data) return; + + auto* header = + reinterpret_cast(data - ArrayRef::HEADER_SIZE); + if (!header->arena) return; // already released + header->arena = nullptr; // prevent double-free + + destruct_elements(data, elem_type, header->length); + + std::lock_guard lock(arena_mutex_); + arena_->deallocate(header); + } + + // ======================================================================== + // Statistics + // ======================================================================== + + size_t get_total_allocated() const { return arena_->get_total_allocated(); } + + size_t get_used_bytes() const { return arena_->get_used_bytes(); } + + size_t get_freed_bytes() const { return arena_->get_freed_bytes(); } + + void reset() { + std::lock_guard lock(arena_mutex_); + arena_->reset(); + } + + void clear() { + std::lock_guard lock(arena_mutex_); + arena_->clear(); + } + + private: + /** Initialize a freshly allocated header block. */ + ArrayRef::ArrayHeader* init_header(void* raw, uint32_t capacity) { + auto* header = static_cast(raw); + header->ref_count.store(0, std::memory_order_relaxed); + header->flags = 0; + header->length = 0; + header->capacity = capacity; + header->arena = this; + return header; + } + + static ArrayRef::ArrayHeader* get_header(const ArrayRef& ref) { + if (ref.is_null()) return nullptr; + return reinterpret_cast(ref.data() - + ArrayRef::HEADER_SIZE); + } + + static const ArrayRef::ArrayHeader* get_header_const(const ArrayRef& ref) { + return get_header(ref); + } + + static ArrayRef::ArrayHeader* get_header_mut(const ArrayRef& ref) { + return get_header(ref); + } + + /** + * Destruct non-trivial elements before freeing array memory. + * + * For STRING elements: marks each string for deletion, then calls + * the destructor. The destructor decrements ref_count; when it hits 0 + * with the deletion flag set, the string memory is freed. + * + * For ARRAY elements: calls the destructor, which triggers the same + * release_array chain recursively. + * + * Primitives (INT32, DOUBLE, etc.) have trivial destructors - skip them. + */ + static void destruct_elements(char* data, ValueType elem_type, + uint32_t count) { + if (count == 0) return; + + if (is_string_type(elem_type)) { + for (uint32_t i = 0; i < count; ++i) { + auto* sr = reinterpret_cast(data + i * sizeof(StringRef)); + if (!sr->is_null()) { + // Must mark for deletion first - StringRef::release() only + // frees memory when BOTH ref_count==0 AND marked_for_deletion. + auto* hdr = reinterpret_cast( + const_cast(sr->data() - StringRef::HEADER_SIZE)); + hdr->mark_for_deletion(); + } + sr->~StringRef(); + } + } else if (is_array_type(elem_type)) { + for (uint32_t i = 0; i < count; ++i) { + auto* ar = reinterpret_cast(data + i * sizeof(ArrayRef)); + if (!ar->is_null()) { + // ArrayRef destructor calls release() which calls release_array + // recursively if this was the last reference. + auto* hdr = reinterpret_cast( + ar->data() - ArrayRef::HEADER_SIZE); + hdr->mark_for_deletion(); + } + ar->~ArrayRef(); + } + } + // Primitives: trivial destructors, nothing to do. + } + + /** + * Copy-construct elements from src to raw (uninitialized) dst memory. + * Uses copy constructors for ref-counted types (StringRef, ArrayRef) + * to properly increment reference counts; memcpy for primitives. + * + * IMPORTANT: dst must be RAW uninitialized memory (no live objects). + */ + static void copy_init_elements(char* dst, const char* src, + ValueType elem_type, uint32_t count) { + if (count == 0) return; + if (is_string_type(elem_type)) { + for (uint32_t i = 0; i < count; ++i) { + const auto* s = + reinterpret_cast(src + i * sizeof(StringRef)); + new (dst + i * sizeof(StringRef)) StringRef(*s); + } + } else if (is_array_type(elem_type)) { + for (uint32_t i = 0; i < count; ++i) { + const auto* a = + reinterpret_cast(src + i * sizeof(ArrayRef)); + new (dst + i * sizeof(ArrayRef)) ArrayRef(*a); + } + } else { + std::memcpy(dst, src, get_type_size(elem_type) * count); + } + } + + /** + * Copy-assign a single element from src to an INITIALIZED dst slot. + * Uses copy assignment for ref-counted types (properly releases old, + * increments new); memcpy for primitives. + */ + static void assign_element(char* dst, const void* src, ValueType elem_type) { + if (is_string_type(elem_type)) { + *reinterpret_cast(dst) = + *reinterpret_cast(src); + } else if (is_array_type(elem_type)) { + *reinterpret_cast(dst) = + *reinterpret_cast(src); + } else { + std::memcpy(dst, src, get_type_size(elem_type)); + } + } + + /** + * Initialize element memory. + * Uses placement-new for types with non-trivial constructors + * (StringRef, ArrayRef); memset(0) for primitives. + */ + static void zero_init_elements(char* data, ValueType elem_type, + uint32_t count) { + if (is_string_type(elem_type)) { + for (uint32_t i = 0; i < count; ++i) { + new (data + i * sizeof(StringRef)) StringRef(); + } + } else if (is_array_type(elem_type)) { + for (uint32_t i = 0; i < count; ++i) { + new (data + i * sizeof(ArrayRef)) ArrayRef(); + } + } else { + std::memset(data, 0, get_type_size(elem_type) * count); + } + } + + std::unique_ptr arena_; + mutable std::mutex arena_mutex_; +}; + +// ============================================================================ +// ArrayRef::release() implementation (after ArrayArena is fully defined) +// ============================================================================ + +inline void ArrayRef::release() { + if (!data_) return; + if (auto* h = get_header()) { + const int32_t old_count = + h->ref_count.fetch_sub(1, std::memory_order_acq_rel); + if (old_count == 1 && h->is_marked_for_deletion() && h->arena) { + h->arena->release_array(data_, elem_type_); + } + } + data_ = nullptr; + elem_type_ = ValueType::NA; +} + +} // namespace tundradb + +#endif // ARRAY_ARENA_HPP diff --git a/include/array_ref.hpp b/include/array_ref.hpp new file mode 100644 index 0000000..7abedd2 --- /dev/null +++ b/include/array_ref.hpp @@ -0,0 +1,232 @@ +#ifndef ARRAY_REF_HPP +#define ARRAY_REF_HPP + +#include +#include +#include +#include + +#include "value_type.hpp" + +namespace tundradb { + +class ArrayArena; // forward declaration + +/** + * Lightweight handle stored in the node's fixed-size slot (16 bytes). + * Same footprint as StringRef. + * + * Memory layout in ArrayArena (FreeListArena): + * ┌────────────┬────────┬──────────┬──────────┬──────────┬───────────────┐ + * │ ref_count │ flags │ length │ capacity │ arena* │ element data │ + * │ (4B) │ (4B) │ (4B) │ (4B) │ (8B) │ (elem*cap) │ + * └────────────┴────────┴──────────┴──────────┴──────────┴───────────────┘ + * ↑ ↑ + * ArrayHeader (24 bytes) data_ points here + * + * The owning ArrayArena pointer is stored in the header so that + * ArrayRef::release() can deallocate directly without a global registry. + * + * ArrayRef stored in the node slot: + * ┌──────────┬────────────┬─────────┐ + * │ data_ │ elem_type_ │ padding │ + * │ (8B) │ (4B) │ (4B) │ + * └──────────┴────────────┴─────────┘ + * Total: 16 bytes + * + * Supported operations (v1): + * SET field = [values] - replace entire array + * APPEND(field, value) - add element to end + * Random-index delete is NOT supported in v1. + */ +class ArrayRef { + public: + /** + * Header stored in ArrayArena memory BEFORE the element data. + * All mutable shared state lives here - ArrayRef delegates to it. + * The arena pointer allows release() to deallocate without a registry. + */ + struct ArrayHeader { + std::atomic ref_count; // 4 bytes + uint32_t flags; // 4 bytes - bit 0: marked_for_deletion + uint32_t length; // 4 bytes - current element count + uint32_t capacity; // 4 bytes - allocated element slots + ArrayArena* arena; // 8 bytes - owning arena (for dealloc) + + [[nodiscard]] bool is_marked_for_deletion() const { + return (flags & 0x1) != 0; + } + void mark_for_deletion() { flags |= 0x1; } + }; + + static constexpr size_t HEADER_SIZE = sizeof(ArrayHeader); // 24 bytes + + // ======================================================================== + // CONSTRUCTORS AND DESTRUCTOR + // ======================================================================== + + /** Default constructor - creates a null/empty reference. */ + ArrayRef() : data_(nullptr), elem_type_(ValueType::NA) {} + + /** + * Internal constructor used by ArrayArena. + * Automatically increments the reference count. + * + * @param data Pointer to first element in arena (NOT to header) + * @param elem_type Element ValueType (e.g. INT64, DOUBLE, STRING) + */ + ArrayRef(char* data, ValueType elem_type) + : data_(data), elem_type_(elem_type) { + if (data_) { + if (auto* h = get_header()) { + h->ref_count.fetch_add(1, std::memory_order_relaxed); + } + } + } + + /** Copy constructor - increments reference count. */ + ArrayRef(const ArrayRef& other) + : data_(other.data_), elem_type_(other.elem_type_) { + if (data_) { + if (auto* h = get_header()) { + h->ref_count.fetch_add(1, std::memory_order_relaxed); + } + } + } + + /** Move constructor - transfers ownership without changing ref count. */ + ArrayRef(ArrayRef&& other) noexcept + : data_(other.data_), elem_type_(other.elem_type_) { + other.data_ = nullptr; + other.elem_type_ = ValueType::NA; + } + + /** Copy assignment - properly handles reference counting. */ + ArrayRef& operator=(const ArrayRef& other) { + if (this != &other) { + release(); + data_ = other.data_; + elem_type_ = other.elem_type_; + if (data_) { + if (auto* h = get_header()) { + h->ref_count.fetch_add(1, std::memory_order_relaxed); + } + } + } + return *this; + } + + /** Move assignment - transfers ownership. */ + ArrayRef& operator=(ArrayRef&& other) noexcept { + if (this != &other) { + release(); + data_ = other.data_; + elem_type_ = other.elem_type_; + other.data_ = nullptr; + other.elem_type_ = ValueType::NA; + } + return *this; + } + + ~ArrayRef() { release(); } + + // ======================================================================== + // PUBLIC INTERFACE - delegates to header for mutable state + // ======================================================================== + + /** Pointer to the first element in arena memory. */ + [[nodiscard]] char* data() const { return data_; } + + /** Element type (e.g. INT64, DOUBLE, STRING). */ + [[nodiscard]] ValueType elem_type() const { return elem_type_; } + + /** Current number of elements (reads from shared header). */ + [[nodiscard]] uint32_t length() const { + const auto* h = get_header(); + return h ? h->length : 0; + } + + /** Allocated element slots (reads from shared header). */ + [[nodiscard]] uint32_t capacity() const { + const auto* h = get_header(); + return h ? h->capacity : 0; + } + + /** True if this reference points to no data. */ + [[nodiscard]] bool is_null() const { return data_ == nullptr; } + + /** True if the array has no elements. */ + [[nodiscard]] bool empty() const { return length() == 0; } + + /** Size of one element in bytes. */ + [[nodiscard]] size_t elem_size() const { return get_type_size(elem_type_); } + + /** Pointer to the element at index i. */ + [[nodiscard]] const char* element_ptr(uint32_t i) const { + assert(i < length()); + return data_ + static_cast(i) * elem_size(); + } + + /** Mutable pointer to the element at index i. */ + [[nodiscard]] char* mutable_element_ptr(uint32_t i) const { + assert(i < capacity()); + return data_ + static_cast(i) * elem_size(); + } + + /** True if there's room for at least one more element. */ + [[nodiscard]] bool has_capacity() const { return length() < capacity(); } + + /** Get the current ref count (for debugging). */ + [[nodiscard]] int32_t get_ref_count() const { + const auto* h = get_header(); + return h ? h->ref_count.load(std::memory_order_relaxed) : 0; + } + + /** Check if this array is marked for deferred deletion. */ + [[nodiscard]] bool is_marked_for_deletion() const { + const auto* h = get_header(); + return h && h->is_marked_for_deletion(); + } + + // ======================================================================== + // OPERATORS + // ======================================================================== + + bool operator==(const ArrayRef& other) const { + if (data_ == other.data_) return true; + const uint32_t len = length(); + if (len != other.length() || elem_type_ != other.elem_type_) return false; + if (is_null() && other.is_null()) return true; + if (is_null() || other.is_null()) return false; + return std::memcmp(data_, other.data_, len * elem_size()) == 0; + } + + bool operator!=(const ArrayRef& other) const { return !(*this == other); } + + private: + /** + * Release this reference. + * Decrements ref count; if this was the last reference AND the array + * is marked for deletion, deallocates via the arena stored in the header. + * Implementation in array_arena.hpp to avoid circular dependency. + */ + void release(); + + ArrayHeader* get_header() const { + if (!data_) return nullptr; + return reinterpret_cast(data_ - HEADER_SIZE); + } + + char* data_; // 8 bytes - pointer to element data in ArrayArena + ValueType elem_type_; // 4 bytes - element type + // 4 bytes implicit padding -> total 16 bytes (same as StringRef) + + friend class ArrayArena; +}; + +static_assert(sizeof(ArrayRef) == 16, + "ArrayRef must be 16 bytes (same as StringRef)"); + +} // namespace tundradb + +#endif // ARRAY_REF_HPP diff --git a/include/arrow_utils.hpp b/include/arrow_utils.hpp index c3071ba..5e8ec49 100644 --- a/include/arrow_utils.hpp +++ b/include/arrow_utils.hpp @@ -131,6 +131,19 @@ arrow::Result> create_table_from_nodes( arrow::Result> create_empty_table( const std::shared_ptr& schema); +/** + * @brief Appends an ArrayRef's contents to an Arrow ListBuilder. + * + * Handles null arrays (appends null), empty arrays (appends empty list), + * and populated arrays (appends each element based on elem_type). + * + * @param arr_ref The array reference to append. + * @param list_builder The Arrow ListBuilder to append into. + * @return OK on success, or an error if an element type is unsupported. + */ +arrow::Status append_array_to_list_builder(const ArrayRef& arr_ref, + arrow::ListBuilder* list_builder); + /** * @brief Filters an Arrow table using a WhereExpr predicate. * diff --git a/include/core.hpp b/include/core.hpp index 030d090..da27de6 100644 --- a/include/core.hpp +++ b/include/core.hpp @@ -242,10 +242,10 @@ class Database { /** * @brief Execute an UpdateQuery. * - * Mode 1 — by ID (bare field names): + * Mode 1 - by ID (bare field names): * db.update(UpdateQuery::on("User", 0).set("age", Value(31)).build()); * - * Mode 2 — by MATCH query (alias-qualified SET, multi-schema): + * Mode 2 - by MATCH query (alias-qualified SET, multi-schema): * db.update(UpdateQuery::match( * Query::from("u:User") * .traverse("u", "WORKS_AT", "c:Company") @@ -276,7 +276,7 @@ class Database { UpdateType update_type, UpdateResult &result); /** - * Build an alias→schema mapping from a Query's FROM + TRAVERSE clauses. + * Build an alias->schema mapping from a Query's FROM + TRAVERSE clauses. * Only declarations ("alias:Schema") are recorded; bare references ("alias") * are skipped. Returns an error if the same alias is bound to two different * schemas. diff --git a/include/free_list_arena.hpp b/include/free_list_arena.hpp index 1600441..f13af70 100644 --- a/include/free_list_arena.hpp +++ b/include/free_list_arena.hpp @@ -154,11 +154,11 @@ class FreeListArena : public MemArena { * (same as total_used_) * * Example flow: - * allocate(100) → total_allocated=1024, used_bytes=100, freed_bytes=0 - * allocate(200) → total_allocated=1024, used_bytes=300, freed_bytes=0 - * deallocate(100) → total_allocated=1024, used_bytes=200, freed_bytes=100 - * reset() → total_allocated=1024, used_bytes=0, freed_bytes=0 (chunks kept) - * clear() → total_allocated=0, used_bytes=0, freed_bytes=0 (chunks freed) + * allocate(100) -> total_allocated=1024, used_bytes=100, freed_bytes=0 + * allocate(200) -> total_allocated=1024, used_bytes=300, freed_bytes=0 + * deallocate(100) -> total_allocated=1024, used_bytes=200, freed_bytes=100 + * reset() -> total_allocated=1024, used_bytes=0, freed_bytes=0 (chunks kept) + * clear() -> total_allocated=0, used_bytes=0, freed_bytes=0 (chunks freed) */ // Statistics diff --git a/include/memory_arena.hpp b/include/memory_arena.hpp index 96a5501..fe82772 100644 --- a/include/memory_arena.hpp +++ b/include/memory_arena.hpp @@ -46,7 +46,7 @@ class MemoryArena : public MemArena { */ void* allocate(size_t size, size_t alignment = 8) override { // Calculate aligned offset: rounds current_offset_ up to next multiple of - // alignment Example: current_offset_=5, alignment=8 → aligned_offset=8 + // alignment Example: current_offset_=5, alignment=8 -> aligned_offset=8 // (skips 3 bytes padding) size_t aligned_offset = (current_chunk_ != nullptr) diff --git a/include/metadata.hpp b/include/metadata.hpp index cb53c82..0deb048 100644 --- a/include/metadata.hpp +++ b/include/metadata.hpp @@ -13,6 +13,7 @@ #include "llvm/ADT/SmallVector.h" #include "logger.hpp" #include "schema.hpp" +#include "type_descriptor.hpp" #include "types.hpp" using namespace std::string_literals; @@ -20,15 +21,72 @@ using namespace std::string_literals; namespace tundradb { /** - * @brief A simplified representation of a field schema + * @brief A simplified representation of a field schema. + * + * For non-array types, only `name`, `type`, `nullable` are meaningful. + * For ARRAY types, `element_type` and `fixed_size` carry the array parameters. + * Old JSON files without these fields will deserialize cleanly (defaults). */ struct FieldMetadata { std::string name; - ValueType type; + ValueType type = ValueType::NA; bool nullable = true; + ValueType element_type = ValueType::NA; // for ARRAY + uint32_t fixed_size = 0; // for ARRAY: 0=dynamic + uint32_t max_string_size = 0; // for STRING: 0=unlimited + + /// Build a TypeDescriptor from this metadata. + [[nodiscard]] TypeDescriptor to_type_descriptor() const { + if (type == ValueType::ARRAY) { + return TypeDescriptor::array(element_type, fixed_size); + } + auto td = TypeDescriptor::from_value_type(type); + td.max_string_size = max_string_size; + return td; + } - // Allow JSON serialization/deserialization - NLOHMANN_DEFINE_TYPE_INTRUSIVE(FieldMetadata, name, type, nullable) + /// Build FieldMetadata from a TypeDescriptor. + static FieldMetadata from_type_descriptor(const std::string &field_name, + const TypeDescriptor &td, + bool is_nullable = true) { + FieldMetadata fm; + fm.name = field_name; + fm.type = td.base_type; + fm.nullable = is_nullable; + fm.element_type = td.element_type; + fm.fixed_size = td.fixed_size; + fm.max_string_size = td.max_string_size; + return fm; + } + + // Custom JSON: write all fields; on read, missing array params default to + // 0/NA + friend void to_json(nlohmann::json &j, const FieldMetadata &fm) { + j = nlohmann::json{ + {"name", fm.name}, {"type", fm.type}, {"nullable", fm.nullable}}; + if (fm.type == ValueType::ARRAY) { + j["element_type"] = fm.element_type; + j["fixed_size"] = fm.fixed_size; + } + if (fm.max_string_size > 0) { + j["max_string_size"] = fm.max_string_size; + } + } + + friend void from_json(const nlohmann::json &j, FieldMetadata &fm) { + j.at("name").get_to(fm.name); + j.at("type").get_to(fm.type); + j.at("nullable").get_to(fm.nullable); + if (j.contains("element_type")) { + fm.element_type = j.at("element_type").get(); + } + if (j.contains("fixed_size")) { + fm.fixed_size = j.at("fixed_size").get(); + } + if (j.contains("max_string_size")) { + fm.max_string_size = j.at("max_string_size").get(); + } + } }; /** @@ -44,7 +102,7 @@ struct SchemaMetadata { }; static std::shared_ptr from_metadata(const FieldMetadata &metadata) { - return std::make_shared(metadata.name, metadata.type, + return std::make_shared(metadata.name, metadata.to_type_descriptor(), metadata.nullable); } @@ -71,7 +129,8 @@ inline arrow::Result ArrowFieldToMetadata( result.name = field->name(); result.nullable = field->nullable(); - switch (field->type()->id()) { + const auto &dt = field->type(); + switch (dt->id()) { case arrow::Type::BOOL: result.type = ValueType::BOOL; break; @@ -93,9 +152,31 @@ inline arrow::Result ArrowFieldToMetadata( case arrow::Type::LARGE_STRING: result.type = ValueType::STRING; break; + case arrow::Type::LIST: { + auto list_type = std::static_pointer_cast(dt); + // Recursively determine element type + FieldMetadata elem_meta; + elem_meta.name = "item"; + elem_meta.nullable = list_type->value_field()->nullable(); + auto elem_result = ArrowFieldToMetadata(list_type->value_field()); + if (!elem_result.ok()) return elem_result.status(); + result.type = ValueType::ARRAY; + result.element_type = elem_result.ValueOrDie().type; + result.fixed_size = 0; + break; + } + case arrow::Type::FIXED_SIZE_LIST: { + auto fsl_type = std::static_pointer_cast(dt); + auto elem_result = ArrowFieldToMetadata(fsl_type->value_field()); + if (!elem_result.ok()) return elem_result.status(); + result.type = ValueType::ARRAY; + result.element_type = elem_result.ValueOrDie().type; + result.fixed_size = static_cast(fsl_type->list_size()); + break; + } default: return arrow::Status::NotImplemented("Unsupported Arrow type: ", - field->type()->ToString()); + dt->ToString()); } return result; @@ -108,26 +189,52 @@ inline arrow::Result ArrowFieldToMetadata( * @return arrow::Result> The resulting Arrow * field */ -inline arrow::Result> metadata_to_arrow_field( - const FieldMetadata &metadata) { - std::shared_ptr type; - - switch (metadata.type) { +/// Helper: map a scalar ValueType to an Arrow DataType. +inline std::shared_ptr scalar_vt_to_arrow(ValueType vt) { + switch (vt) { case ValueType::BOOL: - type = arrow::boolean(); - break; + return arrow::boolean(); + case ValueType::INT32: + return arrow::int32(); case ValueType::INT64: - type = arrow::int64(); - break; + return arrow::int64(); + case ValueType::FLOAT: + return arrow::float32(); case ValueType::DOUBLE: - type = arrow::float64(); - break; + return arrow::float64(); case ValueType::STRING: - type = arrow::utf8(); - break; + case ValueType::FIXED_STRING16: + case ValueType::FIXED_STRING32: + case ValueType::FIXED_STRING64: + return arrow::utf8(); default: + return nullptr; + } +} + +inline arrow::Result> metadata_to_arrow_field( + const FieldMetadata &metadata) { + std::shared_ptr type; + + if (metadata.type == ValueType::ARRAY) { + const auto elem_dt = scalar_vt_to_arrow(metadata.element_type); + if (!elem_dt) { + return arrow::Status::NotImplemented( + "Unsupported array element type: ", + static_cast(metadata.element_type)); + } + if (metadata.fixed_size > 0) { + type = arrow::fixed_size_list(arrow::field("item", elem_dt), + static_cast(metadata.fixed_size)); + } else { + type = arrow::list(arrow::field("item", elem_dt)); + } + } else { + type = scalar_vt_to_arrow(metadata.type); + if (!type) { return arrow::Status::NotImplemented("Unsupported ValueType: ", static_cast(metadata.type)); + } } return arrow::field(metadata.name, type, metadata.nullable); diff --git a/include/node.hpp b/include/node.hpp index b764bb1..e43e211 100644 --- a/include/node.hpp +++ b/include/node.hpp @@ -12,14 +12,10 @@ #include "schema.hpp" #include "temporal_context.hpp" #include "types.hpp" +#include "update_type.hpp" namespace tundradb { -enum UpdateType { - SET, - // todo APPEND for List/Array -}; - class Node { private: std::unordered_map data_; @@ -98,7 +94,9 @@ class Node { arrow::Result update(const std::shared_ptr &field, Value value, UpdateType update_type) { if (arena_ != nullptr) { - return arena_->set_field_value(*handle_, layout_, field, value); + ARROW_RETURN_NOT_OK(arena_->set_field_value(*handle_, layout_, field, + value, update_type)); + return true; } if (const auto it = data_.find(field->name()); it == data_.end()) { @@ -106,9 +104,12 @@ class Node { } switch (update_type) { - case SET: + case UpdateType::SET: data_[field->name()] = std::move(value); break; + case UpdateType::APPEND: + return arrow::Status::NotImplemented( + "APPEND not supported in non-arena mode"); } return true; @@ -127,7 +128,8 @@ class Node { if (field_updates.empty()) return true; if (arena_ != nullptr) { - return arena_->update_fields(*handle_, layout_, field_updates); + return arena_->update_fields(*handle_, layout_, field_updates, + update_type); } // Non-arena fallback: update data_ map directly @@ -136,9 +138,12 @@ class Node { return arrow::Status::KeyError("Field not found: ", field->name()); } switch (update_type) { - case SET: + case UpdateType::SET: data_[field->name()] = value; break; + case UpdateType::APPEND: + return arrow::Status::NotImplemented( + "APPEND not supported in non-arena mode"); } } return true; @@ -147,12 +152,12 @@ class Node { [[deprecated]] arrow::Result set_value(const std::string &field, const Value &value) { log_warn("set_value by string is deprecated"); - return update(schema_->get_field(field), value, SET); + return update(schema_->get_field(field), value, UpdateType::SET); } arrow::Result set_value(const std::shared_ptr &field, const Value &value) { - return update(field, value, SET); + return update(field, value, UpdateType::SET); } /** @@ -277,10 +282,8 @@ class NodeManager { // Initial population of v0: write directly to base node // Use set_field_value_v0 for all fields (doesn't create versions) - if (!node_arena_->set_field_value_v0( - node_handle, layout_, schema_->get_field("id"), Value{id})) { - return arrow::Status::Invalid("Failed to set id field"); - } + ARROW_RETURN_NOT_OK(node_arena_->set_field_value_v0( + node_handle, layout_, schema_->get_field("id"), Value{id})); for (const auto &field : schema_->fields()) { if (field->name() == "id") continue; @@ -290,10 +293,8 @@ class NodeManager { value = data.find(field->name())->second; } // else: Value() = NULL - if (!node_arena_->set_field_value_v0(node_handle, layout_, field, - value)) { - return arrow::Status::Invalid("Failed to set field ", field->name()); - } + ARROW_RETURN_NOT_OK(node_arena_->set_field_value_v0( + node_handle, layout_, field, value)); } auto node = std::make_shared( diff --git a/include/node_arena.hpp b/include/node_arena.hpp index e91f45f..aea5e7e 100644 --- a/include/node_arena.hpp +++ b/include/node_arena.hpp @@ -10,6 +10,7 @@ #include #include +#include "array_arena.hpp" #include "clock.hpp" #include "free_list_arena.hpp" #include "mem_arena.hpp" @@ -17,6 +18,7 @@ #include "schema_layout.hpp" #include "string_arena.hpp" #include "types.hpp" +#include "update_type.hpp" namespace tundradb { @@ -336,6 +338,7 @@ class NodeArena { layout_registry_(std::move(layout_registry)), string_arena_(string_arena ? std::move(string_arena) : std::make_unique()), + array_arena_(std::make_unique()), versioning_enabled_(enable_versioning), version_counter_(0) { // Only allocate version arena if versioning is enabled @@ -392,13 +395,12 @@ class NodeArena { return {node_data, node_size, layout->get_schema_name()}; } - /** Deallocate node and its strings. */ + /** Deallocate node and its strings/arrays. */ void deallocate_node(const NodeHandle& handle) const { if (handle.is_null()) { return; } - // First, deallocate all string references from the node if (!handle.schema_name.empty()) { const std::shared_ptr layout = layout_registry_->get_layout(handle.schema_name); @@ -406,21 +408,23 @@ class NodeArena { const char* data_start = static_cast(handle.ptr) + layout->get_data_offset(); for (const auto& field : layout->get_fields()) { + const char* field_ptr = data_start + field.offset; + if (is_string_type(field.type)) { - // Read the StringRef from the node memory (after data offset) - const char* field_ptr = data_start + field.offset; const auto* str_ref = reinterpret_cast(field_ptr); - - // Deallocate the string if it's not null if (!str_ref->is_null()) { string_arena_->mark_for_deletion(*str_ref); } + } else if (is_array_type(field.type)) { + const auto* arr_ref = reinterpret_cast(field_ptr); + if (!arr_ref->is_null()) { + array_arena_->mark_for_deletion(*arr_ref); + } } } } } - // Then deallocate the node memory itself mem_arena_->deallocate(handle.ptr); } @@ -484,7 +488,8 @@ class NodeArena { arrow::Result update_fields( NodeHandle& current_handle, const std::shared_ptr& layout, const std::vector, Value>>& - field_updates) { + field_updates, + UpdateType update_type = UpdateType::SET) { // Convert Field pointers to indices std::vector> indexed_updates; indexed_updates.reserve(field_updates.size()); @@ -497,7 +502,8 @@ class NodeArena { indexed_updates.emplace_back(field_layout->index, value); } - return update_fields_by_index(current_handle, layout, indexed_updates); + return update_fields_by_index(current_handle, layout, indexed_updates, + update_type); } /** @@ -506,19 +512,21 @@ class NodeArena { */ arrow::Result create_new_version( NodeHandle& current_handle, const std::shared_ptr& layout, - uint16_t field_idx, const Value& new_value) { + uint16_t field_idx, const Value& new_value, + UpdateType update_type = UpdateType::SET) { if (field_idx >= layout->get_fields().size()) { return arrow::Status::IndexError("Field index out of bounds"); } const std::vector> updates = { {field_idx, new_value}}; - return update_fields_by_index(current_handle, layout, updates); + return update_fields_by_index(current_handle, layout, updates, update_type); } /** Update multiple fields by index (internal, more efficient). */ arrow::Result update_fields_by_index( NodeHandle& current_handle, const std::shared_ptr& layout, - const std::vector>& field_updates) { + const std::vector>& field_updates, + UpdateType update_type = UpdateType::SET) { if (field_updates.empty()) return true; // Non-versioned: write directly to base node @@ -528,10 +536,8 @@ class NodeArena { return arrow::Status::IndexError("Field index out of bounds"); } const FieldLayout& field_layout = layout->get_fields()[field_idx]; - if (!set_field_value_internal(current_handle.ptr, layout, &field_layout, - value)) { - return arrow::Status::Invalid("Failed to set field value"); - } + ARROW_RETURN_NOT_OK(set_field_value_internal( + current_handle.ptr, layout, &field_layout, value, update_type)); } return true; } @@ -557,20 +563,23 @@ class NodeArena { size_t total_size = 0; size_t max_alignment = 1; - // First pass: calculate total size and max alignment + // For APPEND, we always need space for the ArrayRef slot for (const auto& [field_idx, new_value] : field_updates) { if (field_idx >= layout->get_fields().size()) { return arrow::Status::IndexError("Field index out of bounds"); } - if (!new_value.is_null()) { // NULL uses nullptr sentinel (no allocation) - const FieldLayout& field_layout = layout->get_fields()[field_idx]; + const FieldLayout& field_layout = layout->get_fields()[field_idx]; + if (update_type == UpdateType::APPEND || !new_value.is_null()) { total_size += field_layout.size; max_alignment = std::max(max_alignment, field_layout.alignment); } } - // Batch allocate memory for all non-null fields + // Batch allocate memory for all non-null fields. + // Zero-init so that write_value_to_memory's copy assignment on + // StringRef/ArrayRef slots sees null objects (release() is a no-op on + // null). char* batch_memory = nullptr; if (total_size > 0) { batch_memory = static_cast( @@ -579,6 +588,7 @@ class NodeArena { return arrow::Status::OutOfMemory( "Failed to batch allocate field storage"); } + std::memset(batch_memory, 0, total_size); } // Second pass: write values and assign pointers @@ -586,26 +596,38 @@ class NodeArena { for (const auto& [field_idx, new_value] : field_updates) { const FieldLayout& field_layout = layout->get_fields()[field_idx]; - // Handle NULL: use nullptr sentinel - if (new_value.is_null()) { + // Handle NULL SET: use nullptr sentinel + if (update_type == UpdateType::SET && new_value.is_null()) { new_version_info->updated_fields[field_idx] = nullptr; - continue; // Skip batch_memory usage for NULL fields + continue; } - // At this point, field is non-NULL, so batch_memory must be allocated - // (because total_size > 0 when any field is non-NULL) assert(batch_memory != nullptr && "Batch memory must be allocated for non-null fields"); - // Prepare value (convert strings to StringRef) Value storage_value = new_value; - if (new_value.type() == ValueType::STRING) { - const StringRef str_ref = - string_arena_->store_string_auto(new_value.as_string()); - storage_value = Value{str_ref, field_layout.type}; + + if (update_type == UpdateType::APPEND) { + ARROW_ASSIGN_OR_RAISE(storage_value, + prepare_append_value(current_handle, layout, + field_layout, new_value)); + } else { + // SET: convert raw types to arena-backed refs + if (new_value.type() == ValueType::STRING && + new_value.holds_std_string()) { + ARROW_ASSIGN_OR_RAISE( + StringRef str_ref, + string_arena_->store_string_auto(new_value.as_string())); + storage_value = Value{str_ref, field_layout.type}; + } else if (new_value.type() == ValueType::ARRAY && + new_value.holds_raw_array()) { + ARROW_ASSIGN_OR_RAISE(ArrayRef arr_ref, + store_raw_array(field_layout.type_desc, + new_value.as_raw_array())); + storage_value = Value{std::move(arr_ref)}; + } } - // Use batch-allocated memory (safe because batch_memory != nullptr here) char* field_storage = batch_memory + offset; offset += field_layout.size; @@ -623,19 +645,88 @@ class NodeArena { return true; } + /** + * Prepare a Value for the APPEND operation in versioned path. + * Reads the current ArrayRef (from version chain or base node), + * copies it (COW), appends the new element(s), and returns the new ArrayRef. + */ + arrow::Result prepare_append_value( + const NodeHandle& handle, const std::shared_ptr& layout, + const FieldLayout& field_layout, const Value& new_value) { + if (!is_array_type(field_layout.type)) { + return arrow::Status::TypeError( + "APPEND is only valid for array fields, got: ", + tundradb::to_string(field_layout.type)); + } + + // Read current ArrayRef from the version chain or base node + ArrayRef current_ref; + if (handle.is_versioned()) { + auto [found, ptr] = get_field_ptr_from_version_chain(handle.version_info_, + field_layout.index); + if (found && ptr) { + current_ref = *reinterpret_cast(ptr); + } else if (!found) { + const char* base_ptr = layout->get_field_value_ptr( + static_cast(handle.ptr), field_layout.index); + if (base_ptr) { + current_ref = *reinterpret_cast(base_ptr); + } + } + } + + if (new_value.holds_raw_array()) { + const auto& elems = new_value.as_raw_array(); + if (elems.empty()) { + if (current_ref.is_null()) return Value{ArrayRef{}}; + ARROW_ASSIGN_OR_RAISE(ArrayRef copy, array_arena_->copy(current_ref)); + return Value{std::move(copy)}; + } + if (current_ref.is_null()) { + ARROW_ASSIGN_OR_RAISE(ArrayRef arr_ref, + store_raw_array(field_layout.type_desc, elems)); + return Value{std::move(arr_ref)}; + } + const auto n = static_cast(elems.size()); + ARROW_ASSIGN_OR_RAISE( + ArrayRef new_ref, + array_arena_->copy(current_ref, grow_for_append(current_ref, n))); + for (const auto& elem : elems) { + ARROW_RETURN_NOT_OK( + append_single_element(new_ref, field_layout.type_desc, elem)); + } + return Value{std::move(new_ref)}; + } + + // Single element + if (current_ref.is_null()) { + const std::vector elems = {new_value}; + ARROW_ASSIGN_OR_RAISE(ArrayRef arr_ref, + store_raw_array(field_layout.type_desc, elems)); + return Value{std::move(arr_ref)}; + } + ARROW_ASSIGN_OR_RAISE( + ArrayRef new_ref, + array_arena_->copy(current_ref, grow_for_append(current_ref, 1))); + ARROW_RETURN_NOT_OK( + append_single_element(new_ref, field_layout.type_desc, new_value)); + return Value{std::move(new_ref)}; + } + /** * Set field in v0 (initial population). * Writes to base node without creating versions. */ - bool set_field_value_v0(NodeHandle& handle, - const std::shared_ptr& layout, - const std::shared_ptr& field, - const Value& value) { + arrow::Status set_field_value_v0(NodeHandle& handle, + const std::shared_ptr& layout, + const std::shared_ptr& field, + const Value& value) { assert(!handle.is_null()); const FieldLayout* field_layout = layout->get_field_layout(field); if (!field_layout) { - return false; + return arrow::Status::Invalid( + "set_field_value_v0: field not found in layout"); } // Write directly to base node @@ -646,56 +737,31 @@ class NodeArena { * Set field value. * Creates new version if versioning enabled, direct write otherwise. */ - bool set_field_value(NodeHandle& handle, - const std::shared_ptr& layout, - const std::shared_ptr& field, - const Value& value) { + arrow::Status set_field_value(NodeHandle& handle, + const std::shared_ptr& layout, + const std::shared_ptr& field, + const Value& value, + UpdateType update_type = UpdateType::SET) { assert(!handle.is_null()); const FieldLayout* field_layout = layout->get_field_layout(field); if (!field_layout) { - return false; // Invalid field + return arrow::Status::Invalid( + "set_field_value: field not found in layout"); } // VERSIONED PATH: Create a new version if (versioning_enabled_ && handle.is_versioned()) { - auto result = - create_new_version(handle, layout, field_layout->index, value); - return result.ok() && result.ValueOrDie(); + const std::vector> updates = { + {field_layout->index, value}}; + ARROW_RETURN_NOT_OK( + update_fields_by_index(handle, layout, updates, update_type)); + return arrow::Status::OK(); } - // ======================================================================== - // NON-VERSIONED PATH: Direct write (in-place update) - // ======================================================================== - - // If the field currently contains a string, deallocate it first - if (is_string_type(field_layout->type) && - is_field_set(static_cast(handle.ptr), field_layout->index)) { - const Value old_value = layout->get_field_value( - static_cast(handle.ptr), *field_layout); - if (!old_value.is_null() && old_value.type() != ValueType::NA) { - try { - const StringRef& old_str_ref = old_value.as_string_ref(); - if (!old_str_ref.is_null()) { - string_arena_->mark_for_deletion(old_str_ref); - } - } catch (...) { - // Old value wasn't a StringRef, ignore - } - } - } - - // Handle string storage - if (value.type() == ValueType::STRING) { - const std::string& str_content = value.as_string(); - const StringRef str_ref = string_arena_->store_string_auto(str_content); - return layout->set_field_value(static_cast(handle.ptr), - *field_layout, - Value{str_ref, field_layout->type}); - } else { - return layout->set_field_value(static_cast(handle.ptr), - *field_layout, value); - } + // NON-VERSIONED PATH: direct write via shared implementation + return set_field_value_internal(handle.ptr, layout, field_layout, value, + update_type); } /** Reset arenas (keeps chunks). */ @@ -713,6 +779,9 @@ class NodeArena { /** Get string arena. */ StringArena* get_string_arena() const { return string_arena_.get(); } + /** Get array arena. */ + ArrayArena* get_array_arena() const { return array_arena_.get(); } + // Statistics and getters size_t get_total_allocated() const { return mem_arena_->get_total_allocated(); @@ -791,11 +860,15 @@ class NodeArena { return Clock::instance().now_nanos(); } - /** Write field directly to node memory (handles strings). */ - bool set_field_value_internal(void* node_ptr, - const std::shared_ptr& layout, - const FieldLayout* field_layout, - const Value& value) const { + /** Write field directly to node memory (handles strings/arrays). */ + arrow::Status set_field_value_internal( + void* node_ptr, const std::shared_ptr& layout, + const FieldLayout* field_layout, const Value& value, + UpdateType update_type = UpdateType::SET) { + if (update_type == UpdateType::APPEND) { + return append_to_array_field(node_ptr, layout, field_layout, value); + } + // If the field currently contains a string, deallocate it first if (is_string_type(field_layout->type) && is_field_set(static_cast(node_ptr), field_layout->index)) { @@ -813,16 +886,174 @@ class NodeArena { } } - // Handle string storage - if (value.type() == ValueType::STRING) { + // If the field currently contains an array, mark for deletion + if (is_array_type(field_layout->type) && + is_field_set(static_cast(node_ptr), field_layout->index)) { + Value old_value = + layout->get_field_value(static_cast(node_ptr), *field_layout); + if (!old_value.is_null() && old_value.holds_array_ref()) { + const ArrayRef& old_arr_ref = old_value.as_array_ref(); + if (!old_arr_ref.is_null()) { + array_arena_->mark_for_deletion(old_arr_ref); + } + } + } + + // Handle string storage: std::string -> StringRef via arena + if (value.type() == ValueType::STRING && value.holds_std_string()) { const std::string& str_content = value.as_string(); - const StringRef str_ref = string_arena_->store_string_auto(str_content); - return layout->set_field_value(static_cast(node_ptr), - *field_layout, - Value{str_ref, field_layout->type}); + ARROW_ASSIGN_OR_RAISE(StringRef str_ref, + string_arena_->store_string_auto(str_content)); + if (!layout->set_field_value(static_cast(node_ptr), *field_layout, + Value{str_ref, field_layout->type})) { + return arrow::Status::Invalid("Failed to write string field value"); + } + return arrow::Status::OK(); + } + + // Handle array storage: std::vector -> ArrayRef via arena + if (value.type() == ValueType::ARRAY && value.holds_raw_array()) { + ARROW_ASSIGN_OR_RAISE( + ArrayRef arr_ref, + store_raw_array(field_layout->type_desc, value.as_raw_array())); + if (!layout->set_field_value(static_cast(node_ptr), *field_layout, + Value{std::move(arr_ref)})) { + return arrow::Status::Invalid("Failed to write array field value"); + } + return arrow::Status::OK(); + } + + // Value already holds arena-backed ref (StringRef / ArrayRef) or primitive + if (!layout->set_field_value(static_cast(node_ptr), *field_layout, + value)) { + return arrow::Status::Invalid("Failed to write field value"); + } + return arrow::Status::OK(); + } + + /** + * APPEND implementation for array fields (non-versioned path). + * + * Reads the current ArrayRef, copies it (COW), appends the new element(s), + * marks the old array for deletion, and writes the new ref back. + */ + arrow::Status append_to_array_field( + void* node_ptr, const std::shared_ptr& layout, + const FieldLayout* field_layout, const Value& value) { + if (!is_array_type(field_layout->type)) { + return arrow::Status::TypeError( + "APPEND is only valid for array fields, got: ", + tundradb::to_string(field_layout->type)); + } + + auto* base = static_cast(node_ptr); + const bool field_is_set = is_field_set(base, field_layout->index); + + ArrayRef current_ref; + if (field_is_set) { + Value old_value = layout->get_field_value(base, *field_layout); + if (!old_value.is_null() && old_value.holds_array_ref()) { + current_ref = old_value.as_array_ref(); + } + } + + if (value.holds_raw_array()) { + const auto& elems = value.as_raw_array(); + if (elems.empty()) return arrow::Status::OK(); + + ArrayRef new_ref; + if (current_ref.is_null()) { + ARROW_ASSIGN_OR_RAISE(new_ref, + store_raw_array(field_layout->type_desc, elems)); + } else { + const auto n = static_cast(elems.size()); + ARROW_ASSIGN_OR_RAISE( + new_ref, + array_arena_->copy(current_ref, grow_for_append(current_ref, n))); + for (const auto& elem : elems) { + ARROW_RETURN_NOT_OK( + append_single_element(new_ref, field_layout->type_desc, elem)); + } + array_arena_->mark_for_deletion(current_ref); + } + + if (!layout->set_field_value(base, *field_layout, + Value{std::move(new_ref)})) { + return arrow::Status::Invalid( + "Failed to write array field after APPEND"); + } + return arrow::Status::OK(); + } + + // Single element append + if (current_ref.is_null()) { + const std::vector elems = {value}; + ARROW_ASSIGN_OR_RAISE(ArrayRef new_ref, + store_raw_array(field_layout->type_desc, elems)); + if (!layout->set_field_value(base, *field_layout, + Value{std::move(new_ref)})) { + return arrow::Status::Invalid( + "Failed to write array field after APPEND"); + } + return arrow::Status::OK(); + } + + ARROW_ASSIGN_OR_RAISE( + ArrayRef new_ref, + array_arena_->copy(current_ref, grow_for_append(current_ref, 1))); + ARROW_RETURN_NOT_OK( + append_single_element(new_ref, field_layout->type_desc, value)); + array_arena_->mark_for_deletion(current_ref); + + if (!layout->set_field_value(base, *field_layout, + Value{std::move(new_ref)})) { + return arrow::Status::Invalid("Failed to write array field after APPEND"); + } + return arrow::Status::OK(); + } + + /** + * How many extra slots copy() should pre-allocate so that the + * subsequent append() calls won't trigger a second reallocation. + * Returns 0 when the array already has enough spare capacity. + */ + static uint32_t grow_for_append(const ArrayRef& ref, uint32_t n) { + const uint32_t spare = ref.capacity() - ref.length(); + if (spare >= n) return 0; + return n - spare; + } + + /** Append a single Value element to an ArrayRef via the arena. */ + arrow::Status append_single_element(ArrayRef& ref, + const TypeDescriptor& type_desc, + const Value& elem) { + switch (type_desc.element_type) { + case ValueType::INT32: { + int32_t v = elem.as_int32(); + return array_arena_->append(ref, &v); + } + case ValueType::INT64: { + int64_t v = elem.as_int64(); + return array_arena_->append(ref, &v); + } + case ValueType::DOUBLE: { + double v = elem.as_double(); + return array_arena_->append(ref, &v); + } + case ValueType::BOOL: { + bool v = elem.as_bool(); + return array_arena_->append(ref, &v); + } + case ValueType::STRING: { + ARROW_ASSIGN_OR_RAISE( + StringRef sr, string_arena_->store_string_auto(elem.as_string())); + return array_arena_->append(ref, &sr); + } + default: + return arrow::Status::NotImplemented( + "APPEND: unsupported element type: ", + tundradb::to_string(type_desc.element_type)); } - return layout->set_field_value(static_cast(node_ptr), *field_layout, - value); } /** Traverse the version chain to find field pointer. */ @@ -881,14 +1112,70 @@ class NodeArena { *reinterpret_cast(ptr) = value.as_string_ref(); return true; + case ValueType::ARRAY: + if (value.type() != ValueType::ARRAY) return false; + *reinterpret_cast(ptr) = value.as_array_ref(); + return true; + default: return false; } } + /** + * Convert a raw array (std::vector) to an arena-backed ArrayRef. + * Mirrors what string_arena_->store_string_auto() does for strings. + * + * @param type_desc Field's TypeDescriptor (carries element_type) + * @param elements Raw element values + * @return Ok(ArrayRef) or Error with reason (e.g. allocation failure) + */ + arrow::Result store_raw_array(const TypeDescriptor& type_desc, + const std::vector& elements) { + const ValueType elem_type = type_desc.element_type; + const auto count = static_cast(elements.size()); + + uint32_t capacity = count; + if (type_desc.is_fixed_size_array() && type_desc.fixed_size > count) { + capacity = type_desc.fixed_size; + } + + ARROW_ASSIGN_OR_RAISE(ArrayRef ref, + array_arena_->allocate(elem_type, capacity)); + + // Empty array: allocate(0) returns null ArrayRef; nothing to fill + if (ref.is_null()) { + return ref; + } + + const size_t elem_sz = get_type_size(elem_type); + auto* header = reinterpret_cast( + ref.data() - ArrayRef::HEADER_SIZE); + + for (uint32_t i = 0; i < count; ++i) { + char* dest = ref.mutable_element_ptr(i); + const Value& elem = elements[i]; + + // For string elements, store via string arena first + if (is_string_type(elem_type) && elem.holds_std_string()) { + ARROW_ASSIGN_OR_RAISE( + StringRef str_ref, + string_arena_->store_string_auto(elem.as_string())); + *reinterpret_cast(dest) = std::move(str_ref); + } else { + // Write primitive or pre-allocated ref directly + write_value_to_memory(dest, elem_type, elem); + } + } + + header->length = count; + return ref; + } + std::unique_ptr mem_arena_; std::shared_ptr layout_registry_; std::unique_ptr string_arena_; + std::unique_ptr array_arena_; // Versioning (optional) bool versioning_enabled_; diff --git a/include/query.hpp b/include/query.hpp index 18d4e69..77a5938 100644 --- a/include/query.hpp +++ b/include/query.hpp @@ -645,21 +645,21 @@ struct SetAssignment { * * Two modes: * - * **Mode 1 — by ID** (no query engine involved, bare field names): + * **Mode 1 - by ID** (no query engine involved, bare field names): * @code * UpdateQuery::on("User", 0).set("age", Value(31)).build(); * @endcode * - * **Mode 2 — by MATCH query** (alias-qualified SET fields): + * **Mode 2 - by MATCH query** (alias-qualified SET fields): * @code - * // Simple WHERE — update one schema: + * // Simple WHERE - update one schema: * UpdateQuery::match( * Query::from("u:User") * .where("u.city", CompareOp::Eq, Value("NYC")) * .build() * ).set("u.status", Value("active")).build(); * - * // Traversal — update multiple schemas: + * // Traversal - update multiple schemas: * UpdateQuery::match( * Query::from("u:User") * .traverse("u", "WORKS_AT", "c:Company") @@ -718,12 +718,12 @@ class UpdateQuery { return aliases; } - /** @brief Mode 1 — target a specific node by schema + ID. */ + /** @brief Mode 1 - target a specific node by schema + ID. */ static Builder on(const std::string& schema, int64_t node_id) { return {schema, node_id}; } - /** @brief Mode 2 — target nodes found by a MATCH query. */ + /** @brief Mode 2 - target nodes found by a MATCH query. */ static Builder match(Query query) { return Builder{std::move(query)}; } class Builder { @@ -736,16 +736,31 @@ class UpdateQuery { explicit Builder(Query query) : match_query_(std::move(query)) {} /** - * @brief Add a field assignment. + * @brief Add a SET field assignment. * - * - Mode 1: bare name — set("age", Value(31)) - * - Mode 2: qualified — set("u.age", Value(31)) + * - Mode 1: bare name - set("age", Value(31)) + * - Mode 2: qualified - set("u.age", Value(31)) */ Builder& set(std::string field_name, Value value) { assignments_.emplace_back(std::move(field_name), std::move(value)); return *this; } + /** + * @brief Add an APPEND assignment for an array field. + * + * Sets update_type to APPEND. The value is appended to the existing + * array rather than replacing it. + * + * - Mode 1: bare name - append("tags", Value(std::vector{...})) + * - Mode 2: qualified - append("u.tags", Value(std::vector{...})) + */ + Builder& append(std::string field_name, Value value) { + update_type_ = UpdateType::APPEND; + assignments_.emplace_back(std::move(field_name), std::move(value)); + return *this; + } + /** @brief Override the update type (default: SET). */ Builder& type(UpdateType t) { update_type_ = t; @@ -756,7 +771,7 @@ class UpdateQuery { [[nodiscard]] UpdateQuery build() && { if (assignments_.empty()) { throw std::runtime_error( - "UpdateQuery must have at least one SET assignment"); + "UpdateQuery must have at least one field assignment"); } return UpdateQuery(std::move(schema_), std::move(assignments_), node_id_, std::move(match_query_), update_type_); @@ -766,7 +781,7 @@ class UpdateQuery { [[nodiscard]] UpdateQuery build() & { if (assignments_.empty()) { throw std::runtime_error( - "UpdateQuery must have at least one SET assignment"); + "UpdateQuery must have at least one field assignment"); } return UpdateQuery(schema_, assignments_, node_id_, match_query_, update_type_); diff --git a/include/query_execution.hpp b/include/query_execution.hpp index f7ffe76..d971207 100644 --- a/include/query_execution.hpp +++ b/include/query_execution.hpp @@ -99,7 +99,7 @@ class SchemaContext { : schema_registry_(std::move(registry)) {} /** - * @brief Registers a schema alias (e.g. "u" → "User"). + * @brief Registers a schema alias (e.g. "u" -> "User"). * * @param schema_ref The schema reference containing alias and schema name. * @return The resolved concrete schema name, or an error if unknown. @@ -117,7 +117,7 @@ class SchemaContext { /** @brief Returns the underlying SchemaRegistry. */ std::shared_ptr registry() const { return schema_registry_; } - /** @brief Returns all registered alias→schema mappings. */ + /** @brief Returns all registered alias->schema mappings. */ const std::unordered_map& get_aliases() const { return aliases_; } @@ -233,7 +233,7 @@ class GraphState { node_ids_[schema_ref.value()].erase(node_id); } - /** @brief Returns the outgoing connections map (const). schema → node_id → + /** @brief Returns the outgoing connections map (const). schema -> node_id -> * [connections]. */ const llvm::StringMap< llvm::DenseMap>>& @@ -241,7 +241,7 @@ class GraphState { return outgoing_; } - /** @brief Returns the incoming connections map (const). target_id → + /** @brief Returns the incoming connections map (const). target_id -> * [connections]. */ const llvm::DenseMap>& incoming() const { @@ -256,7 +256,7 @@ class GraphState { /** @brief Returns the connection object pool. */ ConnectionPool& connection_pool() const { return connection_pool_; } - /** @brief Returns the full node-ID map (schema alias → ID set). */ + /** @brief Returns the full node-ID map (schema alias -> ID set). */ llvm::StringMap>& get_ids() { return node_ids_; } /** @overload */ @@ -264,8 +264,8 @@ class GraphState { return node_ids_; } - /** @brief Returns the outgoing connections map (mutable). schema → node_id → - * [connections]. */ + /** @brief Returns the outgoing connections map (mutable). schema -> node_id + * -> [connections]. */ llvm::StringMap< llvm::DenseMap>>& get_outgoing_map() { @@ -349,7 +349,7 @@ class FieldIndexer { return it != field_name_to_index_.end() ? it->second : -1; } - /** @brief Returns the field-ID → name map (const). */ + /** @brief Returns the field-ID -> name map (const). */ const llvm::SmallDenseMap& field_id_to_name() const { return field_id_to_name_; } @@ -362,7 +362,7 @@ class FieldIndexer { return fq_field_names_.contains(schema_alias); } - /** @brief Returns the schema → field-indices map (mutable). */ + /** @brief Returns the schema -> field-indices map (mutable). */ llvm::StringMap>& get_schema_field_indices() { return schema_field_indices_; } @@ -371,7 +371,7 @@ class FieldIndexer { return schema_field_indices_; } - /** @brief Returns the field-ID → name map (mutable). */ + /** @brief Returns the field-ID -> name map (mutable). */ llvm::SmallDenseMap& get_field_id_to_name() { return field_id_to_name_; } @@ -457,12 +457,12 @@ struct QueryState { return schemas.registry(); } - /** @brief Returns all alias→schema mappings. */ + /** @brief Returns all alias->schema mappings. */ const std::unordered_map& aliases() const { return schemas.get_aliases(); } - /** @brief Returns the full schema→ID-set map (mutable). */ + /** @brief Returns the full schema->ID-set map (mutable). */ llvm::StringMap>& ids() { return graph.get_ids(); } /** @overload */ @@ -493,7 +493,7 @@ struct QueryState { return graph.incoming(); } - /** @brief Returns the schema → field-indices map (mutable). */ + /** @brief Returns the schema -> field-indices map (mutable). */ llvm::StringMap>& schema_field_indices() { return fields.get_schema_field_indices(); } @@ -502,7 +502,7 @@ struct QueryState { return fields.get_schema_field_indices(); } - /** @brief Returns the field-ID → name map (mutable). */ + /** @brief Returns the field-ID -> name map (mutable). */ llvm::SmallDenseMap& field_id_to_name() { return fields.get_field_id_to_name(); } diff --git a/include/row.hpp b/include/row.hpp index d6705f4..f95ee91 100644 --- a/include/row.hpp +++ b/include/row.hpp @@ -98,7 +98,7 @@ struct Row { std::vector cells; ///< Cell values indexed by field ID. std::vector path; ///< BFS path that produced this row. std::unordered_map - ids; ///< Lazily-populated schema→node-ID map. + ids; ///< Lazily-populated schema->node-ID map. bool ids_populated = false; ///< Whether @ref ids has been built. /** @brief Constructs an empty row with space for @p max_field_count fields. @@ -155,7 +155,7 @@ struct Row { } /** - * @brief Lazily extracts a schema-name→node-ID map from the "*.id" cells. + * @brief Lazily extracts a schema-name->node-ID map from the "*.id" cells. * * @param field_id_to_name Mapping from field index to fully-qualified name. * @return A reference to the cached schema-ID map. diff --git a/include/schema.hpp b/include/schema.hpp index 9cc879e..5e16d22 100644 --- a/include/schema.hpp +++ b/include/schema.hpp @@ -13,6 +13,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "logger.hpp" +#include "type_descriptor.hpp" #include "types.hpp" namespace tundradb { @@ -21,18 +22,36 @@ struct Field { private: uint32_t index_; std::string name_; - ValueType type_; + TypeDescriptor type_desc_; bool nullable_ = true; friend struct SchemaLayout; + friend class SchemaLayout; public: + /// Construct from a TypeDescriptor (preferred) + Field(std::string name, TypeDescriptor type_desc, bool nullable = true) + : index_(0), + name_(std::move(name)), + type_desc_(type_desc), + nullable_(nullable) {} + + /// Legacy constructor from ValueType (backwards-compatible) Field(std::string name, const ValueType type, bool nullable = true) - : index_(0), name_(std::move(name)), type_(type), nullable_(nullable) {} + : index_(0), + name_(std::move(name)), + type_desc_(TypeDescriptor::from_value_type(type)), + nullable_(nullable) {} [[nodiscard]] const std::string &name() const { return name_; } - [[nodiscard]] const ValueType &type() const { return type_; } + /// Returns the base ValueType for switch-based dispatch. + [[nodiscard]] ValueType type() const { return type_desc_.base_type; } + + /// Returns the full TypeDescriptor (for parameterized types like ARRAY). + [[nodiscard]] const TypeDescriptor &type_descriptor() const { + return type_desc_; + } [[nodiscard]] bool nullable() const { return nullable_; } diff --git a/include/schema_layout.hpp b/include/schema_layout.hpp index e40876d..ef20cbd 100644 --- a/include/schema_layout.hpp +++ b/include/schema_layout.hpp @@ -9,9 +9,11 @@ #include #include +#include "array_ref.hpp" #include "llvm/ADT/StringMap.h" #include "mem_utils.hpp" #include "schema.hpp" +#include "type_descriptor.hpp" #include "types.hpp" namespace tundradb { @@ -47,19 +49,21 @@ inline void set_field_bit(char* base, size_t idx, bool is_set) { struct FieldLayout { const size_t index; std::string name; - ValueType type; - size_t offset; // Byte offset from start of node data - size_t size; // Size in bytes - size_t alignment; // Required alignment - bool nullable; // Whether field can be null + ValueType type; // base type for fast dispatch + TypeDescriptor type_desc; // full type descriptor (carries array params etc.) + size_t offset; // Byte offset from start of node data + size_t size; // Size in bytes + size_t alignment; // Required alignment + bool nullable; // Whether field can be null FieldLayout(const size_t index, std::string field_name, - const ValueType field_type, const size_t field_offset, - const size_t field_size, const size_t field_alignment, - const bool is_nullable = true) + const ValueType field_type, const TypeDescriptor& field_type_desc, + const size_t field_offset, const size_t field_size, + const size_t field_alignment, const bool is_nullable = true) : index(index), name(std::move(field_name)), type(field_type), + type_desc(field_type_desc), offset(field_offset), size(field_size), alignment(field_alignment), @@ -235,15 +239,16 @@ class SchemaLayout { */ void add_field(const std::shared_ptr& field) { assert(field != nullptr); - size_t field_size = get_type_size(field->type()); - size_t field_alignment = get_type_alignment(field->type()); + const auto& td = field->type_descriptor(); + size_t field_size = td.storage_size(); + size_t field_alignment = td.storage_alignment(); alignment_ = std::max(alignment_, field_alignment); // Calculate field offset (relative to start of data, after bit set) size_t aligned_offset = align_up(total_size_, field_alignment); field->index_ = fields_.size(); - fields_.emplace_back(field->index_, field->name(), field->type(), + fields_.emplace_back(field->index_, field->name(), field->type(), td, aligned_offset, field_size, field_alignment, field->nullable()); @@ -280,6 +285,10 @@ class SchemaLayout { *reinterpret_cast(ptr) = value.as_string_ref(); return true; } + case ValueType::ARRAY: + if (value.type() != ValueType::ARRAY) return false; + *reinterpret_cast(ptr) = value.as_array_ref(); + return true; default: return false; } @@ -294,6 +303,10 @@ class SchemaLayout { // Initialize StringRef to null/empty new (ptr) StringRef(); break; + case ValueType::ARRAY: + // Initialize ArrayRef to null/empty + new (ptr) ArrayRef(); + break; default: // Zero initialization is fine for numeric types and bools break; diff --git a/include/shard.hpp b/include/shard.hpp index 6309c6d..d678b5e 100644 --- a/include/shard.hpp +++ b/include/shard.hpp @@ -22,7 +22,7 @@ namespace tundradb { class TemporalContext; // ========================================================================= -// Shard — a contiguous range of nodes for one schema +// Shard - a contiguous range of nodes for one schema // ========================================================================= class Shard { @@ -84,7 +84,7 @@ class Shard { }; // ========================================================================= -// ShardManager — manages per-schema shard collections +// ShardManager - manages per-schema shard collections // ========================================================================= class ShardManager { diff --git a/include/string_arena.hpp b/include/string_arena.hpp index 7d78af1..75323ab 100644 --- a/include/string_arena.hpp +++ b/include/string_arena.hpp @@ -1,6 +1,8 @@ #ifndef STRING_ARENA_HPP #define STRING_ARENA_HPP +#include +#include #include #include @@ -63,18 +65,20 @@ class StringPool { * * @param str String to store * @param pool_id Pool identifier (for StringRef) - * @return StringRef with ref_count = 1, or null ref if allocation fails + * @return Ok(StringRef) with ref_count = 1, or Error with reason */ - StringRef store_string(const std::string& str, uint32_t pool_id) { + arrow::Result store_string(const std::string& str, + uint32_t pool_id) { if (str.length() > max_size_) { - return StringRef{}; // String too large for this pool + return arrow::Status::Invalid("StringPool::store_string: string length ", + str.length(), " exceeds pool max_size ", + max_size_); } // Check deduplication cache (thread-safe via tbb::concurrent_hash_map) if (enable_deduplication_) { typename decltype(dedup_cache_)::const_accessor acc; if (dedup_cache_.find(acc, str)) { - // Found existing string - return copy (increments ref count) return acc->second; } } @@ -86,29 +90,27 @@ class StringPool { const size_t alloc_size = StringRef::HEADER_SIZE + str.length() + 1; void* raw_storage = arena_->allocate(alloc_size); if (!raw_storage) { - return StringRef{}; // Allocation failed + return arrow::Status::OutOfMemory( + "StringPool::store_string: arena allocation failed (requested ", + alloc_size, " bytes)"); } - // Initialize header at the beginning auto* header = static_cast(raw_storage); - header->ref_count.store( - 0, std::memory_order_relaxed); // Will be set to 1 by StringRef ctor + header->ref_count.store(0, std::memory_order_relaxed); header->length = static_cast(str.length()); header->flags = 0; header->padding = 0; - // Copy string data after header char* data = reinterpret_cast(header) + StringRef::HEADER_SIZE; std::memcpy(data, str.c_str(), str.length()); - data[str.length()] = '\0'; // Null-terminate for safety + data[str.length()] = '\0'; - // Create StringRef (this increments ref_count to 1) StringRef ref(data, static_cast(str.length()), pool_id); if (enable_deduplication_) { typename decltype(dedup_cache_)::accessor acc; dedup_cache_.insert(acc, str); - acc->second = ref; // This increments ref to 2 (cache holds one ref) + acc->second = ref; } return ref; @@ -117,7 +119,8 @@ class StringPool { /** * Store a string view in this pool. */ - StringRef store_string(std::string_view str, uint32_t pool_id) { + arrow::Result store_string(std::string_view str, + uint32_t pool_id) { return store_string(std::string(str), pool_id); } @@ -143,9 +146,11 @@ class StringPool { } /** - * Release a string reference (called by StringRef::release()). - * Decrements ref count and deallocates if this was the last reference - * AND the string is marked for deletion. + * Deallocate a string's memory back to the FreeListArena. + * Called by StringRef::release() AFTER it has already decremented + * ref_count to 0 and confirmed is_marked_for_deletion(). + * + * This method MUST NOT touch ref_count (the caller already did). * * Thread-safe: Multiple threads can call this concurrently. * @@ -154,29 +159,11 @@ class StringPool { void release_string(const char* data) { if (!data) return; - // Get header auto* header = reinterpret_cast( const_cast(data - StringRef::HEADER_SIZE)); - // Decrement ref count atomically - int32_t old_count = - header->ref_count.fetch_sub(1, std::memory_order_acq_rel); - - if (old_count == 1) { // We were the last reference - // Only deallocate if marked for deletion - if (header->is_marked_for_deletion()) { - // CORRECT: Don't remove from dedup_cache_ here! - // It was already removed in mark_for_deletion(), or - // a new string with the same content may now be in cache. - - // CRITICAL: Lock arena deallocation (FreeListArena is NOT thread-safe) - std::lock_guard lock(arena_mutex_); - - // Deallocate entire block (header + data + null terminator) - size_t alloc_size = StringRef::HEADER_SIZE + header->length + 1; - arena_->deallocate(header); - } - } + std::lock_guard lock(arena_mutex_); + arena_->deallocate(header); } /** @@ -291,6 +278,7 @@ class StringArena { pools_.emplace_back(std::make_unique(32)); pools_.emplace_back(std::make_unique(64)); pools_.emplace_back(std::make_unique(SIZE_MAX)); + register_pools(); } /** @@ -298,11 +286,14 @@ class StringArena { * * @param str String to store * @param pool_id Pool identifier (0-3), default is pool 3 (unlimited size) - * @return StringRef with ref_count = 1 + * @return Ok(StringRef) with ref_count = 1, or Error with reason */ - StringRef store_string(const std::string& str, uint32_t pool_id = 3) { + arrow::Result store_string(const std::string& str, + uint32_t pool_id = 3) { if (pool_id >= pools_.size()) { - return StringRef{}; // Invalid pool ID + return arrow::Status::Invalid( + "StringArena::store_string: invalid pool_id ", pool_id, + " (max: ", pools_.size() - 1, ")"); } return pools_[pool_id]->store_string(str, pool_id); } @@ -311,7 +302,7 @@ class StringArena { * Store a string, automatically choosing the best pool. * Picks the smallest pool that can fit the string. */ - StringRef store_string_auto(const std::string& str) { + arrow::Result store_string_auto(const std::string& str) { size_t len = str.length(); if (len <= 16) return pools_[0]->store_string(str, 0); if (len <= 32) return pools_[1]->store_string(str, 1); @@ -375,6 +366,12 @@ class StringArena { } } + /** + * Register pools with the global registry. + * Defined below StringArenaRegistry to avoid forward-declaration issues. + */ + void register_pools(); + private: std::vector> pools_; }; @@ -427,15 +424,23 @@ class StringArenaRegistry { } }; +// ============================================================================ +// StringArena::register_pools() implementation (after StringArenaRegistry) +// ============================================================================ + +inline void StringArena::register_pools() { + for (uint32_t i = 0; i < pools_.size(); ++i) { + StringArenaRegistry::register_pool(i, pools_[i].get()); + } +} + // ============================================================================ // StringRef::release() implementation // ============================================================================ inline void StringRef::release() { if (data_) { - auto* header = get_header(); - if (header) { - // Decrement ref count atomically + if (auto* header = get_header()) { int32_t old_count = header->ref_count.fetch_sub(1, std::memory_order_acq_rel); diff --git a/include/string_ref.hpp b/include/string_ref.hpp index a9a0ab5..13e1c33 100644 --- a/include/string_ref.hpp +++ b/include/string_ref.hpp @@ -33,11 +33,7 @@ class StringPool; * // When last StringRef is destroyed, string is deallocated */ class StringRef { - private: - const char* data_; - uint32_t length_; - uint32_t pool_id_; - + public: /** * String header stored in arena memory BEFORE the string data. * Layout: [ref_count:4][length:4][flags:4][padding:4] = 16 bytes total @@ -49,22 +45,19 @@ class StringRef { uint32_t flags; // 4 bytes - Bit 0: marked_for_deletion uint32_t padding; // 4 bytes - Ensure 16-byte alignment - /** - * Check if string is marked for deletion. - * When marked, the string will be deallocated when ref_count reaches 0. - */ - bool is_marked_for_deletion() const { return (flags & 0x1) != 0; } - - /** - * Mark string for deletion. - * Called when a node field is updated - actual deallocation happens - * when the last StringRef is destroyed. - */ + [[nodiscard]] bool is_marked_for_deletion() const { + return (flags & 0x1) != 0; + } void mark_for_deletion() { flags |= 0x1; } }; static constexpr size_t HEADER_SIZE = sizeof(StringHeader); // 16 bytes + private: + const char* data_; + uint32_t length_; + uint32_t pool_id_; + __attribute__((always_inline)) StringHeader* get_header() const { if (!data_) return nullptr; return reinterpret_cast( @@ -127,28 +120,15 @@ class StringRef { /** * Copy assignment - properly handles reference counting. - * - * Example: ref4 = ref1 - * - Decrements ref4's OLD value's ref_count - * - Increments ref1's value's ref_count - * - ref4 now points to the same string as ref1 */ StringRef& operator=(const StringRef& other) { if (this != &other) { - // Step 1: Release THIS object's OLD value (before overwriting) - // Example: if ref4 pointed to "Goodbye", decrement "Goodbye" ref_count - // Note: release() also sets this->data_ = nullptr after decrementing release(); - - // Step 2: Copy OTHER object's values into THIS object - // Example: ref4 now gets ref1's pointer to "Hello" + // ======================================================================= data_ = other.data_; length_ = other.length_; pool_id_ = other.pool_id_; - - // Step 3: Increment the NEW value's ref_count - // Important: get_header() now uses the NEW data_ pointer (from step 2) - // Example: Increment "Hello" ref_count (not "Goodbye"!) + // ======================================================================= if (data_) { auto* header = get_header(); // Uses NEW data_ pointer if (header) { diff --git a/include/type_descriptor.hpp b/include/type_descriptor.hpp new file mode 100644 index 0000000..87d52d7 --- /dev/null +++ b/include/type_descriptor.hpp @@ -0,0 +1,144 @@ +#ifndef TYPE_DESCRIPTOR_HPP +#define TYPE_DESCRIPTOR_HPP + +#include +#include + +#include "value_type.hpp" + +namespace tundradb { + +/** + * Describes a complete type, including parameterized types like arrays. + * + * For primitive types, only base_type is meaningful. + * For ARRAY, element_type and fixed_size carry the array parameters. + * For STRING, max_string_size optionally caps string length. + * + * Examples: + * TypeDescriptor::int32() -> INT32 + * TypeDescriptor::string() -> STRING (variable) + * TypeDescriptor::string(64) -> STRING with max 64 bytes + * TypeDescriptor::array(ValueType::INT32) -> ARRAY (dynamic) + * TypeDescriptor::array(ValueType::INT32, 10) -> ARRAY (fixed) + * TypeDescriptor::array(ValueType::STRING) -> ARRAY + */ +struct TypeDescriptor { + ValueType base_type = ValueType::NA; + ValueType element_type = ValueType::NA; // for ARRAY: element type + uint32_t fixed_size = 0; // for ARRAY: 0=dynamic, N=fixed cap + uint32_t max_string_size = 0; // for STRING: 0=unlimited + + // ======================================================================== + // Factory methods + // ======================================================================== + + static TypeDescriptor na() { return {ValueType::NA}; } + static TypeDescriptor int32() { return {ValueType::INT32}; } + static TypeDescriptor int64() { return {ValueType::INT64}; } + static TypeDescriptor float32() { return {ValueType::FLOAT}; } + static TypeDescriptor float64() { return {ValueType::DOUBLE}; } + static TypeDescriptor boolean() { return {ValueType::BOOL}; } + + static TypeDescriptor string(uint32_t max_size = 0) { + return {ValueType::STRING, ValueType::NA, 0, max_size}; + } + + static TypeDescriptor array(ValueType elem, uint32_t fixed = 0) { + return {ValueType::ARRAY, elem, fixed, 0}; + } + + /** + * Create a TypeDescriptor from a legacy ValueType. + * Handles FIXED_STRING* by converting to STRING with max_string_size. + */ + static TypeDescriptor from_value_type(ValueType vt) { + switch (vt) { + case ValueType::FIXED_STRING16: + return string(16); + case ValueType::FIXED_STRING32: + return string(32); + case ValueType::FIXED_STRING64: + return string(64); + default: + return {vt}; + } + } + + // ======================================================================== + // Quick checks (for hot paths - no virtual dispatch) + // ======================================================================== + + [[nodiscard]] bool is_primitive() const { + return base_type != ValueType::NA && base_type != ValueType::ARRAY && + !is_string(); + } + + [[nodiscard]] bool is_string() const { + return base_type == ValueType::STRING || + base_type == ValueType::FIXED_STRING16 || + base_type == ValueType::FIXED_STRING32 || + base_type == ValueType::FIXED_STRING64; + } + + [[nodiscard]] bool is_array() const { return base_type == ValueType::ARRAY; } + + [[nodiscard]] bool is_null() const { return base_type == ValueType::NA; } + + [[nodiscard]] bool is_fixed_size_array() const { + return is_array() && fixed_size > 0; + } + + [[nodiscard]] bool is_dynamic_array() const { + return is_array() && fixed_size == 0; + } + + // ======================================================================== + // Size and alignment (delegates to ValueType for primitives/strings) + // ======================================================================== + + /** Storage size in the node slot (bytes). */ + [[nodiscard]] size_t storage_size() const { return get_type_size(base_type); } + + /** Required memory alignment. */ + [[nodiscard]] size_t storage_alignment() const { + return get_type_alignment(base_type); + } + + // ======================================================================== + // String representation + // ======================================================================== + + [[nodiscard]] std::string to_string() const { + if (is_array()) { + std::string result = "ARRAY<" + tundradb::to_string(element_type); + if (fixed_size > 0) { + result += ", " + std::to_string(fixed_size); + } + result += ">"; + return result; + } + if (is_string() && max_string_size > 0) { + return "STRING(" + std::to_string(max_string_size) + ")"; + } + return tundradb::to_string(base_type); + } + + // ======================================================================== + // Comparison + // ======================================================================== + + bool operator==(const TypeDescriptor& other) const { + return base_type == other.base_type && element_type == other.element_type && + fixed_size == other.fixed_size && + max_string_size == other.max_string_size; + } + + bool operator!=(const TypeDescriptor& other) const { + return !(*this == other); + } +}; + +} // namespace tundradb + +#endif // TYPE_DESCRIPTOR_HPP diff --git a/include/types.hpp b/include/types.hpp index db56b7d..3b98af0 100644 --- a/include/types.hpp +++ b/include/types.hpp @@ -7,11 +7,13 @@ #include #include #include +#include // Arrow includes for type conversion functions #include #include +#include "array_ref.hpp" #include "string_arena.hpp" namespace tundradb { @@ -19,8 +21,6 @@ namespace tundradb { class Value { public: Value() : type_(ValueType::NA), data_(std::monostate{}) {} - // explicit Value(int32_t i) : type_(ValueType::Int32), data_(i) {} - // explicit Value(int64_t v) : type_(ValueType::Int64), data_(v) {} explicit Value(double v) : type_(ValueType::DOUBLE), data_(v) {} explicit Value(std::string v) : type_(ValueType::STRING), data_(std::move(v)) {} @@ -31,11 +31,18 @@ class Value { // Constructor for creating StringRef value with specific string type Value(StringRef v, const ValueType string_type) : type_(string_type), data_(v) { - // Ensure it's actually a string type assert(is_string_type(string_type)); } + + // Arena-backed array (already allocated in ArrayArena) + explicit Value(ArrayRef v) : type_(ValueType::ARRAY), data_(std::move(v)) {} + + // Raw array data - will be converted to ArrayRef by NodeArena + // (same pattern as std::string -> StringRef for strings) + explicit Value(std::vector v) + : type_(ValueType::ARRAY), data_(std::move(v)) {} + explicit Value(bool v) : type_(ValueType::BOOL), data_(v) {} - // Value(int i) : type_(ValueType::Int32), data_(i) {} Value(int32_t i) : type_(ValueType::INT32), data_(i) {} // Non-explicit Value(int64_t v) : type_(ValueType::INT64), data_(v) {} // Non-explicit Value(const char* s) : type_(ValueType::STRING), data_(std::string(s)) {} @@ -55,6 +62,7 @@ class Value { [[nodiscard]] const StringRef& as_string_ref() const { return get(); } + [[nodiscard]] const ArrayRef& as_array_ref() const { return get(); } [[nodiscard]] bool as_bool() const { return get(); } [[nodiscard]] bool is_null() const { return type_ == ValueType::NA; } @@ -68,6 +76,21 @@ class Value { return is_string_type(type_) && std::holds_alternative(data_); } + // Check if the Value contains an ArrayRef (arena-backed) + [[nodiscard]] bool holds_array_ref() const { + return type_ == ValueType::ARRAY && std::holds_alternative(data_); + } + + // Check if the Value contains a raw array (std::vector) + [[nodiscard]] bool holds_raw_array() const { + return type_ == ValueType::ARRAY && + std::holds_alternative>(data_); + } + + [[nodiscard]] const std::vector& as_raw_array() const { + return get>(); + } + // Convert the Value to its raw string representation (without quotes for // strings) [[nodiscard]] std::string to_string() const { @@ -87,6 +110,21 @@ class Value { return as_string(); case ValueType::BOOL: return as_bool() ? "true" : "false"; + case ValueType::ARRAY: { + if (holds_array_ref()) { + const auto& arr = as_array_ref(); + std::string result = "["; + for (uint32_t i = 0; i < arr.length(); ++i) { + if (i > 0) result += ", "; + auto elem = Value::read_value_from_memory(arr.element_ptr(i), + arr.elem_type()); + result += elem.to_string(); + } + result += "]"; + return result; + } + return "[]"; + } default: return ""; } @@ -112,6 +150,8 @@ class Value { // All string types stored as StringRef, but preserve the field's // declared type return Value{*reinterpret_cast(ptr), type}; + case ValueType::ARRAY: + return Value{*reinterpret_cast(ptr)}; case ValueType::NA: default: return Value{}; @@ -131,7 +171,7 @@ class Value { private: ValueType type_; std::variant + StringRef, ArrayRef, std::vector, bool> data_; }; @@ -171,6 +211,10 @@ struct ValueRef { return *reinterpret_cast(data); } + [[nodiscard]] const ArrayRef& as_array_ref() const { + return *reinterpret_cast(data); + } + arrow::Result> as_scalar() const { switch (type) { case ValueType::INT32: @@ -185,6 +229,9 @@ struct ValueRef { return arrow::MakeScalar(as_bool()); case ValueType::NA: return arrow::MakeNullScalar(arrow::null()); + case ValueType::ARRAY: + return arrow::Status::NotImplemented( + "Array scalar conversion not yet implemented"); default: return arrow::Status::NotImplemented( "Unsupported Value type for Arrow scalar conversion: ", @@ -237,11 +284,15 @@ struct ValueRef { case ValueType::STRING: { const StringRef& str1 = *reinterpret_cast(data); const StringRef& str2 = *reinterpret_cast(other.data); - - // Use StringRef's operator== which handles all edge cases return str1 == str2; } + case ValueType::ARRAY: { + const ArrayRef& arr1 = *reinterpret_cast(data); + const ArrayRef& arr2 = *reinterpret_cast(other.data); + return arr1 == arr2; + } + default: return false; // Unknown type } @@ -289,6 +340,19 @@ struct ValueRef { // Use StringRef's to_string() method return "\"" + str_ref.to_string() + "\""; } + case ValueType::ARRAY: { + const ArrayRef& arr = as_array_ref(); + if (arr.is_null()) return "NULL"; + std::string result = "["; + for (uint32_t i = 0; i < arr.length(); ++i) { + if (i > 0) result += ", "; + auto elem = Value::read_value_from_memory(arr.element_ptr(i), + arr.elem_type()); + result += elem.to_string(); + } + result += "]"; + return result; + } default: return "UNKNOWN_TYPE"; } @@ -305,59 +369,6 @@ inline std::ostream& operator<<(std::ostream& os, const Value& value) { return os << value.to_string(); } -static constexpr ValueType arrow_type_to_value_type( - const std::shared_ptr& arrow_type) { - switch (arrow_type->id()) { - case arrow::Type::INT32: - case arrow::Type::INT16: - case arrow::Type::INT8: - case arrow::Type::UINT16: - case arrow::Type::UINT8: - return ValueType::INT32; - - case arrow::Type::INT64: - case arrow::Type::UINT64: - case arrow::Type::UINT32: // Could overflow int32, safer as int64 - return ValueType::INT64; - - case arrow::Type::FLOAT: - return ValueType::FLOAT; - case arrow::Type::DOUBLE: - return ValueType::DOUBLE; - case arrow::Type::STRING: - case arrow::Type::LARGE_STRING: - return ValueType::STRING; - case arrow::Type::BOOL: - return ValueType::BOOL; - case arrow::Type::NA: - return ValueType::NA; - default: - return ValueType::NA; - } -} - -static std::shared_ptr value_type_to_arrow_type( - const ValueType type) { - switch (type) { - case ValueType::NA: - return arrow::null(); - case ValueType::INT32: - return arrow::int32(); - case ValueType::INT64: - return arrow::int64(); - case ValueType::FLOAT: - return arrow::float32(); - case ValueType::DOUBLE: - return arrow::float64(); - case ValueType::STRING: - return arrow::utf8(); - case ValueType::BOOL: - return arrow::boolean(); - default: - return arrow::utf8(); // Default fallback - } -} - } // namespace tundradb #endif // TYPES_HPP diff --git a/include/update_type.hpp b/include/update_type.hpp new file mode 100644 index 0000000..03c3285 --- /dev/null +++ b/include/update_type.hpp @@ -0,0 +1,20 @@ +#ifndef UPDATE_TYPE_HPP +#define UPDATE_TYPE_HPP + +namespace tundradb { + +/** + * Specifies how a field value is applied during an update. + * + * - SET: Replace the entire field value. + * - APPEND: For array fields, append element(s) to the existing array. + * Returns an error if applied to a non-array field. + */ +enum class UpdateType { + SET, + APPEND, +}; + +} // namespace tundradb + +#endif // UPDATE_TYPE_HPP diff --git a/include/utils.hpp b/include/utils.hpp index 6982bfb..e44b528 100644 --- a/include/utils.hpp +++ b/include/utils.hpp @@ -15,6 +15,7 @@ #include #include +#include "arrow_utils.hpp" #include "logger.hpp" #include "node.hpp" #include "query.hpp" @@ -252,6 +253,14 @@ static arrow::Result> create_table( case arrow::Type::BOOL: builders.push_back(std::make_unique()); break; + case arrow::Type::LIST: + case arrow::Type::FIXED_SIZE_LIST: { + std::unique_ptr list_builder; + ARROW_RETURN_NOT_OK(arrow::MakeBuilder(arrow::default_memory_pool(), + field->type(), &list_builder)); + builders.push_back(std::move(list_builder)); + break; + } default: return arrow::Status::NotImplemented("Unsupported type: ", field->type()->ToString()); @@ -321,6 +330,19 @@ static arrow::Result> create_table( ->Append(str_ref.to_string())); break; } + case ValueType::ARRAY: { + const auto& arr_ref = + *reinterpret_cast(value_ptr); + auto* list_builder = + dynamic_cast(builders[i].get()); + if (!list_builder) { + return arrow::Status::Invalid( + "Expected ListBuilder for array field: ", field->name()); + } + ARROW_RETURN_NOT_OK( + append_array_to_list_builder(arr_ref, list_builder)); + break; + } default: return arrow::Status::NotImplemented("Unsupported type: ", to_string(field->type())); diff --git a/include/value_type.hpp b/include/value_type.hpp index 7698d48..1a708a8 100644 --- a/include/value_type.hpp +++ b/include/value_type.hpp @@ -22,7 +22,8 @@ enum class ValueType { FIXED_STRING16, // Fixed-size string up to 16 bytes (uses StringArena pool 0) FIXED_STRING32, // Fixed-size string up to 32 bytes (uses StringArena pool 1) FIXED_STRING64, // Fixed-size string up to 64 bytes (uses StringArena pool 2) - BOOL // Boolean value + BOOL, // Boolean value + ARRAY // Variable or fixed-size array (uses ArrayArena) }; /** @@ -34,6 +35,14 @@ inline bool is_string_type(const ValueType type) { type == ValueType::FIXED_STRING32 || type == ValueType::FIXED_STRING64; } +/** + * Check if a ValueType represents an array type. + * Arrays are stored using ArrayRef (16 bytes) in the node slot. + */ +inline bool is_array_type(const ValueType type) { + return type == ValueType::ARRAY; +} + /** * Get the maximum size for fixed-size string types. * Returns SIZE_MAX for variable-length strings. @@ -78,6 +87,8 @@ inline std::string to_string(const ValueType type) { return "FixedString64"; case ValueType::BOOL: return "Bool"; + case ValueType::ARRAY: + return "Array"; default: return "Unknown"; } @@ -93,6 +104,10 @@ inline size_t get_type_size(const ValueType type) { if (is_string_type(type)) { return 16; // sizeof(StringRef) = 16 bytes } + // Array types are stored as ArrayRef (16 bytes) + if (is_array_type(type)) { + return 16; // sizeof(ArrayRef) = 16 bytes + } switch (type) { case ValueType::INT64: @@ -118,6 +133,10 @@ inline size_t get_type_alignment(const ValueType type) { if (is_string_type(type)) { return 8; // alignof(StringRef) = 8 bytes (pointer alignment) } + // Array types are stored as ArrayRef (8-byte pointer alignment) + if (is_array_type(type)) { + return 8; // alignof(ArrayRef) = 8 bytes (pointer alignment) + } switch (type) { case ValueType::INT64: diff --git a/src/arrow_utils.cpp b/src/arrow_utils.cpp index dbfafa6..c9a4f95 100644 --- a/src/arrow_utils.cpp +++ b/src/arrow_utils.cpp @@ -210,6 +210,47 @@ arrow::Result> value_ptr_to_arrow_scalar( } } +arrow::Status append_array_to_list_builder(const ArrayRef& arr_ref, + arrow::ListBuilder* list_builder) { + if (arr_ref.is_null()) { + return list_builder->AppendNull(); + } + ARROW_RETURN_NOT_OK(list_builder->Append()); + auto* vb = list_builder->value_builder(); + for (uint32_t j = 0; j < arr_ref.length(); ++j) { + const char* ep = arr_ref.element_ptr(j); + switch (arr_ref.elem_type()) { + case ValueType::INT32: + ARROW_RETURN_NOT_OK(static_cast(vb)->Append( + *reinterpret_cast(ep))); + break; + case ValueType::INT64: + ARROW_RETURN_NOT_OK(static_cast(vb)->Append( + *reinterpret_cast(ep))); + break; + case ValueType::DOUBLE: + ARROW_RETURN_NOT_OK(static_cast(vb)->Append( + *reinterpret_cast(ep))); + break; + case ValueType::BOOL: + ARROW_RETURN_NOT_OK(static_cast(vb)->Append( + *reinterpret_cast(ep))); + break; + case ValueType::STRING: { + const auto& sr = *reinterpret_cast(ep); + ARROW_RETURN_NOT_OK(static_cast(vb)->Append( + sr.data(), sr.length())); + break; + } + default: + return arrow::Status::NotImplemented( + "Unsupported array element type: ", + tundradb::to_string(arr_ref.elem_type())); + } + } + return arrow::Status::OK(); +} + arrow::compute::Expression where_condition_to_expression( const WhereExpr& condition, bool strip_var) { return condition.to_arrow_expression(strip_var); @@ -255,19 +296,32 @@ arrow::Result> create_table_from_nodes( if (res.ok()) { auto value = res.ValueOrDie(); if (value) { - auto scalar_result = value_ptr_to_arrow_scalar(value, field->type()); - if (!scalar_result.ok()) { - log_error("Failed to convert value to scalar for field '{}': {}", - field_name, scalar_result.status().ToString()); - return scalar_result.status(); - } - - const auto& scalar = scalar_result.ValueOrDie(); - auto status = builders[i]->AppendScalar(*scalar); - if (!status.ok()) { - log_error("Failed to append scalar for field '{}': {}", field_name, - status.ToString()); - return status; + if (field->type() == ValueType::ARRAY) { + const auto& arr_ref = *reinterpret_cast(value); + auto* list_builder = + dynamic_cast(builders[i].get()); + if (!list_builder) { + return arrow::Status::Invalid( + "Expected ListBuilder for array field: ", field_name); + } + ARROW_RETURN_NOT_OK( + append_array_to_list_builder(arr_ref, list_builder)); + } else { + auto scalar_result = + value_ptr_to_arrow_scalar(value, field->type()); + if (!scalar_result.ok()) { + log_error("Failed to convert value to scalar for field '{}': {}", + field_name, scalar_result.status().ToString()); + return scalar_result.status(); + } + + const auto& scalar = scalar_result.ValueOrDie(); + auto status = builders[i]->AppendScalar(*scalar); + if (!status.ok()) { + log_error("Failed to append scalar for field '{}': {}", + field_name, status.ToString()); + return status; + } } } else { IF_DEBUG_ENABLED { @@ -343,6 +397,19 @@ arrow::Result> create_empty_table( ARROW_RETURN_NOT_OK(builder.Finish(&empty_array)); break; } + case arrow::Type::INT32: { + arrow::Int32Builder builder; + ARROW_RETURN_NOT_OK(builder.Finish(&empty_array)); + break; + } + case arrow::Type::LIST: + case arrow::Type::FIXED_SIZE_LIST: { + std::unique_ptr builder; + ARROW_RETURN_NOT_OK(arrow::MakeBuilder(arrow::default_memory_pool(), + field->type(), &builder)); + ARROW_RETURN_NOT_OK(builder->Finish(&empty_array)); + break; + } default: empty_array = std::make_shared(0); } diff --git a/src/core.cpp b/src/core.cpp index f57e240..f31ec14 100644 --- a/src/core.cpp +++ b/src/core.cpp @@ -441,6 +441,18 @@ arrow::Result> create_table_from_rows( static_cast(builders[i].get()) ->Append(value_ref.as_bool()); break; + case ValueType::ARRAY: { + const auto& arr_ref = value_ref.as_array_ref(); + auto* list_builder = + dynamic_cast(builders[i].get()); + if (!list_builder) { + append_status = arrow::Status::Invalid( + "Expected ListBuilder for field: ", field_name); + break; + } + append_status = append_array_to_list_builder(arr_ref, list_builder); + break; + } default: append_status = builders[i]->AppendNull(); break; diff --git a/src/schema.cpp b/src/schema.cpp index cae6bf8..57343e6 100644 --- a/src/schema.cpp +++ b/src/schema.cpp @@ -6,47 +6,65 @@ namespace tundradb { -arrow::Result Field::from_arrow( - const std::shared_ptr &field) { - ValueType type; - - switch (field->type()->id()) { +/// Helper: convert an Arrow element DataType to our ValueType. +static arrow::Result arrow_elem_to_value_type( + const std::shared_ptr &dt) { + switch (dt->id()) { case arrow::Type::BOOL: - type = ValueType::BOOL; - break; + return ValueType::BOOL; case arrow::Type::INT8: case arrow::Type::INT16: case arrow::Type::INT32: - type = ValueType::INT32; - break; + return ValueType::INT32; case arrow::Type::INT64: case arrow::Type::UINT8: case arrow::Type::UINT16: case arrow::Type::UINT32: case arrow::Type::UINT64: - type = ValueType::INT64; - break; + return ValueType::INT64; case arrow::Type::FLOAT: - type = ValueType::FLOAT; - break; + return ValueType::FLOAT; case arrow::Type::DOUBLE: - type = ValueType::DOUBLE; - break; + return ValueType::DOUBLE; case arrow::Type::STRING: case arrow::Type::LARGE_STRING: - type = ValueType::STRING; - break; + return ValueType::STRING; default: - return arrow::Status::NotImplemented("Unsupported Arrow type: ", - field->type()->ToString()); + return arrow::Status::NotImplemented("Unsupported Arrow element type: ", + dt->ToString()); + } +} + +arrow::Result Field::from_arrow( + const std::shared_ptr &field) { + const auto &dt = field->type(); + + // Handle list / fixed_size_list -> ARRAY + if (dt->id() == arrow::Type::LIST) { + auto list_type = std::static_pointer_cast(dt); + ARROW_ASSIGN_OR_RAISE(auto elem_vt, + arrow_elem_to_value_type(list_type->value_type())); + return Field(field->name(), TypeDescriptor::array(elem_vt), + field->nullable()); + } + if (dt->id() == arrow::Type::FIXED_SIZE_LIST) { + auto fsl_type = std::static_pointer_cast(dt); + ARROW_ASSIGN_OR_RAISE(auto elem_vt, + arrow_elem_to_value_type(fsl_type->value_type())); + return Field(field->name(), + TypeDescriptor::array(elem_vt, fsl_type->list_size()), + field->nullable()); } - return Field(field->name(), type, field->nullable()); + // Scalar types + ARROW_ASSIGN_OR_RAISE(auto vt, arrow_elem_to_value_type(dt)); + return Field(field->name(), vt, field->nullable()); } [[nodiscard]] arrow::Result> Field::to_arrow() const { - switch (type_) { + const auto base = type_desc_.base_type; + switch (base) { case ValueType::BOOL: return arrow::field(name_, arrow::boolean()); case ValueType::INT32: @@ -58,10 +76,50 @@ arrow::Result Field::from_arrow( case ValueType::DOUBLE: return arrow::field(name_, arrow::float64()); case ValueType::STRING: + case ValueType::FIXED_STRING16: + case ValueType::FIXED_STRING32: + case ValueType::FIXED_STRING64: return arrow::field(name_, arrow::utf8()); + case ValueType::ARRAY: { + // Convert element type to Arrow, then wrap in list + auto elem_arrow = TypeDescriptor{{type_desc_.element_type}}; + // Map element ValueType to arrow type + std::shared_ptr elem_dt; + switch (type_desc_.element_type) { + case ValueType::INT32: + elem_dt = arrow::int32(); + break; + case ValueType::INT64: + elem_dt = arrow::int64(); + break; + case ValueType::FLOAT: + elem_dt = arrow::float32(); + break; + case ValueType::DOUBLE: + elem_dt = arrow::float64(); + break; + case ValueType::BOOL: + elem_dt = arrow::boolean(); + break; + case ValueType::STRING: + elem_dt = arrow::utf8(); + break; + default: + return arrow::Status::NotImplemented( + "Unsupported array element type: ", + static_cast(type_desc_.element_type)); + } + if (type_desc_.fixed_size > 0) { + return arrow::field(name_, + arrow::fixed_size_list( + arrow::field("item", elem_dt), + static_cast(type_desc_.fixed_size))); + } + return arrow::field(name_, arrow::list(arrow::field("item", elem_dt))); + } default: return arrow::Status::NotImplemented("Unsupported ValueType: ", - static_cast(type_)); + static_cast(base)); } } diff --git a/src/shard.cpp b/src/shard.cpp index 543d790..5bc2c8c 100644 --- a/src/shard.cpp +++ b/src/shard.cpp @@ -19,7 +19,7 @@ Shard::Shard(int64_t id, int64_t index, size_t capacity, int64_t min_id, max_id(max_id), capacity(capacity), chunk_size(chunk_size), - schema_name(move(schema_name)) {} + schema_name(std::move(schema_name)) {} Shard::Shard(int64_t id, int64_t index, const DatabaseConfig &config, int64_t min_id, int64_t max_id, std::string schema_name, diff --git a/src/tundra_shell.cpp b/src/tundra_shell.cpp index 9ba74bc..50207c0 100644 --- a/src/tundra_shell.cpp +++ b/src/tundra_shell.cpp @@ -908,7 +908,7 @@ class TundraQLVisitorImpl : public tundraql::TundraQLBaseVisitor { return deleted_count; } - // Handle UPDATE statements — delegates to UpdateQuery + Database::update() + // Handle UPDATE statements - delegates to UpdateQuery + Database::update() antlrcpp::Any visitUpdateStatement( tundraql::TundraQLParser::UpdateStatementContext* ctx) override { spdlog::info("Executing UPDATE command"); @@ -922,12 +922,12 @@ class TundraQLVisitorImpl : public tundraql::TundraQLBaseVisitor { std::optional node_id; if (updateTarget->nodeLocator()) { - // UPDATE User(0) SET ... — Mode 1 (by ID) + // UPDATE User(0) SET ... - Mode 1 (by ID) auto loc = updateTarget->nodeLocator(); schema_name = loc->IDENTIFIER()->getText(); node_id = std::stoll(loc->INTEGER_LITERAL()->getText()); } else if (updateTarget->nodePattern()) { - // UPDATE (u:User) SET ... WHERE ... — Mode 2 (by MATCH) + // UPDATE (u:User) SET ... WHERE ... - Mode 2 (by MATCH) auto pat = updateTarget->nodePattern(); if (pat->IDENTIFIER().size() > 1) { alias = pat->IDENTIFIER(0)->getText(); @@ -1028,7 +1028,7 @@ class TundraQLVisitorImpl : public tundraql::TundraQLBaseVisitor { auto match_query = query_builder.build(); auto builder = tundradb::UpdateQuery::match(std::move(match_query)); - // Parse SET assignments — keep alias.field format + // Parse SET assignments - keep alias.field format for (auto assignment : setClause->setAssignment()) { std::string qualified_name; if (assignment->IDENTIFIER().size() == 2) { diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 5d11d49..c659315 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -154,6 +154,10 @@ add_executable(update_query_test add_executable(update_query_join_test update_query_join_test.cpp) +# Array query test +add_executable(array_query_test + array_query_test.cpp) + # Link against Arrow and GTest target_link_libraries(sharding_test PRIVATE @@ -403,6 +407,19 @@ target_link_libraries(update_query_join_test LLVMSupport LLVMCore ) +target_link_libraries(array_query_test + PRIVATE + core + Arrow::arrow_shared + Parquet::parquet_shared + GTest::GTest + GTest::Main + pthread + TBB::tbb + spdlog::spdlog + LLVMSupport LLVMCore +) + # Apply sanitizer flags to all test targets if enabled if(ENABLE_SANITIZERS) target_compile_options(sharding_test PRIVATE ${SANITIZER_COMPILE_FLAGS}) @@ -449,6 +466,9 @@ if(ENABLE_SANITIZERS) target_compile_options(update_query_join_test PRIVATE ${SANITIZER_COMPILE_FLAGS}) target_link_options(update_query_join_test PRIVATE ${SANITIZER_LINK_FLAGS}) + + target_compile_options(array_query_test PRIVATE ${SANITIZER_COMPILE_FLAGS}) + target_link_options(array_query_test PRIVATE ${SANITIZER_LINK_FLAGS}) endif() # Simple test registration @@ -472,6 +492,7 @@ add_test(NAME NodeViewTest COMMAND node_view_test) add_test(NAME TemporalQueryTest COMMAND temporal_query_test) add_test(NAME UpdateQueryTest COMMAND update_query_test) add_test(NAME UpdateQueryJoinTest COMMAND update_query_join_test) +add_test(NAME ArrayQueryTest COMMAND array_query_test) # Set TSan options for tests after they've been registered if(ENABLE_SANITIZERS AND SANITIZER_TYPE STREQUAL "thread" AND EXISTS ${TSAN_SUPPRESSIONS_FILE}) diff --git a/tests/array_query_test.cpp b/tests/array_query_test.cpp new file mode 100644 index 0000000..99b4436 --- /dev/null +++ b/tests/array_query_test.cpp @@ -0,0 +1,797 @@ +#include + +#include +#include +#include + +#include "../include/clock.hpp" +#include "../include/core.hpp" +#include "../include/query.hpp" +#include "../include/utils.hpp" + +#define ASSERT_OK(expr) ASSERT_TRUE((expr).ok()) + +using namespace std::string_literals; +using namespace tundradb; + +namespace tundradb { + +class ArrayQueryTest : public ::testing::Test { + protected: + void SetUp() override { + auto schema = arrow::schema({ + arrow::field("name", arrow::utf8()), + arrow::field("tags", arrow::list(arrow::utf8())), + arrow::field("scores", arrow::list(arrow::int32())), + }); + + auto db_path = "array_query_test_db_" + std::to_string(now_millis()); + auto config = make_config() + .with_db_path(db_path) + .with_shard_capacity(1000) + .with_chunk_size(1000) + .build(); + + db_ = std::make_shared(config); + db_->get_schema_registry()->create("Item", schema).ValueOrDie(); + + create_test_data(); + } + + void create_test_data() { + // Item 0: Alice with tags and scores + { + std::vector tags = {Value{"cpp"s}, Value{"rust"s}}; + std::vector scores = {Value{int32_t(90)}, Value{int32_t(85)}}; + std::unordered_map data = { + {"name", Value{"Alice"s}}, + {"tags", Value{tags}}, + {"scores", Value{scores}}, + }; + db_->create_node("Item", data).ValueOrDie(); + } + + // Item 1: Bob with tags, no scores (empty array) + { + std::vector tags = {Value{"java"s}, Value{"go"s}, + Value{"python"s}}; + std::vector scores = {}; + std::unordered_map data = { + {"name", Value{"Bob"s}}, + {"tags", Value{tags}}, + {"scores", Value{scores}}, + }; + db_->create_node("Item", data).ValueOrDie(); + } + + // Item 2: Charlie with no tags (empty), has scores + { + std::vector tags = {}; + std::vector scores = {Value{int32_t(100)}}; + std::unordered_map data = { + {"name", Value{"Charlie"s}}, + {"tags", Value{tags}}, + {"scores", Value{scores}}, + }; + db_->create_node("Item", data).ValueOrDie(); + } + } + + /// Query the "Item" table and return the full Arrow table. + std::shared_ptr query_items() { + auto query = Query::from("i:Item").build(); + auto result = db_->query(query).ValueOrDie(); + return result->table(); + } + + /// Extract list column values for a specific row index. + template + std::vector get_list_at_row(const std::shared_ptr& table, + const std::string& col_name, int64_t row) { + auto column = table->GetColumnByName(col_name); + EXPECT_NE(column, nullptr) << "Column not found: " << col_name; + if (!column) return {}; + + int64_t offset = 0; + for (int c = 0; c < column->num_chunks(); ++c) { + auto chunk = column->chunk(c); + if (row < offset + chunk->length()) { + auto list_arr = std::static_pointer_cast(chunk); + int64_t local = row - offset; + if (list_arr->IsNull(local)) return {}; + auto values = list_arr->value_slice(local); + std::vector result; + if constexpr (std::is_same_v) { + auto typed = std::static_pointer_cast(values); + for (int64_t i = 0; i < typed->length(); ++i) + result.push_back(typed->Value(i)); + } else if constexpr (std::is_same_v) { + auto typed = std::static_pointer_cast(values); + for (int64_t i = 0; i < typed->length(); ++i) + result.push_back(typed->GetString(i)); + } + return result; + } + offset += chunk->length(); + } + return {}; + } + + /// Find the row index for a given name in the query result. + int64_t find_row_by_name(const std::shared_ptr& table, + const std::string& name) { + auto names = get_column_values(table, "i.name").ValueOrDie(); + for (size_t i = 0; i < names.size(); ++i) { + if (names[i] == name) return static_cast(i); + } + return -1; + } + + std::shared_ptr db_; +}; + +// ========================================================================= +// Query tests — verify arrays come back in Arrow table +// ========================================================================= + +TEST_F(ArrayQueryTest, QueryReturnsTable) { + auto table = query_items(); + ASSERT_NE(table, nullptr); + EXPECT_EQ(table->num_rows(), 3); +} + +TEST_F(ArrayQueryTest, QueryTableHasListColumns) { + auto table = query_items(); + auto schema = table->schema(); + + auto tags_field = schema->GetFieldByName("i.tags"); + ASSERT_NE(tags_field, nullptr); + EXPECT_EQ(tags_field->type()->id(), arrow::Type::LIST); + + auto scores_field = schema->GetFieldByName("i.scores"); + ASSERT_NE(scores_field, nullptr); + EXPECT_EQ(scores_field->type()->id(), arrow::Type::LIST); +} + +TEST_F(ArrayQueryTest, QueryStringArrayValues) { + auto table = query_items(); + int64_t alice_row = find_row_by_name(table, "Alice"); + ASSERT_GE(alice_row, 0); + + auto tags = get_list_at_row(table, "i.tags", alice_row); + ASSERT_EQ(tags.size(), 2); + EXPECT_EQ(tags[0], "cpp"); + EXPECT_EQ(tags[1], "rust"); +} + +TEST_F(ArrayQueryTest, QueryInt32ArrayValues) { + auto table = query_items(); + int64_t alice_row = find_row_by_name(table, "Alice"); + ASSERT_GE(alice_row, 0); + + auto scores = get_list_at_row(table, "i.scores", alice_row); + ASSERT_EQ(scores.size(), 2); + EXPECT_EQ(scores[0], 90); + EXPECT_EQ(scores[1], 85); +} + +TEST_F(ArrayQueryTest, QueryMultiElementStringArray) { + auto table = query_items(); + int64_t bob_row = find_row_by_name(table, "Bob"); + ASSERT_GE(bob_row, 0); + + auto tags = get_list_at_row(table, "i.tags", bob_row); + ASSERT_EQ(tags.size(), 3); + EXPECT_EQ(tags[0], "java"); + EXPECT_EQ(tags[1], "go"); + EXPECT_EQ(tags[2], "python"); +} + +TEST_F(ArrayQueryTest, QueryEmptyArray) { + auto table = query_items(); + int64_t charlie_row = find_row_by_name(table, "Charlie"); + ASSERT_GE(charlie_row, 0); + + auto tags = get_list_at_row(table, "i.tags", charlie_row); + EXPECT_TRUE(tags.empty()); +} + +TEST_F(ArrayQueryTest, QueryEmptyInt32Array) { + auto table = query_items(); + int64_t bob_row = find_row_by_name(table, "Bob"); + ASSERT_GE(bob_row, 0); + + auto scores = get_list_at_row(table, "i.scores", bob_row); + EXPECT_TRUE(scores.empty()); +} + +TEST_F(ArrayQueryTest, QuerySingleElementArray) { + auto table = query_items(); + int64_t charlie_row = find_row_by_name(table, "Charlie"); + ASSERT_GE(charlie_row, 0); + + auto scores = get_list_at_row(table, "i.scores", charlie_row); + ASSERT_EQ(scores.size(), 1); + EXPECT_EQ(scores[0], 100); +} + +// ========================================================================= +// Update tests — update array fields and verify via query +// ========================================================================= + +TEST_F(ArrayQueryTest, UpdateArrayByIdReplaceFull) { + std::vector new_tags = {Value{"scala"s}, Value{"kotlin"s}, + Value{"haskell"s}}; + auto uq = UpdateQuery::on("Item", 0).set("tags", Value{new_tags}).build(); + + auto result = db_->update(uq); + ASSERT_OK(result); + EXPECT_EQ(result.ValueOrDie().updated_count, 1); + + auto table = query_items(); + int64_t alice_row = find_row_by_name(table, "Alice"); + ASSERT_GE(alice_row, 0); + + auto tags = get_list_at_row(table, "i.tags", alice_row); + ASSERT_EQ(tags.size(), 3); + EXPECT_EQ(tags[0], "scala"); + EXPECT_EQ(tags[1], "kotlin"); + EXPECT_EQ(tags[2], "haskell"); +} + +TEST_F(ArrayQueryTest, UpdateArrayByIdToEmpty) { + std::vector empty_tags = {}; + auto uq = UpdateQuery::on("Item", 0).set("tags", Value{empty_tags}).build(); + + auto result = db_->update(uq); + ASSERT_OK(result); + EXPECT_EQ(result.ValueOrDie().updated_count, 1); + + auto table = query_items(); + int64_t alice_row = find_row_by_name(table, "Alice"); + ASSERT_GE(alice_row, 0); + + auto tags = get_list_at_row(table, "i.tags", alice_row); + EXPECT_TRUE(tags.empty()); +} + +TEST_F(ArrayQueryTest, UpdateInt32ArrayById) { + std::vector new_scores = {Value{int32_t(10)}, Value{int32_t(20)}, + Value{int32_t(30)}}; + auto uq = UpdateQuery::on("Item", 0).set("scores", Value{new_scores}).build(); + + auto result = db_->update(uq); + ASSERT_OK(result); + EXPECT_EQ(result.ValueOrDie().updated_count, 1); + + auto table = query_items(); + int64_t alice_row = find_row_by_name(table, "Alice"); + ASSERT_GE(alice_row, 0); + + auto scores = get_list_at_row(table, "i.scores", alice_row); + ASSERT_EQ(scores.size(), 3); + EXPECT_EQ(scores[0], 10); + EXPECT_EQ(scores[1], 20); + EXPECT_EQ(scores[2], 30); +} + +TEST_F(ArrayQueryTest, UpdateArrayDoesNotAffectOtherNodes) { + std::vector new_tags = {Value{"updated"s}}; + auto uq = UpdateQuery::on("Item", 0).set("tags", Value{new_tags}).build(); + + auto result = db_->update(uq); + ASSERT_OK(result); + + auto table = query_items(); + + // Bob's tags unchanged + int64_t bob_row = find_row_by_name(table, "Bob"); + ASSERT_GE(bob_row, 0); + auto bob_tags = get_list_at_row(table, "i.tags", bob_row); + ASSERT_EQ(bob_tags.size(), 3); + EXPECT_EQ(bob_tags[0], "java"); + EXPECT_EQ(bob_tags[1], "go"); + EXPECT_EQ(bob_tags[2], "python"); +} + +TEST_F(ArrayQueryTest, UpdateArrayDoesNotAffectOtherFields) { + std::vector new_tags = {Value{"updated"s}}; + auto uq = UpdateQuery::on("Item", 0).set("tags", Value{new_tags}).build(); + + auto result = db_->update(uq); + ASSERT_OK(result); + + auto table = query_items(); + int64_t alice_row = find_row_by_name(table, "Alice"); + ASSERT_GE(alice_row, 0); + + // Name unchanged + auto names = get_column_values(table, "i.name").ValueOrDie(); + EXPECT_EQ(names[alice_row], "Alice"); + + // Scores unchanged + auto scores = get_list_at_row(table, "i.scores", alice_row); + ASSERT_EQ(scores.size(), 2); + EXPECT_EQ(scores[0], 90); + EXPECT_EQ(scores[1], 85); +} + +TEST_F(ArrayQueryTest, SequentialArrayUpdatesAccumulate) { + std::vector tags1 = {Value{"first"s}}; + ASSERT_OK(db_->update( + UpdateQuery::on("Item", 0).set("tags", Value{tags1}).build())); + + std::vector tags2 = {Value{"second"s}, Value{"third"s}}; + ASSERT_OK(db_->update( + UpdateQuery::on("Item", 0).set("tags", Value{tags2}).build())); + + auto table = query_items(); + int64_t alice_row = find_row_by_name(table, "Alice"); + ASSERT_GE(alice_row, 0); + + auto tags = get_list_at_row(table, "i.tags", alice_row); + ASSERT_EQ(tags.size(), 2); + EXPECT_EQ(tags[0], "second"); + EXPECT_EQ(tags[1], "third"); +} + +TEST_F(ArrayQueryTest, UpdateByMatchSetsArray) { + auto q = Query::from("i:Item") + .where("i.name", CompareOp::Eq, Value("Bob"s)) + .build(); + std::vector new_tags = {Value{"matched"s}}; + auto uq = UpdateQuery::match(q).set("i.tags", Value{new_tags}).build(); + + auto result = db_->update(uq); + ASSERT_OK(result); + EXPECT_EQ(result.ValueOrDie().updated_count, 1); + + auto table = query_items(); + int64_t bob_row = find_row_by_name(table, "Bob"); + ASSERT_GE(bob_row, 0); + + auto tags = get_list_at_row(table, "i.tags", bob_row); + ASSERT_EQ(tags.size(), 1); + EXPECT_EQ(tags[0], "matched"); +} + +// ========================================================================= +// APPEND tests — append elements to existing array fields +// ========================================================================= + +TEST_F(ArrayQueryTest, AppendToStringArrayById) { + // Alice has tags ["cpp", "rust"], append "python" + std::vector to_append = {Value{"python"s}}; + auto uq = UpdateQuery::on("Item", 0).append("tags", Value{to_append}).build(); + ASSERT_OK(db_->update(uq)); + + auto table = query_items(); + int64_t alice_row = find_row_by_name(table, "Alice"); + ASSERT_GE(alice_row, 0); + + auto tags = get_list_at_row(table, "i.tags", alice_row); + ASSERT_EQ(tags.size(), 3); + EXPECT_EQ(tags[0], "cpp"); + EXPECT_EQ(tags[1], "rust"); + EXPECT_EQ(tags[2], "python"); +} + +TEST_F(ArrayQueryTest, AppendToInt32ArrayById) { + // Alice has scores [90, 85], append 95 + std::vector to_append = {Value{int32_t(95)}}; + auto uq = + UpdateQuery::on("Item", 0).append("scores", Value{to_append}).build(); + ASSERT_OK(db_->update(uq)); + + auto table = query_items(); + int64_t alice_row = find_row_by_name(table, "Alice"); + ASSERT_GE(alice_row, 0); + + auto scores = get_list_at_row(table, "i.scores", alice_row); + ASSERT_EQ(scores.size(), 3); + EXPECT_EQ(scores[0], 90); + EXPECT_EQ(scores[1], 85); + EXPECT_EQ(scores[2], 95); +} + +TEST_F(ArrayQueryTest, AppendMultipleElementsToArray) { + // Alice has tags ["cpp", "rust"], append "go" and "java" + std::vector to_append = {Value{"go"s}, Value{"java"s}}; + auto uq = UpdateQuery::on("Item", 0).append("tags", Value{to_append}).build(); + ASSERT_OK(db_->update(uq)); + + auto table = query_items(); + int64_t alice_row = find_row_by_name(table, "Alice"); + ASSERT_GE(alice_row, 0); + + auto tags = get_list_at_row(table, "i.tags", alice_row); + ASSERT_EQ(tags.size(), 4); + EXPECT_EQ(tags[0], "cpp"); + EXPECT_EQ(tags[1], "rust"); + EXPECT_EQ(tags[2], "go"); + EXPECT_EQ(tags[3], "java"); +} + +TEST_F(ArrayQueryTest, AppendToEmptyArray) { + // Bob has empty scores [], append 42 + std::vector to_append = {Value{int32_t(42)}}; + auto uq = + UpdateQuery::on("Item", 1).append("scores", Value{to_append}).build(); + ASSERT_OK(db_->update(uq)); + + auto table = query_items(); + int64_t bob_row = find_row_by_name(table, "Bob"); + ASSERT_GE(bob_row, 0); + + auto scores = get_list_at_row(table, "i.scores", bob_row); + ASSERT_EQ(scores.size(), 1); + EXPECT_EQ(scores[0], 42); +} + +TEST_F(ArrayQueryTest, AppendDoesNotAffectOtherNodes) { + // Append to Alice, Bob should be unchanged + std::vector to_append = {Value{"extra"s}}; + auto uq = UpdateQuery::on("Item", 0).append("tags", Value{to_append}).build(); + ASSERT_OK(db_->update(uq)); + + auto table = query_items(); + int64_t bob_row = find_row_by_name(table, "Bob"); + ASSERT_GE(bob_row, 0); + + auto bob_tags = get_list_at_row(table, "i.tags", bob_row); + ASSERT_EQ(bob_tags.size(), 3); + EXPECT_EQ(bob_tags[0], "java"); + EXPECT_EQ(bob_tags[1], "go"); + EXPECT_EQ(bob_tags[2], "python"); +} + +TEST_F(ArrayQueryTest, SequentialAppends) { + // Append "a" then "b" to Charlie's empty tags + { + std::vector to_append = {Value{"a"s}}; + auto uq = + UpdateQuery::on("Item", 2).append("tags", Value{to_append}).build(); + ASSERT_OK(db_->update(uq)); + } + { + std::vector to_append = {Value{"b"s}}; + auto uq = + UpdateQuery::on("Item", 2).append("tags", Value{to_append}).build(); + ASSERT_OK(db_->update(uq)); + } + + auto table = query_items(); + int64_t charlie_row = find_row_by_name(table, "Charlie"); + ASSERT_GE(charlie_row, 0); + + auto tags = get_list_at_row(table, "i.tags", charlie_row); + ASSERT_EQ(tags.size(), 2); + EXPECT_EQ(tags[0], "a"); + EXPECT_EQ(tags[1], "b"); +} + +TEST_F(ArrayQueryTest, AppendEmptyVectorIsNoop) { + // Append empty vector to Alice's tags — should remain unchanged + std::vector to_append = {}; + auto uq = UpdateQuery::on("Item", 0).append("tags", Value{to_append}).build(); + ASSERT_OK(db_->update(uq)); + + auto table = query_items(); + int64_t alice_row = find_row_by_name(table, "Alice"); + ASSERT_GE(alice_row, 0); + + auto tags = get_list_at_row(table, "i.tags", alice_row); + ASSERT_EQ(tags.size(), 2); + EXPECT_EQ(tags[0], "cpp"); + EXPECT_EQ(tags[1], "rust"); +} + +TEST_F(ArrayQueryTest, AppendByMatchQuery) { + // Append "matched" to all Items where name = "Bob" + auto q = Query::from("i:Item") + .where("i.name", CompareOp::Eq, Value("Bob"s)) + .build(); + std::vector to_append = {Value{"matched"s}}; + auto uq = UpdateQuery::match(q).append("i.tags", Value{to_append}).build(); + + auto result = db_->update(uq); + ASSERT_OK(result); + EXPECT_EQ(result.ValueOrDie().updated_count, 1); + + auto table = query_items(); + int64_t bob_row = find_row_by_name(table, "Bob"); + ASSERT_GE(bob_row, 0); + + auto tags = get_list_at_row(table, "i.tags", bob_row); + ASSERT_EQ(tags.size(), 4); + EXPECT_EQ(tags[0], "java"); + EXPECT_EQ(tags[1], "go"); + EXPECT_EQ(tags[2], "python"); + EXPECT_EQ(tags[3], "matched"); +} + +// ========================================================================= +// Versioned array tests — verify SET/APPEND create proper versions +// ========================================================================= + +class VersionedArrayTest : public ::testing::Test { + protected: + std::shared_ptr db_; + std::string test_db_path_; + MockClock mock_clock_; + + uint64_t t0_; + uint64_t t1_; + uint64_t t2_; + uint64_t t3_; + + void SetUp() override { + Clock::set_instance(&mock_clock_); + + test_db_path_ = "versioned_array_test_db_" + std::to_string(now_millis()); + auto config = make_config() + .with_db_path(test_db_path_) + .with_shard_capacity(1000) + .with_chunk_size(1000) + .with_versioning_enabled(true) + .build(); + + db_ = std::make_shared(config); + + auto schema = arrow::schema({ + arrow::field("name", arrow::utf8()), + arrow::field("tags", arrow::list(arrow::utf8())), + arrow::field("scores", arrow::list(arrow::int32())), + }); + db_->get_schema_registry()->create("Item", schema).ValueOrDie(); + + t0_ = 1685577600000000000ULL; // 2023-06-01 + t1_ = 1688169600000000000ULL; // 2023-07-01 + t2_ = 1690848000000000000ULL; // 2023-08-01 + t3_ = 1693526400000000000ULL; // 2023-09-01 + + mock_clock_.set_time(t0_); + } + + void TearDown() override { + Clock::reset(); + std::filesystem::remove_all(test_db_path_); + } + + int64_t create_item(const std::string& name, std::vector tags, + std::vector scores) { + std::unordered_map data = { + {"name", Value{name}}, + {"tags", Value{tags}}, + {"scores", Value{scores}}, + }; + return db_->create_node("Item", data).ValueOrDie()->id; + } + + std::shared_ptr query_items() { + auto query = Query::from("i:Item").build(); + return db_->query(query).ValueOrDie()->table(); + } + + std::shared_ptr query_items_as_of(uint64_t valid_time) { + auto query = Query::from("i:Item").as_of_valid_time(valid_time).build(); + return db_->query(query).ValueOrDie()->table(); + } + + template + std::vector get_list_at_row(const std::shared_ptr& table, + const std::string& col_name, int64_t row) { + auto column = table->GetColumnByName(col_name); + EXPECT_NE(column, nullptr) << "Column not found: " << col_name; + if (!column) return {}; + + int64_t offset = 0; + for (int c = 0; c < column->num_chunks(); ++c) { + auto chunk = column->chunk(c); + if (row < offset + chunk->length()) { + auto list_arr = std::static_pointer_cast(chunk); + int64_t local = row - offset; + if (list_arr->IsNull(local)) return {}; + auto values = list_arr->value_slice(local); + std::vector result; + if constexpr (std::is_same_v) { + auto typed = std::static_pointer_cast(values); + for (int64_t i = 0; i < typed->length(); ++i) + result.push_back(typed->Value(i)); + } else if constexpr (std::is_same_v) { + auto typed = std::static_pointer_cast(values); + for (int64_t i = 0; i < typed->length(); ++i) + result.push_back(typed->GetString(i)); + } + return result; + } + offset += chunk->length(); + } + return {}; + } +}; + +TEST_F(VersionedArrayTest, SetArrayCreatesVersion) { + mock_clock_.set_time(t0_); + int64_t id = create_item("Alice", {Value{"cpp"s}, Value{"rust"s}}, + {Value{int32_t(90)}}); + + // SET tags to new array at t1 + mock_clock_.set_time(t1_); + std::vector new_tags = {Value{"go"s}, Value{"java"s}, + Value{"python"s}}; + auto uq = UpdateQuery::on("Item", id).set("tags", Value{new_tags}).build(); + ASSERT_OK(db_->update(uq)); + + // Current view: should see the new tags + auto current = query_items(); + ASSERT_EQ(current->num_rows(), 1); + auto tags_now = get_list_at_row(current, "i.tags", 0); + ASSERT_EQ(tags_now.size(), 3); + EXPECT_EQ(tags_now[0], "go"); + EXPECT_EQ(tags_now[1], "java"); + EXPECT_EQ(tags_now[2], "python"); + + // Query as-of t0: should see the original tags + auto past = query_items_as_of(t0_); + ASSERT_EQ(past->num_rows(), 1); + auto tags_t0 = get_list_at_row(past, "i.tags", 0); + ASSERT_EQ(tags_t0.size(), 2); + EXPECT_EQ(tags_t0[0], "cpp"); + EXPECT_EQ(tags_t0[1], "rust"); +} + +TEST_F(VersionedArrayTest, AppendArrayCreatesVersion) { + mock_clock_.set_time(t0_); + int64_t id = create_item("Bob", {Value{"java"s}}, {}); + + // APPEND "go" at t1 + mock_clock_.set_time(t1_); + std::vector to_append = {Value{"go"s}}; + auto uq = + UpdateQuery::on("Item", id).append("tags", Value{to_append}).build(); + ASSERT_OK(db_->update(uq)); + + // Current: ["java", "go"] + auto current = query_items(); + auto tags_now = get_list_at_row(current, "i.tags", 0); + ASSERT_EQ(tags_now.size(), 2); + EXPECT_EQ(tags_now[0], "java"); + EXPECT_EQ(tags_now[1], "go"); + + // As-of t0: should still be ["java"] + auto past = query_items_as_of(t0_); + auto tags_t0 = get_list_at_row(past, "i.tags", 0); + ASSERT_EQ(tags_t0.size(), 1); + EXPECT_EQ(tags_t0[0], "java"); +} + +TEST_F(VersionedArrayTest, MultipleAppendVersions) { + mock_clock_.set_time(t0_); + int64_t id = create_item("Charlie", {Value{"a"s}}, {}); + + // Append "b" at t1 + mock_clock_.set_time(t1_); + ASSERT_OK( + db_->update(UpdateQuery::on("Item", id) + .append("tags", Value{std::vector{Value{"b"s}}}) + .build())); + + // Append "c" at t2 + mock_clock_.set_time(t2_); + ASSERT_OK( + db_->update(UpdateQuery::on("Item", id) + .append("tags", Value{std::vector{Value{"c"s}}}) + .build())); + + // Current: ["a", "b", "c"] + auto current = query_items(); + auto tags_now = get_list_at_row(current, "i.tags", 0); + ASSERT_EQ(tags_now.size(), 3); + EXPECT_EQ(tags_now[0], "a"); + EXPECT_EQ(tags_now[1], "b"); + EXPECT_EQ(tags_now[2], "c"); + + // As-of t0: ["a"] + auto t0_table = query_items_as_of(t0_); + auto tags_t0 = get_list_at_row(t0_table, "i.tags", 0); + ASSERT_EQ(tags_t0.size(), 1); + EXPECT_EQ(tags_t0[0], "a"); + + // As-of t1: ["a", "b"] + auto t1_table = query_items_as_of(t1_); + auto tags_t1 = get_list_at_row(t1_table, "i.tags", 0); + ASSERT_EQ(tags_t1.size(), 2); + EXPECT_EQ(tags_t1[0], "a"); + EXPECT_EQ(tags_t1[1], "b"); +} + +TEST_F(VersionedArrayTest, SetAfterAppendPreservesHistory) { + mock_clock_.set_time(t0_); + int64_t id = create_item("Dave", {Value{"x"s}}, {}); + + // Append "y" at t1 + mock_clock_.set_time(t1_); + ASSERT_OK( + db_->update(UpdateQuery::on("Item", id) + .append("tags", Value{std::vector{Value{"y"s}}}) + .build())); + + // SET to completely new array at t2 + mock_clock_.set_time(t2_); + std::vector fresh = {Value{"new1"s}, Value{"new2"s}}; + ASSERT_OK(db_->update( + UpdateQuery::on("Item", id).set("tags", Value{fresh}).build())); + + // Current: ["new1", "new2"] + auto current = query_items(); + auto tags_now = get_list_at_row(current, "i.tags", 0); + ASSERT_EQ(tags_now.size(), 2); + EXPECT_EQ(tags_now[0], "new1"); + EXPECT_EQ(tags_now[1], "new2"); + + // As-of t1: ["x", "y"] (append result) + auto t1_table = query_items_as_of(t1_); + auto tags_t1 = get_list_at_row(t1_table, "i.tags", 0); + ASSERT_EQ(tags_t1.size(), 2); + EXPECT_EQ(tags_t1[0], "x"); + EXPECT_EQ(tags_t1[1], "y"); + + // As-of t0: ["x"] (original) + auto t0_table = query_items_as_of(t0_); + auto tags_t0 = get_list_at_row(t0_table, "i.tags", 0); + ASSERT_EQ(tags_t0.size(), 1); + EXPECT_EQ(tags_t0[0], "x"); +} + +TEST_F(VersionedArrayTest, AppendInt32WithVersioning) { + mock_clock_.set_time(t0_); + int64_t id = create_item("Eve", {}, {Value{int32_t(10)}, Value{int32_t(20)}}); + + // Append 30 at t1 + mock_clock_.set_time(t1_); + ASSERT_OK(db_->update( + UpdateQuery::on("Item", id) + .append("scores", Value{std::vector{Value{int32_t(30)}}}) + .build())); + + // Current: [10, 20, 30] + auto current = query_items(); + auto scores_now = get_list_at_row(current, "i.scores", 0); + ASSERT_EQ(scores_now.size(), 3); + EXPECT_EQ(scores_now[0], 10); + EXPECT_EQ(scores_now[1], 20); + EXPECT_EQ(scores_now[2], 30); + + // As-of t0: [10, 20] + auto past = query_items_as_of(t0_); + auto scores_t0 = get_list_at_row(past, "i.scores", 0); + ASSERT_EQ(scores_t0.size(), 2); + EXPECT_EQ(scores_t0[0], 10); + EXPECT_EQ(scores_t0[1], 20); +} + +TEST_F(VersionedArrayTest, AppendToEmptyArrayWithVersioning) { + mock_clock_.set_time(t0_); + int64_t id = create_item("Frank", {}, {}); + + // Append to empty tags at t1 + mock_clock_.set_time(t1_); + ASSERT_OK(db_->update( + UpdateQuery::on("Item", id) + .append("tags", Value{std::vector{Value{"hello"s}}}) + .build())); + + // Current: ["hello"] + auto current = query_items(); + auto tags_now = get_list_at_row(current, "i.tags", 0); + ASSERT_EQ(tags_now.size(), 1); + EXPECT_EQ(tags_now[0], "hello"); + + // As-of t0: empty + auto past = query_items_as_of(t0_); + auto tags_t0 = get_list_at_row(past, "i.tags", 0); + EXPECT_TRUE(tags_t0.empty()); +} + +} // namespace tundradb diff --git a/tests/database_test.cpp b/tests/database_test.cpp index bfe5205..27144a1 100644 --- a/tests/database_test.cpp +++ b/tests/database_test.cpp @@ -606,7 +606,7 @@ TEST_F(DatabaseTest, VerifyUpdatedFlag) { EXPECT_TRUE(is_clean) << "Shard should be marked as clean after snapshot"; // Update a node - db->update_node("users", 0, "age", int64_t(30), SET).ValueOrDie(); + db->update_node("users", 0, "age", int64_t(30), UpdateType::SET).ValueOrDie(); // Verify shard is marked as updated again is_clean = shard_manager->is_shard_clean("users", 0).ValueOrDie(); diff --git a/tests/node_test.cpp b/tests/node_test.cpp index b069821..2c52bac 100644 --- a/tests/node_test.cpp +++ b/tests/node_test.cpp @@ -34,6 +34,16 @@ class NodeTest : public ::testing::Test { auto result = schema_registry_->create("User", test_schema_); ASSERT_TRUE(result.ok()); + // Register a schema with array fields for array tests + auto array_fields = std::vector>{ + arrow::field("name", arrow::utf8(), false), + arrow::field("tags", arrow::list(arrow::field("item", arrow::utf8())), + true), + arrow::field("scores", + arrow::list(arrow::field("item", arrow::int32())), true)}; + auto array_schema = arrow::schema(array_fields); + ASSERT_TRUE(schema_registry_->create("UserWithArrays", array_schema).ok()); + // Create node manager node_manager_ = std::make_unique(schema_registry_); } @@ -603,4 +613,182 @@ TEST_F(NodeTest, MultipleNodesSharedString) { Logger::get_instance().debug( "=== MultipleNodesSharedString Test Complete ==="); +} + +// --------------------------------------------------------------------------- +// Array field tests (schema: UserWithArrays with name, tags[], scores[]) +// --------------------------------------------------------------------------- + +// Test creating a node with array fields (list of strings, list of int32) +TEST_F(NodeTest, NodeManagerCreateNodeWithArray) { + std::unordered_map node_data = { + {"name", Value{"Alice"}}, + {"tags", Value{std::vector{Value{"a"}, Value{"b"}, Value{"c"}}}}, + {"scores", Value{std::vector{Value{static_cast(10)}, + Value{static_cast(20)}, + Value{static_cast(30)}}}}}; + + auto node_result = node_manager_->create_node("UserWithArrays", node_data); + ASSERT_TRUE(node_result.ok()) + << "Failed to create node: " << node_result.status().ToString(); + + auto node = node_result.ValueOrDie(); + ASSERT_NE(node, nullptr); + EXPECT_EQ(node->schema_name, "UserWithArrays"); + EXPECT_GE(node->id, 0); +} + +// Test get_value for array field: storage returns arena-backed ArrayRef +TEST_F(NodeTest, NodeGetValueArray) { + std::vector tags = {Value{"x"}, Value{"y"}, Value{"z"}}; + std::unordered_map node_data = { + {"name", Value{"Bob"}}, + {"tags", Value{tags}}, + }; + + auto node_result = node_manager_->create_node("UserWithArrays", node_data); + ASSERT_TRUE(node_result.ok()); + auto node = node_result.ValueOrDie(); + + auto tags_result = node->get_value("tags"); + ASSERT_TRUE(tags_result.ok()) + << "Failed to get tags: " << tags_result.status().ToString(); + const Value& tags_value = tags_result.ValueOrDie(); + + EXPECT_EQ(tags_value.type(), ValueType::ARRAY); + EXPECT_TRUE(tags_value.holds_array_ref()) + << "Arena should store array as ArrayRef"; + const ArrayRef& arr = tags_value.as_array_ref(); + EXPECT_FALSE(arr.is_null()); + EXPECT_EQ(arr.length(), 3u); + + // to_string() formats as "[x, y, z]" + std::string str = tags_value.to_string(); + EXPECT_EQ(str, "[x, y, z]"); +} + +// Test get_value for int32 array field +TEST_F(NodeTest, NodeGetValueInt32Array) { + std::vector scores = { + Value{static_cast(1)}, + Value{static_cast(2)}, + Value{static_cast(3)}, + }; + std::unordered_map node_data = { + {"name", Value{"Carol"}}, + {"scores", Value{scores}}, + }; + + auto node_result = node_manager_->create_node("UserWithArrays", node_data); + ASSERT_TRUE(node_result.ok()); + auto node = node_result.ValueOrDie(); + + auto scores_result = node->get_value("scores"); + ASSERT_TRUE(scores_result.ok()); + const Value& scores_value = scores_result.ValueOrDie(); + + EXPECT_EQ(scores_value.type(), ValueType::ARRAY); + EXPECT_TRUE(scores_value.holds_array_ref()); + const ArrayRef& arr = scores_value.as_array_ref(); + EXPECT_EQ(arr.length(), 3u); + EXPECT_EQ(scores_value.to_string(), "[1, 2, 3]"); +} + +// Test set_value for array field (replace entire array) +TEST_F(NodeTest, NodeSetValueArray) { + std::unordered_map node_data = { + {"name", Value{"Dave"}}, + {"tags", Value{std::vector{Value{"old1"}, Value{"old2"}}}}, + }; + + auto node_result = node_manager_->create_node("UserWithArrays", node_data); + ASSERT_TRUE(node_result.ok()); + auto node = node_result.ValueOrDie(); + + auto get_before = node->get_value("tags"); + ASSERT_TRUE(get_before.ok()); + EXPECT_EQ(get_before.ValueOrDie().to_string(), "[old1, old2]"); + + std::vector new_tags = {Value{"new1"}, Value{"new2"}, Value{"new3"}}; + auto set_result = node->set_value("tags", Value{new_tags}); + ASSERT_TRUE(set_result.ok()) + << "Failed to set tags: " << set_result.status().ToString(); + + auto get_after = node->get_value("tags"); + ASSERT_TRUE(get_after.ok()); + EXPECT_EQ(get_after.ValueOrDie().to_string(), "[new1, new2, new3]"); +} + +// Test node with empty array +TEST_F(NodeTest, NodeArrayEmpty) { + std::unordered_map node_data = { + {"name", Value{"Eve"}}, + {"tags", Value{std::vector{}}}, + }; + + auto node_result = node_manager_->create_node("UserWithArrays", node_data); + ASSERT_TRUE(node_result.ok()); + auto node = node_result.ValueOrDie(); + + auto tags_result = node->get_value("tags"); + ASSERT_TRUE(tags_result.ok()); + const Value& tags_value = tags_result.ValueOrDie(); + + EXPECT_EQ(tags_value.type(), ValueType::ARRAY); + EXPECT_TRUE(tags_value.holds_array_ref()); + EXPECT_TRUE(tags_value.as_array_ref().empty()); + EXPECT_EQ(tags_value.as_array_ref().length(), 0u); + EXPECT_EQ(tags_value.to_string(), "[]"); +} + +// Test nullable array field omitted (null) +TEST_F(NodeTest, NodeArrayNullableOmitted) { + std::unordered_map node_data = { + {"name", Value{"Frank"}}, + // tags and scores not provided -> null + }; + + auto node_result = node_manager_->create_node("UserWithArrays", node_data); + ASSERT_TRUE(node_result.ok()); + auto node = node_result.ValueOrDie(); + + auto tags_result = node->get_value("tags"); + ASSERT_TRUE(tags_result.ok()); + EXPECT_TRUE(tags_result.ValueOrDie().is_null()); + + auto scores_result = node->get_value("scores"); + ASSERT_TRUE(scores_result.ok()); + EXPECT_TRUE(scores_result.ValueOrDie().is_null()); +} + +// Test array storage: verify ArrayRef ref count and element access +TEST_F(NodeTest, NodeArrayStorage) { + std::vector tags = {Value{"one"}, Value{"two"}}; + std::unordered_map node_data = { + {"name", Value{"Grace"}}, + {"tags", Value{tags}}, + }; + + auto node_result = node_manager_->create_node("UserWithArrays", node_data); + ASSERT_TRUE(node_result.ok()); + auto node = node_result.ValueOrDie(); + + auto tags_result = node->get_value("tags"); + ASSERT_TRUE(tags_result.ok()); + const Value& v = tags_result.ValueOrDie(); + + EXPECT_TRUE(v.holds_array_ref()); + ArrayRef ref = v.as_array_ref(); + EXPECT_GT(ref.get_ref_count(), 0); + EXPECT_FALSE(ref.is_marked_for_deletion()); + EXPECT_EQ(ref.length(), 2u); + EXPECT_EQ(ref.elem_type(), ValueType::STRING); + + // Read elements via Value::read_value_from_memory + auto elem0 = + Value::read_value_from_memory(ref.element_ptr(0), ref.elem_type()); + auto elem1 = + Value::read_value_from_memory(ref.element_ptr(1), ref.elem_type()); + EXPECT_EQ(elem0.to_string(), "one"); + EXPECT_EQ(elem1.to_string(), "two"); } \ No newline at end of file diff --git a/tests/sharding_test.cpp b/tests/sharding_test.cpp index b19b4ff..75c953e 100644 --- a/tests/sharding_test.cpp +++ b/tests/sharding_test.cpp @@ -91,8 +91,8 @@ TEST_F(ShardingTest, AddAndRetrieveNodes) { TEST_F(ShardingTest, UpdateNodes) { // Create a node auto node = create_test_node("Original", 0); - auto update_result = - db->update_node("test-schema", node->id, "name", Value{"Updated"}, SET); + auto update_result = db->update_node("test-schema", node->id, "name", + Value{"Updated"}, UpdateType::SET); ASSERT_TRUE(update_result.ok()) << "Failed to update node: " << update_result.status().ToString(); @@ -116,8 +116,8 @@ TEST_F(ShardingTest, Compaction) { } // Now remove some nodes to create gaps - auto update_result = - db->update_node("test-schema", 1, "name", Value{"Removed"}, SET); + auto update_result = db->update_node("test-schema", 1, "name", + Value{"Removed"}, UpdateType::SET); ASSERT_TRUE(update_result.ok()); // Compact the schema diff --git a/tests/string_ref_concurrent_test.cpp b/tests/string_ref_concurrent_test.cpp index c95335c..bd296ec 100644 --- a/tests/string_ref_concurrent_test.cpp +++ b/tests/string_ref_concurrent_test.cpp @@ -37,8 +37,7 @@ class StringRefConcurrentTest : public ::testing::Test { // ============================================================================ TEST_F(StringRefConcurrentTest, BasicAllocationAndDeallocation) { - // Allocate a string - StringRef ref = arena->store_string("Hello World"); + StringRef ref = arena->store_string("Hello World").ValueOrDie(); EXPECT_FALSE(ref.is_null()); EXPECT_EQ(ref.length(), 11); @@ -47,7 +46,7 @@ TEST_F(StringRefConcurrentTest, BasicAllocationAndDeallocation) { } TEST_F(StringRefConcurrentTest, CopyConstructorIncrementsRefCount) { - StringRef ref1 = arena->store_string("Test"); + StringRef ref1 = arena->store_string("Test").ValueOrDie(); EXPECT_EQ(ref1.get_ref_count(), 1); // Copy constructor should increment ref count @@ -61,7 +60,7 @@ TEST_F(StringRefConcurrentTest, CopyConstructorIncrementsRefCount) { } TEST_F(StringRefConcurrentTest, MoveConstructorDoesNotChangeRefCount) { - StringRef ref1 = arena->store_string("Test"); + StringRef ref1 = arena->store_string("Test").ValueOrDie(); const char* original_data = ref1.data(); EXPECT_EQ(ref1.get_ref_count(), 1); @@ -75,8 +74,8 @@ TEST_F(StringRefConcurrentTest, MoveConstructorDoesNotChangeRefCount) { } TEST_F(StringRefConcurrentTest, CopyAssignmentIncrementsRefCount) { - StringRef ref1 = arena->store_string("First"); - StringRef ref2 = arena->store_string("Second"); + StringRef ref1 = arena->store_string("First").ValueOrDie(); + StringRef ref2 = arena->store_string("Second").ValueOrDie(); EXPECT_EQ(ref1.get_ref_count(), 1); EXPECT_EQ(ref2.get_ref_count(), 1); @@ -90,7 +89,7 @@ TEST_F(StringRefConcurrentTest, CopyAssignmentIncrementsRefCount) { } TEST_F(StringRefConcurrentTest, DestructorDecrementsRefCount) { - StringRef ref1 = arena->store_string("Test"); + StringRef ref1 = arena->store_string("Test").ValueOrDie(); EXPECT_EQ(ref1.get_ref_count(), 1); { @@ -108,7 +107,7 @@ TEST_F(StringRefConcurrentTest, DestructorDecrementsRefCount) { // ============================================================================ TEST_F(StringRefConcurrentTest, MarkForDeletionWithMultipleRefs) { - StringRef ref1 = arena->store_string("ToDelete"); + StringRef ref1 = arena->store_string("ToDelete").ValueOrDie(); StringRef ref2 = ref1; // ref_count = 2 EXPECT_EQ(ref1.get_ref_count(), 2); @@ -127,7 +126,8 @@ TEST_F(StringRefConcurrentTest, MarkForDeletionWithMultipleRefs) { } TEST_F(StringRefConcurrentTest, DeallocateWhenLastRefDestroyed) { - StringRef* ref1 = new StringRef(arena->store_string("Temporary")); + StringRef* ref1 = + new StringRef(arena->store_string("Temporary").ValueOrDie()); const char* data_ptr = ref1->data(); // Mark for deletion @@ -142,7 +142,8 @@ TEST_F(StringRefConcurrentTest, DeallocateWhenLastRefDestroyed) { } TEST_F(StringRefConcurrentTest, NoDeallocateIfNotMarked) { - StringRef* ref1 = new StringRef(arena->store_string("NotMarked")); + StringRef* ref1 = + new StringRef(arena->store_string("NotMarked").ValueOrDie()); // Do NOT mark for deletion EXPECT_FALSE(ref1->is_marked_for_deletion()); @@ -162,8 +163,8 @@ TEST_F(StringRefConcurrentTest, DeduplicationSharesMemory) { // Enable deduplication for this test arena->enable_deduplication(true); - StringRef ref1 = arena->store_string("Shared"); - StringRef ref2 = arena->store_string("Shared"); // Same content + StringRef ref1 = arena->store_string("Shared").ValueOrDie(); + StringRef ref2 = arena->store_string("Shared").ValueOrDie(); // Both refs should point to the same memory EXPECT_EQ(ref1.data(), ref2.data()); @@ -176,14 +177,14 @@ TEST_F(StringRefConcurrentTest, MarkForDeletionRemovesFromDedup) { // Enable deduplication for this test arena->enable_deduplication(true); - StringRef ref1 = arena->store_string("DedupTest"); + StringRef ref1 = arena->store_string("DedupTest").ValueOrDie(); const char* original_data = ref1.data(); // Mark for deletion - should remove from dedup cache arena->mark_for_deletion(ref1); // New string with same content should allocate NEW memory - StringRef ref2 = arena->store_string("DedupTest"); + StringRef ref2 = arena->store_string("DedupTest").ValueOrDie(); EXPECT_NE(ref1.data(), ref2.data()); // Different memory! } @@ -195,7 +196,7 @@ TEST_F(StringRefConcurrentTest, CacheNotCorruptedByDeallocation) { arena->enable_deduplication(true); // Step 1: Create and mark old string for deletion - StringRef old_ref = arena->store_string("SharedString"); + StringRef old_ref = arena->store_string("SharedString").ValueOrDie(); const char* old_data = old_ref.data(); // Mark for deletion - removes from cache @@ -203,7 +204,7 @@ TEST_F(StringRefConcurrentTest, CacheNotCorruptedByDeallocation) { // Step 2: Create NEW string with same content // This should allocate new memory and add to cache - StringRef new_ref = arena->store_string("SharedString"); + StringRef new_ref = arena->store_string("SharedString").ValueOrDie(); const char* new_data = new_ref.data(); // Different memory addresses @@ -216,7 +217,7 @@ TEST_F(StringRefConcurrentTest, CacheNotCorruptedByDeallocation) { // Step 4: Try to get from cache - should hit! // If release_string() incorrectly removed from cache, this would allocate new // memory - StringRef cached = arena->store_string("SharedString"); + StringRef cached = arena->store_string("SharedString").ValueOrDie(); // ✅ Should be cache hit - same address as new_ref EXPECT_EQ(cached.data(), new_data); @@ -234,7 +235,7 @@ TEST_F(StringRefConcurrentTest, ConcurrentCopying) { const int num_threads = 10; const int copies_per_thread = 1000; - StringRef original = arena->store_string("ConcurrentTest"); + StringRef original = arena->store_string("ConcurrentTest").ValueOrDie(); std::vector threads; // Each thread makes many copies @@ -268,7 +269,7 @@ TEST_F(StringRefConcurrentTest, ConcurrentReadWhileMarkingForDeletion) { const int num_readers = 5; const int reads_per_thread = 10000; - StringRef shared = arena->store_string("SharedData"); + StringRef shared = arena->store_string("SharedData").ValueOrDie(); std::atomic start{false}; std::atomic marked{false}; @@ -337,7 +338,7 @@ TEST_F(StringRefConcurrentTest, StressTestManyStrings) { for (int j = 0; j < num_strings; ++j) { std::string content = "Thread" + std::to_string(i) + "_String" + std::to_string(j); - StringRef ref = arena->store_string(content); + StringRef ref = arena->store_string(content).ValueOrDie(); // Verify immediately EXPECT_EQ(ref.view(), content); @@ -372,7 +373,7 @@ TEST_F(StringRefConcurrentTest, StressTestManyStrings) { // ============================================================================ TEST_F(StringRefConcurrentTest, EmptyString) { - StringRef ref = arena->store_string(""); + StringRef ref = arena->store_string("").ValueOrDie(); EXPECT_FALSE(ref.is_null()); EXPECT_EQ(ref.length(), 0); @@ -391,7 +392,7 @@ TEST_F(StringRefConcurrentTest, NullStringRef) { TEST_F(StringRefConcurrentTest, LargeString) { std::string large(10000, 'X'); - StringRef ref = arena->store_string(large); + StringRef ref = arena->store_string(large).ValueOrDie(); EXPECT_FALSE(ref.is_null()); EXPECT_EQ(ref.length(), 10000); @@ -400,7 +401,7 @@ TEST_F(StringRefConcurrentTest, LargeString) { } TEST_F(StringRefConcurrentTest, SelfAssignment) { - StringRef ref = arena->store_string("SelfAssign"); + StringRef ref = arena->store_string("SelfAssign").ValueOrDie(); #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wself-assign-overloaded" @@ -417,13 +418,11 @@ TEST_F(StringRefConcurrentTest, SelfAssignment) { // ============================================================================ TEST_F(StringRefConcurrentTest, AutoPoolSelection) { - StringRef small = arena->store_string_auto("X"); // Pool 0 (<=16) + StringRef small = arena->store_string_auto("X").ValueOrDie(); StringRef medium = - arena->store_string_auto(std::string(20, 'Y')); // Pool 1 (<=32) - StringRef large = - arena->store_string_auto(std::string(50, 'Z')); // Pool 2 (<=64) - StringRef huge = - arena->store_string_auto(std::string(100, 'W')); // Pool 3 (unlimited) + arena->store_string_auto(std::string(20, 'Y')).ValueOrDie(); + StringRef large = arena->store_string_auto(std::string(50, 'Z')).ValueOrDie(); + StringRef huge = arena->store_string_auto(std::string(100, 'W')).ValueOrDie(); EXPECT_EQ(small.pool_id(), 0); EXPECT_EQ(medium.pool_id(), 1); @@ -433,9 +432,9 @@ TEST_F(StringRefConcurrentTest, AutoPoolSelection) { TEST_F(StringRefConcurrentTest, ExplicitPoolSelection) { // Test explicit pool selection by pool_id - StringRef ref0 = arena->store_string("Test", 0); // Pool 0 (<=16 bytes) - StringRef ref1 = arena->store_string("Test", 1); // Pool 1 (<=32 bytes) - StringRef ref3 = arena->store_string("Test", 3); // Pool 3 (unlimited) + StringRef ref0 = arena->store_string("Test", 0).ValueOrDie(); + StringRef ref1 = arena->store_string("Test", 1).ValueOrDie(); + StringRef ref3 = arena->store_string("Test", 3).ValueOrDie(); EXPECT_EQ(ref0.pool_id(), 0); EXPECT_EQ(ref1.pool_id(), 1);