diff --git a/libs/@local/hashql/mir/benches/execution.rs b/libs/@local/hashql/mir/benches/execution.rs index 2b9a6bbfc38..f5a5b09cc82 100644 --- a/libs/@local/hashql/mir/benches/execution.rs +++ b/libs/@local/hashql/mir/benches/execution.rs @@ -104,7 +104,7 @@ fn execution_analysis(criterion: &mut Criterion) { scratch: &mut *scratch, }; - let _result = core::hint::black_box(analysis.run(context, body)); + let _result = core::hint::black_box(analysis.run_in(context, body, context.heap)); }); }); @@ -123,7 +123,7 @@ fn execution_analysis(criterion: &mut Criterion) { scratch: &mut *scratch, }; - let _result = core::hint::black_box(analysis.run(context, body)); + let _result = core::hint::black_box(analysis.run_in(context, body, context.heap)); }, ); }); @@ -140,7 +140,7 @@ fn execution_analysis(criterion: &mut Criterion) { scratch: &mut *scratch, }; - let _result = core::hint::black_box(analysis.run(context, body)); + let _result = core::hint::black_box(analysis.run_in(context, body, context.heap)); }); }); } diff --git a/libs/@local/hashql/mir/src/builder/body.rs b/libs/@local/hashql/mir/src/builder/body.rs index df15d7b3d68..1f2bd5ad682 100644 --- a/libs/@local/hashql/mir/src/builder/body.rs +++ b/libs/@local/hashql/mir/src/builder/body.rs @@ -255,7 +255,7 @@ impl<'env, 'heap> Deref for BodyBuilder<'env, 'heap> { /// /// Binary (`bin.`): `==`, `!=`, `<`, `<=`, `>`, `>=`, `&`, `|`, `+`, `-`, `*`, `/`. /// -/// Unary (`un.`): `!`, `neg`. +/// Unary (`un.`): `!`, `neg`, `~`. #[macro_export] macro_rules! body { ( @@ -364,7 +364,7 @@ macro_rules! body { $types.unknown() }; (@type $types:ident; $other:expr) => { - $other($types) + $other(&$types) }; (@source thunk) => { diff --git a/libs/@local/hashql/mir/src/builder/rvalue.rs b/libs/@local/hashql/mir/src/builder/rvalue.rs index d1017f5451b..fe0c0b98fb8 100644 --- a/libs/@local/hashql/mir/src/builder/rvalue.rs +++ b/libs/@local/hashql/mir/src/builder/rvalue.rs @@ -3,6 +3,7 @@ use core::ops::Deref; use hashql_core::{ heap::{self, FromIteratorIn as _}, id::IdVec, + symbol::Symbol, r#type::builder::IntoSymbol, }; use hashql_hir::node::operation::{InputOp, UnOp}; @@ -158,6 +159,16 @@ impl<'env, 'heap> RValueBuilder<'env, 'heap> { }) } + pub fn opaque(self, id: Symbol<'heap>, value: impl Into>) -> RValue<'heap> { + let mut operands = IdVec::with_capacity_in(1, self.interner.heap); + operands.push(value.into()); + + RValue::Aggregate(Aggregate { + kind: AggregateKind::Opaque(id), + operands, + }) + } + /// Creates a function application r-value. #[must_use] pub fn apply( @@ -235,6 +246,12 @@ macro_rules! rvalue { rv.tuple(members) }; $payload; $($rest)*) }; + ($resume:path; $payload:tt; opaque $name:tt, $value:tt; $($rest:tt)*) => { + $resume!(@rvalue |rv| { + let inner = $crate::builder::_private::operand!(rv; $value); + rv.opaque($name, inner) + }; $payload; $($rest)*) + }; ($resume:path; $payload:tt; list; $($rest:tt)*) => { $resume!(@rvalue |rv| { rv.list([] as [!; 0]) diff --git a/libs/@local/hashql/mir/src/lib.rs b/libs/@local/hashql/mir/src/lib.rs index 7dc428fccdf..744d289dd47 100644 --- a/libs/@local/hashql/mir/src/lib.rs +++ b/libs/@local/hashql/mir/src/lib.rs @@ -23,6 +23,7 @@ iter_collect_into, likely_unlikely, maybe_uninit_fill, + option_into_flat_iter, step_trait, temporary_niche_types, try_trait_v2, diff --git a/libs/@local/hashql/mir/src/pass/execution/island/graph/mod.rs b/libs/@local/hashql/mir/src/pass/execution/island/graph/mod.rs index 7b0dd9d82f8..e3e661d377b 100644 --- a/libs/@local/hashql/mir/src/pass/execution/island/graph/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/island/graph/mod.rs @@ -120,6 +120,24 @@ impl IslandNode { self.target } + #[inline] + pub fn members(&self) -> impl Iterator { + match &self.kind { + IslandKind::Exec(exec_island) => Some(exec_island.members.iter()), + IslandKind::Data => None, + } + .into_flat_iter() + } + + #[inline] + #[must_use] + pub fn contains(&self, block: BasicBlockId) -> bool { + match &self.kind { + IslandKind::Exec(exec_island) => exec_island.members.contains(block), + IslandKind::Data => false, + } + } + /// Returns the set of traversal paths this island requires. #[inline] #[must_use] @@ -254,6 +272,13 @@ impl IslandGraph { RequirementResolver::new(self, start, scratch).resolve(&topo); } + + pub fn find(&self, target: TargetId) -> impl Iterator { + self.inner + .iter_nodes() + .filter(move |node| node.data.target == target) + .map(|node| (IslandId::new(node.id().as_u32()), &node.data)) + } } impl DirectedGraph for IslandGraph { diff --git a/libs/@local/hashql/mir/src/pass/execution/island/graph/tests.rs b/libs/@local/hashql/mir/src/pass/execution/island/graph/tests.rs index 800a9ff063c..48a9fdd2b22 100644 --- a/libs/@local/hashql/mir/src/pass/execution/island/graph/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/island/graph/tests.rs @@ -34,7 +34,7 @@ pub(crate) fn make_targets(assignments: &[TargetId]) -> BasicBlockVec, targets: &[TargetId]) -> IslandGraph { let target_vec = make_targets(targets); - let islands = IslandPlacement::new().run(body, VertexType::Entity, &target_vec, Global); + let islands = IslandPlacement::new().run_in(body, VertexType::Entity, &target_vec, Global); IslandGraph::new_in(body, VertexType::Entity, islands, Global, Global) } diff --git a/libs/@local/hashql/mir/src/pass/execution/island/mod.rs b/libs/@local/hashql/mir/src/pass/execution/island/mod.rs index 661ba8f5302..c8d866efff6 100644 --- a/libs/@local/hashql/mir/src/pass/execution/island/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/island/mod.rs @@ -102,8 +102,8 @@ impl Island { /// Two blocks belong to the same island when they are connected in the CFG (directly or /// transitively through same-target successors) and share the same [`TargetId`]. The pass /// uses a union-find to identify these components in nearly linear time. -pub(crate) struct IslandPlacement { - scratch: A, +pub(crate) struct IslandPlacement { + scratch: S, } impl IslandPlacement { @@ -130,7 +130,7 @@ impl IslandPlacement { /// /// Returns an [`IslandVec`] where each [`Island`] contains the set of blocks that form /// a connected same-target component. The output is allocated with `alloc`. - pub(crate) fn run( + pub(crate) fn run_in( &self, body: &Body<'_>, vertex: VertexType, diff --git a/libs/@local/hashql/mir/src/pass/execution/island/tests.rs b/libs/@local/hashql/mir/src/pass/execution/island/tests.rs index 542013c8cd6..02032ed41c6 100644 --- a/libs/@local/hashql/mir/src/pass/execution/island/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/island/tests.rs @@ -42,7 +42,7 @@ fn single_block() { }); let targets = make_targets(&[TargetId::Interpreter]); - let islands = IslandPlacement::new().run(&body, VertexType::Entity, &targets, Global); + let islands = IslandPlacement::new().run_in(&body, VertexType::Entity, &targets, Global); assert_eq!(islands.len(), 1); assert_eq!(islands[IslandId::new(0)].target(), TargetId::Interpreter); @@ -71,7 +71,7 @@ fn same_target_chain() { }); let targets = make_targets(&[TargetId::Postgres, TargetId::Postgres]); - let islands = IslandPlacement::new().run(&body, VertexType::Entity, &targets, Global); + let islands = IslandPlacement::new().run_in(&body, VertexType::Entity, &targets, Global); assert_eq!(islands.len(), 1); assert_eq!(islands[IslandId::new(0)].target(), TargetId::Postgres); @@ -101,7 +101,7 @@ fn different_targets() { }); let targets = make_targets(&[TargetId::Interpreter, TargetId::Postgres]); - let islands = IslandPlacement::new().run(&body, VertexType::Entity, &targets, Global); + let islands = IslandPlacement::new().run_in(&body, VertexType::Entity, &targets, Global); assert_eq!(islands.len(), 2); @@ -153,7 +153,7 @@ fn diamond_same_target() { TargetId::Interpreter, TargetId::Interpreter, ]); - let islands = IslandPlacement::new().run(&body, VertexType::Entity, &targets, Global); + let islands = IslandPlacement::new().run_in(&body, VertexType::Entity, &targets, Global); assert_eq!(islands.len(), 1); assert_eq!(islands[IslandId::new(0)].target(), TargetId::Interpreter); @@ -198,7 +198,7 @@ fn diamond_mixed_targets() { TargetId::Embedding, TargetId::Interpreter, ]); - let islands = IslandPlacement::new().run(&body, VertexType::Entity, &targets, Global); + let islands = IslandPlacement::new().run_in(&body, VertexType::Entity, &targets, Global); // bb0 alone, bb1 alone, bb2 alone, bb3 alone — 4 islands, since no same-target // edges exist between any pair of connected blocks. @@ -250,7 +250,7 @@ fn alternating_targets() { TargetId::Interpreter, TargetId::Postgres, ]); - let islands = IslandPlacement::new().run(&body, VertexType::Entity, &targets, Global); + let islands = IslandPlacement::new().run_in(&body, VertexType::Entity, &targets, Global); assert_eq!(islands.len(), 4); for island_id in islands.ids() { @@ -300,7 +300,7 @@ fn transitive_same_target_chain() { }); let targets = make_targets(&[TargetId::Postgres, TargetId::Postgres, TargetId::Postgres]); - let islands = IslandPlacement::new().run(&body, VertexType::Entity, &targets, Global); + let islands = IslandPlacement::new().run_in(&body, VertexType::Entity, &targets, Global); assert_eq!(islands.len(), 1); assert_eq!(islands[IslandId::new(0)].count(), 3); @@ -337,7 +337,7 @@ fn island_joins_traversal_paths() { }); let targets = make_targets(&[TargetId::Interpreter, TargetId::Interpreter]); - let islands = IslandPlacement::new().run(&body, VertexType::Entity, &targets, Global); + let islands = IslandPlacement::new().run_in(&body, VertexType::Entity, &targets, Global); assert_eq!(islands.len(), 1); let island = &islands[IslandId::new(0)]; diff --git a/libs/@local/hashql/mir/src/pass/execution/mod.rs b/libs/@local/hashql/mir/src/pass/execution/mod.rs index 2c8a56fbb1a..ff0a03efa7a 100644 --- a/libs/@local/hashql/mir/src/pass/execution/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/mod.rs @@ -49,24 +49,27 @@ use super::analysis::size_estimation::BodyFootprint; use crate::{ body::{Body, Source, basic_block::BasicBlockVec, local::Local}, context::MirContext, - def::DefIdSlice, + def::{DefIdSlice, DefIdVec}, pass::analysis::size_estimation::InformationRange, }; +pub struct ExecutionAnalysisResidual { + pub assignment: BasicBlockVec, + pub islands: IslandGraph, +} + pub struct ExecutionAnalysis<'ctx, 'heap, S: Allocator> { pub footprints: &'ctx DefIdSlice>, pub scratch: S, } impl<'heap, S: BumpAllocator> ExecutionAnalysis<'_, 'heap, S> { - pub fn run( + pub fn run_in( &self, context: &mut MirContext<'_, 'heap>, body: &mut Body<'heap>, - ) -> ( - BasicBlockVec, - IslandVec, - ) { + alloc: A, + ) -> ExecutionAnalysisResidual { assert_matches!(body.source, Source::GraphReadFilter(_)); let Some(vertex) = VertexType::from_local(context.env, &body.local_decls[Local::VERTEX]) @@ -129,14 +132,42 @@ impl<'heap, S: BumpAllocator> ExecutionAnalysis<'_, 'heap, S> { } .build_in(body, &self.scratch); - let mut assignment = solver.run(context, body); + let mut assignment = solver.run_in(context, body, alloc.clone()); let fusion = BasicBlockFusion::new_in(&self.scratch); fusion.fuse(body, &mut assignment); let islands = - IslandPlacement::new_in(&self.scratch).run(body, vertex, &assignment, context.heap); + IslandPlacement::new_in(&self.scratch).run_in(body, vertex, &assignment, &self.scratch); + let islands = IslandGraph::new_in(body, vertex, islands, &self.scratch, alloc); + + ExecutionAnalysisResidual { + assignment, + islands, + } + } + + pub fn run_all_in( + &self, + context: &mut MirContext<'_, 'heap>, + bodies: &mut DefIdSlice>, + alloc: A, + ) -> DefIdVec>, A> { + let mut items = DefIdVec::with_capacity_in(bodies.len(), alloc.clone()); + + for (def, body) in bodies.iter_enumerated_mut() { + match body.source { + Source::Ctor(_) + | Source::Closure(_, _) + | Source::Thunk(_, _) + | Source::Intrinsic(_) => continue, + Source::GraphReadFilter(_) => {} + } + + let residual = self.run_in(context, body, alloc.clone()); + items.insert(def, residual); + } - (assignment, islands) + items } } diff --git a/libs/@local/hashql/mir/src/pass/execution/placement/solve/csp/mod.rs b/libs/@local/hashql/mir/src/pass/execution/placement/solve/csp/mod.rs index ec9ea086bcf..2480b0c4aec 100644 --- a/libs/@local/hashql/mir/src/pass/execution/placement/solve/csp/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/placement/solve/csp/mod.rs @@ -11,7 +11,7 @@ //! Block ordering uses the MRV (minimum remaining values) heuristic, with highest constraint degree //! as tie-breaker. Forward checking narrows domains bidirectionally after each assignment. -use core::{alloc::Allocator, cmp, mem}; +use core::{alloc::Allocator, mem}; use std::f32; use hashql_core::{ @@ -415,13 +415,13 @@ impl<'ctx, 'parent, 'alloc, A: Allocator, S: BumpAllocator> // Per-unassigned-block: minimum block cost over remaining domain for block in unfixed { - let mut min_block = ApproxCost::INF; - - for target in &block.possible { - min_block = cmp::min(min_block, self.solver.data.blocks.cost(block.id, target)); - } + let min_block = block + .possible + .iter() + .map(|target| self.solver.data.blocks.cost(block.id, target)) + .min(); - if min_block < ApproxCost::INF { + if let Some(min_block) = min_block { bound += min_block; } } @@ -446,7 +446,7 @@ impl<'ctx, 'parent, 'alloc, A: Allocator, S: BumpAllocator> #[expect(clippy::option_if_let_else, reason = "readability")] let min_trans = if let Some(succ_possible) = succ_domain { - // Both endpoints involve an unfixed block — min over all compatible pairs + // Both endpoints involve an unfixed block - min over all compatible pairs block .possible .iter() @@ -457,9 +457,8 @@ impl<'ctx, 'parent, 'alloc, A: Allocator, S: BumpAllocator> .then_some(cost.as_approx()) }) .min() - .unwrap_or(ApproxCost::INF) } else { - // Successor is fixed (or external) — min over block's domain + // Successor is fixed (or external) - min over block's domain let succ_target = self .region .find_block(succ) @@ -471,20 +470,17 @@ impl<'ctx, 'parent, 'alloc, A: Allocator, S: BumpAllocator> }) .or_else(|| self.solver.targets[succ].map(|elem| elem.target)); - if let Some(succ_target) = succ_target { + succ_target.and_then(|succ_target| { block .possible .iter() .filter_map(|source_target| matrix.get(source_target, succ_target)) .map(Cost::as_approx) .min() - .unwrap_or(ApproxCost::INF) - } else { - ApproxCost::INF - } + }) }; - if min_trans < ApproxCost::INF { + if let Some(min_trans) = min_trans { bound += min_trans; } } @@ -640,7 +636,7 @@ impl<'ctx, 'parent, 'alloc, A: Allocator, S: BumpAllocator> solutions } else { self.solver - .alloc + .scratch .allocate_slice_uninit(RETAIN_SOLUTIONS) .write_filled(Solution::new()) }; diff --git a/libs/@local/hashql/mir/src/pass/execution/placement/solve/estimate/mod.rs b/libs/@local/hashql/mir/src/pass/execution/placement/solve/estimate/mod.rs index 5805ddf67ae..843c2c54338 100644 --- a/libs/@local/hashql/mir/src/pass/execution/placement/solve/estimate/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/placement/solve/estimate/mod.rs @@ -198,7 +198,7 @@ where let mut block_cost = self.solver.data.blocks.cost(edge.target.block, target); block_cost += cost; - if block_cost < current_minimum { + if block_cost <= current_minimum { current_minimum = block_cost; minimum_transition_cost = Some(cost); } @@ -219,7 +219,7 @@ where let mut block_cost = self.solver.data.blocks.cost(edge.source.block, source); block_cost += cost; - if block_cost < current_minimum { + if block_cost <= current_minimum { current_minimum = block_cost; minimum_transition_cost = Some(cost); } diff --git a/libs/@local/hashql/mir/src/pass/execution/placement/solve/mod.rs b/libs/@local/hashql/mir/src/pass/execution/placement/solve/mod.rs index d5355ea276d..e0b685fd39e 100644 --- a/libs/@local/hashql/mir/src/pass/execution/placement/solve/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/placement/solve/mod.rs @@ -9,16 +9,11 @@ //! that can change its assignment. //! //! Entry point: [`PlacementSolverContext::build_in`] constructs a [`PlacementSolver`], then -//! [`PlacementSolver::run`] executes both passes. +//! [`PlacementSolver::run_in`] executes both passes. use core::{alloc::Allocator, mem}; -use hashql_core::{ - graph::DirectedGraph as _, - heap::{BumpAllocator, Heap}, - id, - span::SpanId, -}; +use hashql_core::{graph::DirectedGraph as _, heap::BumpAllocator, id, span::SpanId}; use self::{ condensation::{Condensation, PlacementRegionKind, TrivialPlacementRegion}, @@ -87,7 +82,6 @@ fn back_edge_span(body: &Body<'_>, members: &[BasicBlockId]) -> SpanId { #[derive(Debug, Copy, Clone)] pub(crate) struct PlacementSolverContext<'ctx, A: Allocator> { pub blocks: &'ctx BasicBlockCostVec, - pub terminators: &'ctx TerminatorCostVec, } @@ -124,7 +118,7 @@ impl<'ctx, A: Allocator> PlacementSolverContext<'ctx, A> { options, targets, - alloc, + scratch: alloc, } } } @@ -134,26 +128,29 @@ impl<'ctx, A: Allocator> PlacementSolverContext<'ctx, A> { /// Uses a two-pass approach: the forward pass assigns targets in topological order, the backward /// pass refines them with full boundary context. Rewind-based backtracking recovers from /// assignment failures in the forward pass. -pub(crate) struct PlacementSolver<'ctx, 'alloc, A: Allocator, S: BumpAllocator> { - data: PlacementSolverContext<'ctx, A>, +// We need two allocators here, because the `BumpAllocator` trait does not carry a lifetime, but we +// move `Copy` data into the bump allocator. +pub(crate) struct PlacementSolver<'ctx, 'alloc, S1: Allocator, S2: BumpAllocator> { + data: PlacementSolverContext<'ctx, S1>, - condensation: Condensation<'alloc, S>, + condensation: Condensation<'alloc, S2>, options: &'alloc mut BasicBlockSlice, targets: &'alloc mut BasicBlockSlice>, - alloc: &'alloc S, + scratch: &'alloc S2, } -impl<'alloc, A: Allocator, S: BumpAllocator> PlacementSolver<'_, 'alloc, A, S> { +impl<'alloc, S1: Allocator, S: BumpAllocator> PlacementSolver<'_, 'alloc, S1, S> { /// Runs the forward and backward passes, returning the chosen [`TargetId`] for each basic /// block. - pub(crate) fn run<'heap>( + pub(crate) fn run_in<'heap, A: Allocator>( &mut self, context: &mut MirContext<'_, 'heap>, body: &Body<'heap>, - ) -> BasicBlockVec { - let mut regions = Vec::with_capacity_in(self.condensation.node_count(), self.alloc); + alloc: A, + ) -> BasicBlockVec { + let mut regions = Vec::with_capacity_in(self.condensation.node_count(), self.scratch); self.condensation .reverse_topological_order() .rev() @@ -176,7 +173,7 @@ impl<'alloc, A: Allocator, S: BumpAllocator> PlacementSolver<'_, 'alloc, A, S> { // Collect the final assignments into the output vec. Unassigned blocks (from a // failed forward pass) default to the interpreter — the universal fallback target. - let mut output = BasicBlockVec::with_capacity_in(body.basic_blocks.len(), context.heap); + let mut output = BasicBlockVec::with_capacity_in(body.basic_blocks.len(), alloc); for target in &*self.targets { output.push( target diff --git a/libs/@local/hashql/mir/src/pass/execution/placement/solve/tests.rs b/libs/@local/hashql/mir/src/pass/execution/placement/solve/tests.rs index 527c4c6dd9f..a4520cd2f82 100644 --- a/libs/@local/hashql/mir/src/pass/execution/placement/solve/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/placement/solve/tests.rs @@ -1,5 +1,6 @@ #![expect(clippy::min_ident_chars)] +use alloc::alloc::Global; use core::alloc::Allocator; use hashql_core::{ @@ -168,7 +169,7 @@ pub(crate) fn run_solver<'heap>( terminators, }; let mut solver = data.build_in(body, env.heap); - solver.run(&mut context, body) + solver.run_in(&mut context, body, env.heap) } pub(crate) fn find_region_of( @@ -1161,7 +1162,7 @@ fn trivial_failure_emits_diagnostic() { let mut solver = data.build_in(&body, &heap); let mut context = MirContext::new(&env, &interner); - let _result = solver.run(&mut context, &body); + let _result = solver.run_in(&mut context, &body, Global); assert_eq!(context.diagnostics.len(), 1); let diagnostic = context.diagnostics.iter().next().expect("one diagnostic"); @@ -1230,7 +1231,7 @@ fn cyclic_failure_emits_diagnostic() { let mut solver = data.build_in(&body, &heap); let mut context = MirContext::new(&env, &interner); - let _result = solver.run(&mut context, &body); + let _result = solver.run_in(&mut context, &body, Global); assert_eq!(context.diagnostics.len(), 1); let diagnostic = context.diagnostics.iter().next().expect("one diagnostic"); @@ -1304,7 +1305,7 @@ fn path_premiums_influence_placement() { }; let mut context = MirContext::new(&env, &interner); let mut solver = data.build_in(&body, &heap); - let result = solver.run(&mut context, &body); + let result = solver.run_in(&mut context, &body, Global); assert_ne!( result[bb(0)], diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/tests.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/tests.rs index 4ac8917a460..25098b10a59 100644 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/statement_placement/interpret/tests.rs @@ -9,9 +9,12 @@ use crate::{ context::MirContext, def::DefId, intern::Interner, - pass::execution::statement_placement::{ - InterpreterStatementPlacement, - tests::{assert_placement, run_placement}, + pass::execution::{ + statement_placement::{ + InterpreterStatementPlacement, + tests::{assert_placement, run_placement}, + }, + tests::make_entity_uuid_eq_body, }, }; @@ -276,3 +279,34 @@ fn storage_statements_zero_cost() { &statement_costs, ); } + +/// Interpreter placement for `EntityUuid == EntityUuid` with real stdlib types. +/// +/// The interpreter is the universal fallback — it must assign costs to all statements, +/// including equality on opaque types like `EntityUuid`. +#[test] +fn eq_opaque_entity_uuid() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = make_entity_uuid_eq_body(&heap, &interner, &env); + + let context = MirContext { + heap: &heap, + env: &env, + interner: &interner, + diagnostics: DiagnosticIssues::new(), + }; + + let mut placement = InterpreterStatementPlacement::new(); + let (body, statement_costs) = run_placement(&context, &mut placement, body); + + assert_placement( + "eq_opaque_entity_uuid", + "interpret", + &body, + &context, + &statement_costs, + ); +} diff --git a/libs/@local/hashql/mir/src/pass/execution/statement_placement/postgres/tests.rs b/libs/@local/hashql/mir/src/pass/execution/statement_placement/postgres/tests.rs index 7770c6d45b5..39d9b667449 100644 --- a/libs/@local/hashql/mir/src/pass/execution/statement_placement/postgres/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/statement_placement/postgres/tests.rs @@ -32,9 +32,38 @@ use crate::{ PostgresStatementPlacement, StatementPlacement as _, tests::{assert_placement, run_placement}, }, + tests::make_entity_uuid_eq_body, }, }; +/// Postgres placement for `EntityUuid == EntityUuid` comparison. +#[test] +fn eq_opaque_entity_uuid() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = make_entity_uuid_eq_body(&heap, &interner, &env); + + let context = MirContext { + heap: &heap, + env: &env, + interner: &interner, + diagnostics: DiagnosticIssues::new(), + }; + + let mut placement = PostgresStatementPlacement::new_in(Global); + let (body, statement_costs) = run_placement(&context, &mut placement, body); + + assert_placement( + "eq_opaque_entity_uuid", + "postgres", + &body, + &context, + &statement_costs, + ); +} + /// Arithmetic and comparison operations work. /// /// Tests that `Binary` and `Unary` `RValue`s are supported when operands are constants diff --git a/libs/@local/hashql/mir/src/pass/execution/terminator_placement/mod.rs b/libs/@local/hashql/mir/src/pass/execution/terminator_placement/mod.rs index f9903d47b10..b0d41247077 100644 --- a/libs/@local/hashql/mir/src/pass/execution/terminator_placement/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/terminator_placement/mod.rs @@ -19,13 +19,13 @@ //! //! | Transition | Allowed? | Cost | //! |------------|----------|------| -//! | Same backend (A → A) | Always | 0 | -//! | Any → Interpreter | Always | Transfer cost | -//! | Other → Postgres | Never | — | +//! | Same backend (A -> A) | Always | 0 | +//! | Any -> Interpreter | Always | Transfer cost | +//! | Other -> Postgres | Never | — | //! | Any Postgres in loop | Never | — | -//! | `GraphRead` edge | Interpreter → Interpreter only | 0 | +//! | `GraphRead` edge | Interpreter -> Interpreter only | 0 | //! | `Goto` edge | Any supported transition | Transfer cost | -//! | `SwitchInt` edge | Same-backend or → Interpreter only | Transfer cost | +//! | `SwitchInt` edge | Same-backend or -> Interpreter only | Transfer cost | //! //! Transfer cost is computed from the estimated size of live locals that must cross the edge. //! @@ -34,7 +34,7 @@ use alloc::alloc::Global; use core::{ alloc::Allocator, - ops::{Index, IndexMut}, + ops::{AddAssign, Index, IndexMut}, }; use hashql_core::{ @@ -63,6 +63,7 @@ use crate::{ local::Local, terminator::TerminatorKind, }, + macros::forward_ref_op_assign, pass::analysis::{ dataflow::{ TraversalLivenessAnalysis, @@ -82,7 +83,7 @@ mod tests; /// /// # Invariants /// -/// - Same-backend transitions (`A → A`) always have cost 0, enforced by [`insert`](Self::insert) +/// - Same-backend transitions (`A -> A`) always have cost 0, enforced by [`insert`](Self::insert) #[derive(Debug, Copy, Clone, PartialEq, Eq)] pub(crate) struct TransMatrix { matrix: [Option; TargetId::VARIANT_COUNT * TargetId::VARIANT_COUNT], @@ -144,7 +145,7 @@ impl TransMatrix { /// Removes all incoming transitions to `target` from other backends. /// - /// Self-loops (`target` → `target`) are preserved. + /// Self-loops (`target` -> `target`) are preserved. #[inline] pub(crate) fn remove_incoming(&mut self, target: TargetId) { for source in TargetId::all() { @@ -219,6 +220,20 @@ impl IndexMut<(TargetId, TargetId)> for TransMatrix { } } +impl AddAssign for TransMatrix { + /// Element-wise saturating addition. Only entries where both matrices have `Some` are added; + /// `None` entries in either matrix are left unchanged. + fn add_assign(&mut self, rhs: Self) { + for (entry, overhead) in self.matrix.iter_mut().zip(rhs.matrix) { + if let (Some(cost), Some(overhead)) = (entry, overhead) { + *cost = cost.saturating_add(overhead); + } + } + } +} + +forward_ref_op_assign!(impl AddAssign::add_assign for TransMatrix); + /// Collection of [`TransMatrix`] entries for all terminator edges in a body. /// /// Indexed by [`BasicBlockId`] via [`of`](Self::of), returning a slice of matrices corresponding @@ -301,6 +316,27 @@ impl Metadata for ComponentSizeMetadata { fn merge_reachable(&mut self, _: &mut Self::Annotation, _: &Self::Annotation) {} } +/// Fixed overhead for switching between different execution backends, independent of how much +/// data crosses the edge. +fn backend_switch_cost() -> TransMatrix { + let mut matrix = TransMatrix::new(); + + // Postgres -> Interpreter: continuation ROW + block id + locals/values arrays + interpreter + // resume. This is the heaviest switch path. + matrix.insert(TargetId::Postgres, TargetId::Interpreter, cost!(8)); + + // Postgres -> Embedding: via interpreter (P->I + I->E = 8+4). + matrix.insert(TargetId::Postgres, TargetId::Embedding, cost!(12)); + + // Interpreter -> Embedding: serialize embedding request. + matrix.insert(TargetId::Interpreter, TargetId::Embedding, cost!(4)); + + // Embedding -> Interpreter: deserialize embedding result. + matrix.insert(TargetId::Embedding, TargetId::Interpreter, cost!(4)); + + matrix +} + /// Parameters for populating a single edge's [`TransMatrix`]. struct PopulateEdgeMatrix { /// Backends the source block can execute on. @@ -310,6 +346,8 @@ struct PopulateEdgeMatrix { /// Cost of transferring live data across this edge. transfer_cost: Cost, + /// Per-pair fixed overhead for switching backends. + switch_cost: TransMatrix, /// Whether this edge is part of a loop (disables Postgres transitions). is_in_loop: bool, } @@ -321,6 +359,7 @@ impl PopulateEdgeMatrix { self.add_interpreter_fallback(matrix); self.add_terminator_specific_transitions(matrix, terminator); self.apply_postgres_restrictions(matrix); + *matrix += self.switch_cost; } /// Adds zero-cost transitions for staying on the same backend. @@ -472,7 +511,7 @@ impl TerminatorPlacement { /// Computes transition costs for all terminator edges in `body`. /// - /// For each edge, determines which (source → destination) backend transitions are valid and + /// For each edge, determines which (source -> destination) backend transitions are valid and /// their associated costs. The `targets` slice provides the set of backends each block can /// execute on (from statement placement), and `footprint` provides size estimates for /// computing transfer costs. @@ -489,6 +528,7 @@ impl TerminatorPlacement { ) -> TerminatorCostVec { let live_in = self.compute_liveness(body, vertex); let scc = self.compute_scc(body); + let switch_cost = backend_switch_cost(); let mut output = TerminatorCostVec::new(&body.basic_blocks, alloc); let mut required_locals = DenseBitSet::new_empty(body.local_decls.len()); @@ -514,6 +554,7 @@ impl TerminatorPlacement { source_targets: block_targets, target_targets: successor_targets, transfer_cost, + switch_cost, is_in_loop, } .populate(&mut matrices[edge_index], &block.terminator.kind); diff --git a/libs/@local/hashql/mir/src/pass/execution/terminator_placement/tests.rs b/libs/@local/hashql/mir/src/pass/execution/terminator_placement/tests.rs index 0b18f2d16d7..d76a0c5058c 100644 --- a/libs/@local/hashql/mir/src/pass/execution/terminator_placement/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/terminator_placement/tests.rs @@ -217,9 +217,10 @@ fn goto_allows_cross_backend_non_postgres() { ); let matrix = costs.of(BasicBlockId::new(0))[0]; + // data transfer (1) + backend switch I->E (4) = 5 assert_eq!( matrix.get(TargetId::Interpreter, TargetId::Embedding), - Some(cost!(1)) + Some(cost!(5)) ); } @@ -263,9 +264,10 @@ fn switchint_blocks_cross_backend() { let matrix = costs.of(BasicBlockId::new(0))[0]; assert_eq!(matrix.get(TargetId::Interpreter, TargetId::Embedding), None); + // data transfer (1) + backend switch E->I (4) = 5 assert_eq!( matrix.get(TargetId::Embedding, TargetId::Interpreter), - Some(cost!(1)) + Some(cost!(5)) ); } @@ -552,9 +554,10 @@ fn transfer_cost_counts_live_and_params() { ); let matrix = costs.of(BasicBlockId::new(0))[1]; + // data transfer (2) + backend switch P->I (8) = 10 assert_eq!( matrix.get(TargetId::Postgres, TargetId::Interpreter), - Some(cost!(2)) + Some(cost!(10)) ); } @@ -647,11 +650,11 @@ fn transfer_cost_from_live_locals() { build_targets(&body, &targets), ); - // local_cost: `live` scalar = 1 + // data transfer (1) + backend switch P->I (8) = 9 let matrix = costs.of(BasicBlockId::new(0))[0]; assert_eq!( matrix.get(TargetId::Postgres, TargetId::Interpreter), - Some(cost!(1)) + Some(cost!(9)) ); } diff --git a/libs/@local/hashql/mir/src/pass/execution/tests.rs b/libs/@local/hashql/mir/src/pass/execution/tests.rs index 998891df660..eec7dc3f433 100644 --- a/libs/@local/hashql/mir/src/pass/execution/tests.rs +++ b/libs/@local/hashql/mir/src/pass/execution/tests.rs @@ -6,16 +6,19 @@ use core::fmt::Write as _; use std::path::PathBuf; use hashql_core::{ + graph::DirectedGraph as _, heap::{Heap, Scratch}, + module::{ModuleRegistry, Universe}, symbol::sym, - r#type::environment::Environment, + r#type::{TypeId, environment::Environment}, + value::Primitive, }; use hashql_diagnostics::DiagnosticIssues; use insta::{Settings, assert_snapshot}; -use super::island::Island; +use super::{ExecutionAnalysisResidual, IslandGraph}; use crate::{ - body::{Body, basic_block::BasicBlockVec}, + body::{Body, basic_block::BasicBlockVec, constant::Constant, operand::Operand}, builder::body, context::MirContext, def::{DefId, DefIdSlice}, @@ -23,19 +26,123 @@ use crate::{ pass::{ GlobalAnalysisPass as _, analysis::size_estimation::SizeEstimationAnalysis, - execution::{ExecutionAnalysis, island::IslandVec, target::TargetId}, + execution::{ExecutionAnalysis, target::TargetId}, }, }; +/// Looks up a type from the stdlib module registry by path segments. +fn lookup_stdlib_type<'heap>(heap: &'heap Heap, env: &Environment<'heap>, path: &[&str]) -> TypeId { + let registry = ModuleRegistry::new(env); + let item = registry + .lookup(path.iter().map(|s| heap.intern_symbol(s)), Universe::Type) + .unwrap_or_else(|| panic!("type {path:?} should exist in stdlib")); + + #[expect(clippy::wildcard_enum_match_arm)] + match item.kind { + hashql_core::module::item::ItemKind::Type(typedef) => typedef.id, + other => panic!("expected type, got {other:?}"), + } +} + +/// Builds a MIR filter body that compares `vertex.id.entity_id.entity_uuid` against a constant +/// `EntityUuid(Uuid("e2851dbb-..."))`, using real stdlib types. +/// +/// Replicates the post-inline MIR shape: +/// ```text +/// %3 = opaque(Uuid, "e2851dbb-...") +/// %4 = opaque(EntityUuid, %3) +/// %2 = %1.id.entity_id.entity_uuid == %4 +/// ``` +pub(super) fn make_entity_uuid_eq_body<'heap>( + heap: &'heap Heap, + interner: &Interner<'heap>, + env: &Environment<'heap>, +) -> Body<'heap> { + let entity_type_id = lookup_stdlib_type( + heap, + env, + &["graph", "types", "knowledge", "entity", "Entity"], + ); + let entity_uuid_type_id = lookup_stdlib_type( + heap, + env, + &["graph", "types", "knowledge", "entity", "EntityUuid"], + ); + let uuid_type_id = lookup_stdlib_type(heap, env, &["core", "uuid", "Uuid"]); + + let opaque_symbol = |ty| { + env.r#type(ty) + .kind + .opaque() + .expect("type should be opaque") + .name + }; + + let entity_uuid_type_symbol = opaque_symbol(entity_uuid_type_id); + let uuid_type_symbol = opaque_symbol(uuid_type_id); + + let const_uuid = Operand::Constant(Constant::Primitive(Primitive::String( + hashql_core::value::String::new( + env.heap + .intern_symbol("e2851dbb-7376-4959-9bca-f72cafc4448f"), + ), + ))); + + body!(interner, env; [graph::read::filter]@0/2 -> Bool { + decl env: (), + vertex: (|_types: &_| entity_type_id), + result: Bool, + uuid_val: (|_types: &_| uuid_type_id), + entity_uuid_val: (|_types: &_| entity_uuid_type_id); + @proj vertex_id = vertex.id: ?, entity_id = vertex_id.entity_id: ?, vertex_uuid = entity_id.entity_uuid: (|_types: &_| entity_uuid_type_id); + + bb0() { + uuid_val = opaque uuid_type_symbol, const_uuid; + entity_uuid_val = opaque entity_uuid_type_symbol, uuid_val; + result = bin.== vertex_uuid entity_uuid_val; + return result; + } + }) +} + /// Formats the per-block target assignment and island structure for snapshot comparison. #[track_caller] fn assert_execution<'heap>( name: &'static str, + body: &Body<'heap>, + context: &MirContext<'_, 'heap>, assignment: &BasicBlockVec, - islands: &IslandVec, + islands: &IslandGraph<&'heap Heap>, ) { - let mut output = String::new(); + use hashql_core::{ + pretty::Formatter, + r#type::{TypeFormatter, TypeFormatterOptions}, + }; + use crate::pretty::{TextFormatAnnotations, TextFormatOptions}; + + struct NoAnnotations; + impl TextFormatAnnotations for NoAnnotations {} + + let formatter = Formatter::new(context.heap); + let type_formatter = TypeFormatter::new(&formatter, context.env, TypeFormatterOptions::terse()); + + let mut text_format = TextFormatOptions { + writer: Vec::::new(), + indent: 4, + sources: (), + types: type_formatter, + annotations: NoAnnotations, + } + .build(); + + text_format.format_body(body).expect("formatting failed"); + + let mut output = String::from_utf8_lossy(&text_format.writer).into_owned(); + + writeln!(output).expect("infallible"); + writeln!(output, "---").expect("infallible"); + writeln!(output).expect("infallible"); writeln!(output, "Assignment:").expect("infallible"); for (block, target) in assignment.iter_enumerated() { writeln!(output, " {block}: {target}").expect("infallible"); @@ -45,8 +152,8 @@ fn assert_execution<'heap>( writeln!(output, "Islands:").expect("infallible"); #[expect(clippy::use_debug)] - for (island_id, island) in islands.iter_enumerated() { - let blocks: Vec<_> = island.iter().collect(); + for (island_id, island) in islands.iter_nodes() { + let blocks: Vec<_> = island.members().collect(); writeln!( output, @@ -56,6 +163,18 @@ fn assert_execution<'heap>( .expect("infallible"); } + #[expect(clippy::use_debug)] + for edge in islands.iter_edges() { + writeln!( + output, + " bb{} -> bb{}: {:?}", + edge.source(), + edge.target(), + edge.data + ) + .expect("infallible"); + } + let dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")); let mut settings = Settings::clone_current(); settings.set_snapshot_path(dir.join("tests/ui/pass/execution")); @@ -72,7 +191,7 @@ fn run_execution<'heap>( body: &mut Body<'heap>, ) -> ( BasicBlockVec, - IslandVec, + IslandGraph<&'heap Heap>, ) { let mut size_analysis = SizeEstimationAnalysis::new_in(Global); size_analysis.run(context, DefIdSlice::from_raw(core::slice::from_ref(body))); @@ -84,7 +203,19 @@ fn run_execution<'heap>( scratch: &mut scratch, }; - analysis.run(context, body) + let heap = context.heap; + let ExecutionAnalysisResidual { + assignment, + islands, + } = analysis.run_in(context, body, heap); + + assert!( + context.diagnostics.is_empty(), + "execution analysis produced diagnostics: {:?}", + context.diagnostics, + ); + + (assignment, islands) } /// Closures and function calls force the interpreter. @@ -124,7 +255,13 @@ fn closure_forces_interpreter() { let (assignment, islands) = run_execution(&mut context, &mut body); - assert_execution("closure_forces_interpreter", &assignment, &islands); + assert_execution( + "closure_forces_interpreter", + &body, + &context, + &assignment, + &islands, + ); } /// Mixing a Postgres projection with `Apply` splits the block across targets. @@ -165,7 +302,13 @@ fn projection_and_apply_splits() { let (assignment, islands) = run_execution(&mut context, &mut body); - assert_execution("projection_and_apply_splits", &assignment, &islands); + assert_execution( + "projection_and_apply_splits", + &body, + &context, + &assignment, + &islands, + ); } /// Three targets: Postgres projection, Embedding projection, and `Apply` on interpreter. @@ -211,6 +354,37 @@ fn mixed_postgres_embedding_interpreter() { assert_execution( "mixed_postgres_embedding_interpreter", + &body, + &context, + &assignment, + &islands, + ); +} + +/// `EntityUuid` equality with real stdlib types through the full execution pipeline. +/// +/// Reproduces the placement failure from entity-uuid-equality compiletest. +#[test] +fn entity_uuid_equality() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let mut body = make_entity_uuid_eq_body(&heap, &interner, &env); + + let mut context = MirContext { + heap: &heap, + env: &env, + interner: &interner, + diagnostics: DiagnosticIssues::new(), + }; + + let (assignment, islands) = run_execution(&mut context, &mut body); + + assert_execution( + "entity_uuid_equality", + &body, + &context, &assignment, &islands, ); diff --git a/libs/@local/hashql/mir/src/pass/execution/traversal/entity.rs b/libs/@local/hashql/mir/src/pass/execution/traversal/entity.rs index 575f9c91c96..2e9b2f77646 100644 --- a/libs/@local/hashql/mir/src/pass/execution/traversal/entity.rs +++ b/libs/@local/hashql/mir/src/pass/execution/traversal/entity.rs @@ -5,7 +5,7 @@ use hashql_core::{ Id, bit_vec::{BitRelations as _, FiniteBitSet}, }, - symbol::{ConstantSymbol, sym}, + symbol::{ConstantSymbol, Symbol, sym}, }; use super::{ @@ -194,6 +194,41 @@ impl EntityPath { resolve(projections) } + /// Returns a unique symbol identifying this path variant. + /// + /// Used as column aliases in SQL generation so the interpreter can locate + /// result columns by name. + #[must_use] + pub const fn as_symbol(self) -> Symbol<'static> { + match self { + Self::Properties => sym::properties, + Self::Vectors => sym::vectors, + Self::RecordId => sym::record_id, + Self::EntityId => sym::entity_id, + Self::WebId => sym::web_id, + Self::EntityUuid => sym::entity_uuid, + Self::DraftId => sym::draft_id, + Self::EditionId => sym::edition_id, + Self::TemporalVersioning => sym::temporal_versioning, + Self::DecisionTime => sym::decision_time, + Self::TransactionTime => sym::transaction_time, + Self::EntityTypeIds => sym::entity_type_ids, + Self::Archived => sym::archived, + Self::Confidence => sym::confidence, + Self::ProvenanceInferred => sym::provenance_inferred, + Self::ProvenanceEdition => sym::provenance_edition, + Self::PropertyMetadata => sym::property_metadata, + Self::LeftEntityWebId => sym::left_entity_web_id, + Self::LeftEntityUuid => sym::left_entity_uuid, + Self::RightEntityWebId => sym::right_entity_web_id, + Self::RightEntityUuid => sym::right_entity_uuid, + Self::LeftEntityConfidence => sym::left_entity_confidence, + Self::RightEntityConfidence => sym::right_entity_confidence, + Self::LeftEntityProvenance => sym::left_entity_provenance, + Self::RightEntityProvenance => sym::right_entity_provenance, + } + } + /// Returns the set of execution targets that natively serve this path. pub(crate) const fn origin(self) -> TargetBitSet { let mut set = TargetBitSet::new_empty(TargetId::VARIANT_COUNT_U32); diff --git a/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs b/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs index 4b89cccb71e..f72b02e0e75 100644 --- a/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs @@ -19,6 +19,7 @@ mod analysis; mod tests; pub(crate) use analysis::{TraversalAnalysisVisitor, TraversalResult}; +use hashql_core::{id::IdArray, symbol::Symbol}; pub use self::entity::{EntityPath, EntityPathBitSet}; pub(crate) use self::{access::Access, entity::TransferCostConfig}; @@ -148,6 +149,14 @@ impl TraversalPathBitSet { pub fn iter(&self) -> impl ExactSizeIterator { self.into_iter() } + + #[must_use] + #[inline] + pub const fn vertex(self) -> VertexType { + match self { + Self::Entity(_) => VertexType::Entity, + } + } } impl IntoIterator for &TraversalPathBitSet { @@ -220,6 +229,18 @@ pub enum TraversalPath { } impl TraversalPath { + /// Returns a unique symbol identifying this path variant. + /// + /// Used as column aliases in SQL generation so the interpreter can locate + /// result columns by name. + #[inline] + #[must_use] + pub const fn as_symbol(self) -> Symbol<'static> { + match self { + Self::Entity(path) => path.as_symbol(), + } + } + /// Returns the set of execution targets that natively serve this path. #[inline] #[must_use] @@ -237,3 +258,100 @@ impl TraversalPath { } } } + +/// Traversal path bitsets for all vertex types. +/// +/// Maps each [`VertexType`] to its [`TraversalPathBitSet`], providing a unified view of path +/// accesses across all vertex types in a query. Where [`TraversalPathBitSet`] tracks paths for a +/// single vertex type, the bitmap tracks paths for all of them. +/// +/// Lattice operations are pointwise via [`TraversalMapLattice`]: bottom is all-empty, top has +/// every slot at its [`TraversalPathBitSet`] top, and join unions each slot independently. +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub struct TraversalPathBitMap( + IdArray, +); + +impl TraversalPathBitMap { + const BOTTOM: Self = Self(IdArray::from_raw([TraversalPathBitSet::empty( + VertexType::Entity, + )])); + const TOP: Self = { + let mut entity = TraversalPathBitSet::empty(VertexType::Entity); + entity.insert_all(); + + Self(IdArray::from_raw([entity])) + }; + + /// Joins a [`TraversalPathBitSet`] into the slot for its vertex type. + pub fn insert(&mut self, bitset: TraversalPathBitSet) { + let vertex = bitset.vertex(); + let lattice = TraversalLattice::new(vertex); + lattice.join(&mut self.0[vertex], &bitset); + } +} + +impl From for TraversalPathBitMap { + fn from(value: TraversalPathBitSet) -> Self { + let mut this = TraversalMapLattice.bottom(); + this[value.vertex()] = value; + this + } +} + +impl core::ops::Index for TraversalPathBitMap { + type Output = TraversalPathBitSet; + + #[inline] + fn index(&self, index: VertexType) -> &Self::Output { + &self.0[index] + } +} + +impl core::ops::IndexMut for TraversalPathBitMap { + #[inline] + fn index_mut(&mut self, index: VertexType) -> &mut Self::Output { + &mut self.0[index] + } +} + +/// Pointwise lattice over [`TraversalPathBitMap`]. +/// +/// Delegates each [`VertexType`] slot to its [`TraversalLattice`], so bottom is all-empty, +/// top has every slot at its [`TraversalPathBitSet`] top, and join unions each slot +/// independently. +#[derive(Debug, Copy, Clone)] +pub struct TraversalMapLattice; + +impl HasBottom for TraversalMapLattice { + fn bottom(&self) -> TraversalPathBitMap { + TraversalPathBitMap::BOTTOM + } + + fn is_bottom(&self, value: &TraversalPathBitMap) -> bool { + *value == self.bottom() + } +} + +impl HasTop for TraversalMapLattice { + fn top(&self) -> TraversalPathBitMap { + TraversalPathBitMap::TOP + } + + fn is_top(&self, value: &TraversalPathBitMap) -> bool { + *value == self.top() + } +} + +impl JoinSemiLattice for TraversalMapLattice { + fn join(&self, lhs: &mut TraversalPathBitMap, rhs: &TraversalPathBitMap) -> bool { + let mut changed = false; + + for (vertex, rhs_bitset) in rhs.0.iter_enumerated() { + let lattice = TraversalLattice::new(vertex); + changed |= lattice.join(&mut lhs.0[vertex], rhs_bitset); + } + + changed + } +} diff --git a/libs/@local/hashql/mir/src/pass/execution/vertex.rs b/libs/@local/hashql/mir/src/pass/execution/vertex.rs index b0d8441da76..d4fb98bc887 100644 --- a/libs/@local/hashql/mir/src/pass/execution/vertex.rs +++ b/libs/@local/hashql/mir/src/pass/execution/vertex.rs @@ -1,5 +1,6 @@ use hashql_core::{ debug_panic, + id::Id, symbol::sym, r#type::{ TypeId, @@ -47,7 +48,8 @@ fn peel<'heap>( /// The vertex type of a [`GraphReadFilter`] body's vertex argument. /// /// [`GraphReadFilter`]: crate::body::Source::GraphReadFilter -#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Id)] +#[repr(u8)] pub enum VertexType { Entity, } diff --git a/libs/@local/hashql/mir/src/pretty/text.rs b/libs/@local/hashql/mir/src/pretty/text.rs index 8c41aab20df..96544f722c8 100644 --- a/libs/@local/hashql/mir/src/pretty/text.rs +++ b/libs/@local/hashql/mir/src/pretty/text.rs @@ -80,6 +80,12 @@ pub trait TextFormatAnnotations { where Self: 'this; + /// The type of annotation displayed after basic blocks. + type BasicBlockAnnotation<'this, 'heap>: Display + = ! + where + Self: 'this; + /// Returns an optional annotation for the given statement at `location`. #[expect(unused_variables, reason = "trait definition")] fn annotate_statement<'heap>( @@ -99,10 +105,59 @@ pub trait TextFormatAnnotations { ) -> Option> { None } + + /// Returns an optional annotation for the given basic block. + #[expect(unused_variables, reason = "trait definition")] + fn annotate_basic_block<'heap>( + &self, + id: BasicBlockId, + block: &BasicBlock<'heap>, + ) -> Option> { + None + } } impl TextFormatAnnotations for () {} +impl TextFormatAnnotations for &mut T { + type BasicBlockAnnotation<'this, 'heap> + = T::BasicBlockAnnotation<'this, 'heap> + where + Self: 'this; + type DeclarationAnnotation<'this, 'heap> + = T::DeclarationAnnotation<'this, 'heap> + where + Self: 'this; + type StatementAnnotation<'this, 'heap> + = T::StatementAnnotation<'this, 'heap> + where + Self: 'this; + + fn annotate_statement<'heap>( + &self, + location: Location, + statement: &Statement<'heap>, + ) -> Option> { + (**self).annotate_statement(location, statement) + } + + fn annotate_local_decl<'heap>( + &self, + local: Local, + declaration: &LocalDecl<'heap>, + ) -> Option> { + (**self).annotate_local_decl(local, declaration) + } + + fn annotate_basic_block<'heap>( + &self, + id: BasicBlockId, + block: &BasicBlock<'heap>, + ) -> Option> { + (**self).annotate_basic_block(id, block) + } +} + /// Configuration for constructing a [`TextFormat`] formatter. pub struct TextFormatOptions { /// The writer where formatted text will be written. @@ -168,6 +223,14 @@ impl TextFormat { line_buffer: Vec::new(), } } + + /// Swaps in a new annotation provider and returns the old one. + /// + /// Useful when formatting multiple bodies in sequence where each body needs + /// different annotation context (e.g. per-body execution analysis results). + pub const fn replace_annotations(&mut self, annotations: A) -> A { + core::mem::replace(&mut self.annotations, annotations) + } } impl TextFormat @@ -414,6 +477,14 @@ where write!(self.line_buffer, "{id}(")?; self.csv(block.params.iter().copied())?; write!(self.line_buffer, "): {{")?; + if let Some(annotation) = self.annotations.annotate_basic_block(id, block) { + // We estimate that we never exceed 80 columns, calculate the remaining width, if we + // don't have enough space, we add 4 spaces breathing room. + let remaining_width = 80_usize.checked_sub(self.line_buffer.len()).unwrap_or(4); + self.line_buffer + .resize(self.line_buffer.len() + remaining_width, b' '); + write!(self.line_buffer, "// {annotation}")?; + } self.newline()?; let mut location = Location { diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/closure_forces_interpreter.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/closure_forces_interpreter.snap index c536a9df1d9..2d13b0424f9 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/closure_forces_interpreter.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/closure_forces_interpreter.snap @@ -2,10 +2,27 @@ source: libs/@local/hashql/mir/src/pass/execution/tests.rs expression: output --- +fn {graph::read::filter@4294967040}(%0: (Integer, (Integer) -> Integer), %1: Entity) -> Integer { + let %2: Integer + let %3: (Integer) -> Integer + let %4: (Integer, (Integer) -> Integer) + let %5: (Integer) -> Integer + let %6: Integer + + bb0(): { + %2 = %0.0 + %3 = %0.1 + %4 = (%2, %3) + %5 = closure(({def@42} as FnPtr), %4) + %6 = apply %5 5 + + return %6 + } +} +--- + Assignment: - bb0: postgres - bb1: interpreter + bb0: interpreter Islands: - 0: target=postgres, blocks=[BasicBlockId(0)] - 1: target=interpreter, blocks=[BasicBlockId(1)] + 0: target=interpreter, blocks=[BasicBlockId(0)] diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/entity_uuid_equality.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/entity_uuid_equality.snap new file mode 100644 index 00000000000..2192f800648 --- /dev/null +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/entity_uuid_equality.snap @@ -0,0 +1,28 @@ +--- +source: libs/@local/hashql/mir/src/pass/execution/tests.rs +expression: output +--- +fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> Boolean { + let %2: Boolean + let %3: Uuid + let %4: EntityUuid + + bb0(): { + %3 = opaque(::core::uuid::Uuid, "e2851dbb-7376-4959-9bca-f72cafc4448f") + %4 = opaque(::graph::types::knowledge::entity::EntityUuid, %3) + %2 = %1.id.entity_id.entity_uuid == %4 + + return %2 + } +} +--- + +Assignment: + bb0: interpreter + +Islands: + 0: target=interpreter, blocks=[BasicBlockId(0)] + 1: target=postgres, blocks=[] + 2: target=embedding, blocks=[] + bb1 -> bb0: DataFlow + bb2 -> bb0: DataFlow diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/mixed_postgres_embedding_interpreter.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/mixed_postgres_embedding_interpreter.snap index f3a6814cfcb..45e25791186 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/mixed_postgres_embedding_interpreter.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/mixed_postgres_embedding_interpreter.snap @@ -2,12 +2,48 @@ source: libs/@local/hashql/mir/src/pass/execution/tests.rs expression: output --- +fn {graph::read::filter@4294967040}(%0: (Integer,), %1: Entity) -> Integer { + let %2: Boolean + let %3: ? + let %4: Integer + let %5: (Integer,) + let %6: (Boolean) -> Integer + let %7: Integer + + bb0(): { + %2 = %1.metadata.archived + + goto -> bb1() + } + + bb1(): { + %3 = %1.encodings.vectors + + goto -> bb2() + } + + bb2(): { + %4 = %0.0 + %5 = (%4) + %6 = closure(({def@42} as FnPtr), %5) + %7 = apply %6 %2 + + return %7 + } +} +--- + Assignment: - bb0: postgres + bb0: interpreter bb1: embedding bb2: interpreter Islands: - 0: target=postgres, blocks=[BasicBlockId(0)] + 0: target=interpreter, blocks=[BasicBlockId(0)] 1: target=embedding, blocks=[BasicBlockId(1)] 2: target=interpreter, blocks=[BasicBlockId(2)] + 3: target=postgres, blocks=[] + bb0 -> bb1: ControlFlow + bb1 -> bb2: ControlFlow + bb3 -> bb0: DataFlow + bb0 -> bb2: Inherits diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/projection_and_apply_splits.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/projection_and_apply_splits.snap index c536a9df1d9..25b7b46fa59 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/projection_and_apply_splits.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/projection_and_apply_splits.snap @@ -2,6 +2,30 @@ source: libs/@local/hashql/mir/src/pass/execution/tests.rs expression: output --- +fn {graph::read::filter@4294967040}(%0: (Integer,), %1: Entity) -> Integer { + let %2: Boolean + let %3: Integer + let %4: (Integer,) + let %5: (Boolean) -> Integer + let %6: Integer + + bb0(): { + %2 = %1.metadata.archived + %3 = %0.0 + %4 = (%3) + + goto -> bb1() + } + + bb1(): { + %5 = closure(({def@42} as FnPtr), %4) + %6 = apply %5 %2 + + return %6 + } +} +--- + Assignment: bb0: postgres bb1: interpreter @@ -9,3 +33,4 @@ Assignment: Islands: 0: target=postgres, blocks=[BasicBlockId(0)] 1: target=interpreter, blocks=[BasicBlockId(1)] + bb0 -> bb1: ControlFlow diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/eq_opaque_entity_uuid.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/eq_opaque_entity_uuid.snap new file mode 100644 index 00000000000..1e9e2dd7f7a --- /dev/null +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/eq_opaque_entity_uuid.snap @@ -0,0 +1,17 @@ +--- +source: libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs +expression: output +--- +fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> Boolean { + let %2: Boolean + let %3: Uuid + let %4: EntityUuid + + bb0(): { + %3 = opaque(::core::uuid::Uuid, "e2851dbb-7376-4959-9bca-f72cafc4448f") // cost: 8 + %4 = opaque(::graph::types::knowledge::entity::EntityUuid, %3) // cost: 8 + %2 = %1.id.entity_id.entity_uuid == %4 // cost: 8 + + return %2 + } +} diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/eq_opaque_entity_uuid.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/eq_opaque_entity_uuid.snap new file mode 100644 index 00000000000..87cdcc5db7c --- /dev/null +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/eq_opaque_entity_uuid.snap @@ -0,0 +1,17 @@ +--- +source: libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs +expression: output +--- +fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> Boolean { + let %2: Boolean + let %3: Uuid + let %4: EntityUuid + + bb0(): { + %3 = opaque(::core::uuid::Uuid, "e2851dbb-7376-4959-9bca-f72cafc4448f") // cost: 4 + %4 = opaque(::graph::types::knowledge::entity::EntityUuid, %3) // cost: 4 + %2 = %1.id.entity_id.entity_uuid == %4 + + return %2 + } +} diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/terminator_placement/terminator_placement_snapshot.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/terminator_placement/terminator_placement_snapshot.snap index 023c596cbeb..41125ca3f68 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/terminator_placement/terminator_placement_snapshot.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/terminator_placement/terminator_placement_snapshot.snap @@ -26,7 +26,7 @@ fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> Integer { ================ Terminator Edges ================ bb0: - edge[0]:I->I=0P->I=2P->P=0E->I=2 - edge[1]:I->I=0P->I=0E->I=0E->E=0 + edge[0]:I->I=0P->I=10P->P=0E->I=6 + edge[1]:I->I=0P->I=8E->I=4E->E=0 bb1: bb2: