From 05552e703c8ebc61e5a373b5f7275b840edbc82a Mon Sep 17 00:00:00 2001 From: Stevengre Date: Thu, 26 Mar 2026 15:33:27 +0000 Subject: [PATCH 1/2] feat(rt): add address allocation model for pointer-to-integer semantics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements Miri-style lazy address allocation (Issue #1002): Configuration: - `<addressMap>`: maps allocation keys to (base_address, size) pairs - `<nextAddress>`: next available address (starts at 4096 to avoid NULL) - `<exposedSet>`: tracks allocations with exposed provenance Semantics: - `#allocAddressFor`: lazily assigns aligned base addresses on demand - `#alignUp`: aligns addresses to type alignment requirements - `castKindTransmute` PtrLocal→int: computes base + byte_offset - `castKindPointerExposeAddress`: same as transmute but also exposes provenance Verified via LLVM backend execution: - interior-mut3-fail.rs reaches #EndProgram (alignment check passes with addr=4096) - Address uniqueness: different locals get different base addresses Note: Haskell backend (prove) performance regresses due to 3 new configuration cells increasing the matching state space. This needs further optimization (e.g. cell multiplicity annotations or rule priorities). 
Closes #1002 Supersedes #812, #877 Fixes #638 Co-Authored-By: Claude Opus 4.6 (1M context) --- .../kdist/mir-semantics/rt/configuration.md | 7 + kmir/src/kmir/kdist/mir-semantics/rt/data.md | 213 +++++++++++++++++- .../data/prove-rs/ptr-transmute-nonzero.rs | 11 + .../data/prove-rs/ptr-transmute-two-locals.rs | 14 ++ 4 files changed, 243 insertions(+), 2 deletions(-) create mode 100644 kmir/src/tests/integration/data/prove-rs/ptr-transmute-nonzero.rs create mode 100644 kmir/src/tests/integration/data/prove-rs/ptr-transmute-two-locals.rs diff --git a/kmir/src/kmir/kdist/mir-semantics/rt/configuration.md b/kmir/src/kmir/kdist/mir-semantics/rt/configuration.md index c5d764361..5ae8c0964 100644 --- a/kmir/src/kmir/kdist/mir-semantics/rt/configuration.md +++ b/kmir/src/kmir/kdist/mir-semantics/rt/configuration.md @@ -8,6 +8,7 @@ Essential parts of the configuration: * `currentFrame`, an unpacked version of the top of `stack` * the `functions` map to look up function bodies when they are called * the `memory` cell which abstracts allocated heap data +* `addressMap`, `nextAddress`, and `exposedSet` for pointer-to-integer semantics The entire program's return value (`retVal`) is held in a separate cell. 
@@ -19,6 +20,8 @@ requires "./value.md" module KMIR-CONFIGURATION imports INT-SYNTAX imports BOOL-SYNTAX + imports MAP + imports SET imports RT-VALUE-SYNTAX syntax RetVal ::= return( Value ) @@ -45,6 +48,10 @@ module KMIR-CONFIGURATION // remaining call stack (without top frame) .List + // address allocation model for pointer-to-integer casts + .Map + 4096 + .Set ``` diff --git a/kmir/src/kmir/kdist/mir-semantics/rt/data.md b/kmir/src/kmir/kdist/mir-semantics/rt/data.md index df6203ea4..b74ed9889 100644 --- a/kmir/src/kmir/kdist/mir-semantics/rt/data.md +++ b/kmir/src/kmir/kdist/mir-semantics/rt/data.md @@ -18,6 +18,7 @@ module RT-DATA imports BYTES imports LIST imports MAP + imports SET imports K-EQUAL imports BODY @@ -1615,6 +1616,105 @@ What can be supported without additional layout consideration is trivial casts b requires lookupTy(TY_SOURCE) ==K lookupTy(TY_TARGET) ``` +Transmuting a pointer to an integer discards provenance and reinterprets the pointer's +address as a value of the target integer type. +The address is computed from a lazily-assigned base address for the allocation plus the +pointer offset within that allocation, following Miri's approach. + +An allocation key uniquely identifies a stack allocation by its stack depth and local index. 
+ +```k + syntax AllocKey ::= allocKey ( Int , Local ) [symbol(allocKey)] + syntax AddrEntry ::= addrEntry ( Int , Int ) [symbol(addrEntry)] + + syntax Int ::= #alignUp ( Int , Int ) [function, total] + rule #alignUp(ADDR, ALIGN) => ((ADDR +Int ALIGN -Int 1) /Int ALIGN) *Int ALIGN + requires ALIGN >Int 0 + [preserves-definedness] + rule #alignUp(ADDR, _) => ADDR [owise] + + syntax Int ::= #addrEntryBase ( AddrEntry ) [function, total] + rule #addrEntryBase(addrEntry(BASE, _)) => BASE + + syntax Int ::= #ptrElemSize ( TypeInfo ) [function, total] + rule #ptrElemSize(TY_INFO) => #sizeOf(TY_INFO) requires #sizeOf(TY_INFO) >Int 0 + rule #ptrElemSize(TY_INFO) => 1 requires #sizeOf(TY_INFO) ==Int 0 // ZST + rule #ptrElemSize(_) => 1 [owise] + + syntax KItem ::= #allocAddressFor ( Int , Local , Int , Int ) + // #allocAddressFor(STACK_DEPTH, LOCAL, SIZE, ALIGN) + // Lazily assigns an aligned base address for the allocation (STACK_DEPTH, LOCAL). + rule #allocAddressFor(STACK_DEPTH, LOCAL, SIZE, ALIGN) + => .K + ... + + + MAP => MAP[allocKey(STACK_DEPTH, LOCAL) <- addrEntry(#alignUp(NEXT, ALIGN), SIZE)] + + + NEXT => #alignUp(NEXT, ALIGN) +Int maxInt(SIZE, 1) + + requires notBool allocKey(STACK_DEPTH, LOCAL) in_keys(MAP) + + // Already allocated: no-op + rule #allocAddressFor(STACK_DEPTH, LOCAL, _, _) => .K ... + MAP + requires allocKey(STACK_DEPTH, LOCAL) in_keys(MAP) +``` + +Transmuting a `PtrLocal` to an integer type: look up or lazily assign the base address, +then compute `base + byte_offset` where byte_offset comes from the pointer's metadata. 
+ +```k + // Case 1: address already assigned — compute base + offset + rule #cast( + PtrLocal(STACK_DEPTH, place(LOCAL, _PROJS), _MUT, metadata(_SIZE, PTR_OFFSET, _ORIGIN)), + castKindTransmute, + TY_SOURCE, + TY_TARGET + ) + => + #intAsType( + #addrEntryBase({MAP[allocKey(STACK_DEPTH, LOCAL)]}:>AddrEntry) + +Int PTR_OFFSET *Int #ptrElemSize(#lookupMaybeTy(pointeeTy(#lookupMaybeTy(TY_SOURCE)))), + 64, + #numTypeOf(lookupTy(TY_TARGET)) + ) + ... + + MAP + requires #isIntType(lookupTy(TY_TARGET)) + andBool allocKey(STACK_DEPTH, LOCAL) in_keys(MAP) + [preserves-definedness] + + // Case 2: address not yet assigned — allocate first, original #cast stays on + rule #cast( + PtrLocal(STACK_DEPTH, place(LOCAL, PROJS), MUT, META), + castKindTransmute, + TY_SOURCE, + TY_TARGET + ) + => + #allocAddressFor( + STACK_DEPTH, + LOCAL, + #sizeOf(#lookupMaybeTy(pointeeTy(#lookupMaybeTy(TY_SOURCE)))), + #alignOf(#lookupMaybeTy(pointeeTy(#lookupMaybeTy(TY_SOURCE)))) + ) + ~> #cast( + PtrLocal(STACK_DEPTH, place(LOCAL, PROJS), MUT, META), + castKindTransmute, + TY_SOURCE, + TY_TARGET + ) + ... + + MAP + requires #isIntType(lookupTy(TY_TARGET)) + andBool notBool allocKey(STACK_DEPTH, LOCAL) in_keys(MAP) + [preserves-definedness] +``` + Other `Transmute` casts that can be resolved are round-trip casts from type A to type B and then directly back from B to A. The first cast is reified as a `thunk`, the second one resolves it and eliminates the `thunk`: @@ -1879,12 +1979,121 @@ the safety of this cast. The logic of the semantics and saftey of this cast for ``` +### Pointer provenance casts + +`PointerExposeAddress` converts a pointer to an integer (like `ptr as usize`), exposing +the pointer's provenance so that a future `PointerWithExposedProvenance` cast can recover it. 
+ +```k + // PointerExposeAddress for PtrLocal: compute address and expose provenance + rule #cast( + PtrLocal(STACK_DEPTH, place(LOCAL, _PROJS), _MUT, metadata(_SIZE, PTR_OFFSET, _ORIGIN)), + castKindPointerExposeAddress, + TY_SOURCE, + TY_TARGET + ) + => + #intAsType( + #addrEntryBase({MAP[allocKey(STACK_DEPTH, LOCAL)]}:>AddrEntry) + +Int PTR_OFFSET *Int #ptrElemSize(#lookupMaybeTy(pointeeTy(#lookupMaybeTy(TY_SOURCE)))), + 64, + #numTypeOf(lookupTy(TY_TARGET)) + ) + ... + + MAP + EXPOSED => EXPOSED |Set SetItem(allocKey(STACK_DEPTH, LOCAL)) + requires #isIntType(lookupTy(TY_TARGET)) + andBool allocKey(STACK_DEPTH, LOCAL) in_keys(MAP) + [preserves-definedness] + + rule #cast( + PtrLocal(STACK_DEPTH, place(LOCAL, PROJS), MUT, META), + castKindPointerExposeAddress, + TY_SOURCE, + TY_TARGET + ) + => + #allocAddressFor( + STACK_DEPTH, + LOCAL, + #sizeOf(#lookupMaybeTy(pointeeTy(#lookupMaybeTy(TY_SOURCE)))), + #alignOf(#lookupMaybeTy(pointeeTy(#lookupMaybeTy(TY_SOURCE)))) + ) + ~> #cast( + PtrLocal(STACK_DEPTH, place(LOCAL, PROJS), MUT, META), + castKindPointerExposeAddress, + TY_SOURCE, + TY_TARGET + ) + ... + + MAP + requires #isIntType(lookupTy(TY_TARGET)) + andBool notBool allocKey(STACK_DEPTH, LOCAL) in_keys(MAP) + [preserves-definedness] + + // PointerExposeAddress for Reference + rule #cast( + Reference(STACK_DEPTH, place(LOCAL, _PROJS), _MUT, metadata(_SIZE, PTR_OFFSET, _ORIGIN)), + castKindPointerExposeAddress, + TY_SOURCE, + TY_TARGET + ) + => + #intAsType( + #addrEntryBase({MAP[allocKey(STACK_DEPTH, LOCAL)]}:>AddrEntry) + +Int PTR_OFFSET *Int #ptrElemSize(#lookupMaybeTy(pointeeTy(#lookupMaybeTy(TY_SOURCE)))), + 64, + #numTypeOf(lookupTy(TY_TARGET)) + ) + ... 
+ + MAP + EXPOSED => EXPOSED |Set SetItem(allocKey(STACK_DEPTH, LOCAL)) + requires #isIntType(lookupTy(TY_TARGET)) + andBool allocKey(STACK_DEPTH, LOCAL) in_keys(MAP) + [preserves-definedness] + + rule #cast( + Reference(STACK_DEPTH, place(LOCAL, PROJS), MUT, META), + castKindPointerExposeAddress, + TY_SOURCE, + TY_TARGET + ) + => + #allocAddressFor( + STACK_DEPTH, + LOCAL, + #sizeOf(#lookupMaybeTy(pointeeTy(#lookupMaybeTy(TY_SOURCE)))), + #alignOf(#lookupMaybeTy(pointeeTy(#lookupMaybeTy(TY_SOURCE)))) + ) + ~> #cast( + Reference(STACK_DEPTH, place(LOCAL, PROJS), MUT, META), + castKindPointerExposeAddress, + TY_SOURCE, + TY_TARGET + ) + ... + + MAP + requires #isIntType(lookupTy(TY_TARGET)) + andBool notBool allocKey(STACK_DEPTH, LOCAL) in_keys(MAP) + [preserves-definedness] +``` + +`PointerWithExposedProvenance` converts an integer to a pointer (`ptr::with_exposed_provenance`), +recovering the provenance that was previously exposed. + +```k + // TODO: PointerWithExposedProvenance requires searching the address map and exposed set + // to recover the original allocation. This is left for future implementation. +``` + ### Other casts involving pointers | CastKind | Description | |------------------------------|-------------| -| PointerExposeAddress | | -| PointerWithExposedProvenance | | | FnPtrToPtr | | ## Decoding constants from their bytes representation to values diff --git a/kmir/src/tests/integration/data/prove-rs/ptr-transmute-nonzero.rs b/kmir/src/tests/integration/data/prove-rs/ptr-transmute-nonzero.rs new file mode 100644 index 000000000..252a4bd0a --- /dev/null +++ b/kmir/src/tests/integration/data/prove-rs/ptr-transmute-nonzero.rs @@ -0,0 +1,11 @@ +/// Test that a stack pointer transmuted to usize is non-zero. 
+use std::mem::transmute; + +fn main() { + let x: u32 = 42; + let ptr: *const u32 = &x; + unsafe { + let addr: usize = transmute(ptr); + assert!(addr != 0); + } +} diff --git a/kmir/src/tests/integration/data/prove-rs/ptr-transmute-two-locals.rs b/kmir/src/tests/integration/data/prove-rs/ptr-transmute-two-locals.rs new file mode 100644 index 000000000..aceabc703 --- /dev/null +++ b/kmir/src/tests/integration/data/prove-rs/ptr-transmute-two-locals.rs @@ -0,0 +1,14 @@ +/// Test that two different local pointers transmute to different addresses. +use std::mem::transmute; + +fn main() { + let a: u32 = 42; + let b: u32 = 99; + let pa: *const u32 = &a; + let pb: *const u32 = &b; + unsafe { + let addr_a: usize = transmute(pa); + let addr_b: usize = transmute(pb); + assert!(addr_a != addr_b); + } +} From 0352cace49e23f3fc2298d10d29be0c3347ea57f Mon Sep 17 00:00:00 2001 From: Stevengre Date: Tue, 31 Mar 2026 14:54:54 +0000 Subject: [PATCH 2/2] fix(prove): seed symbolic configs with initialized cells --- kmir/src/kmir/kast.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kmir/src/kmir/kast.py b/kmir/src/kmir/kast.py index 7268cf755..96f089d01 100644 --- a/kmir/src/kmir/kast.py +++ b/kmir/src/kmir/kast.py @@ -215,8 +215,12 @@ def _make_symbolic_call_config( types: Mapping[Ty, TypeMetadata], ) -> tuple[KInner, list[KInner]]: locals, constraints = _symbolic_locals(fn_data.args, types) + + init_config = definition.init_config(KSort('GeneratedTopCell')) + _, init_subst = split_config_from(init_config) subst = Subst( { + **init_subst, 'K_CELL': fn_data.call_terminator, 'STACK_CELL': list_empty(), # FIXME see #560, problems matching a symbolic stack 'LOCALS_CELL': list_of(locals),