diff --git a/Cargo.lock b/Cargo.lock index 9a4dee8c..e3f50d30 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1328,8 +1328,8 @@ dependencies = [ "libnet", "mg-common", "omicron-common", - "opte-ioctl 0.1.0 (git+https://github.com/oxidecomputer/opte?rev=04c3d5d37d7b919cbf01019d2a17b93ff2df2eb8)", - "oxide-vpc 0.1.0 (git+https://github.com/oxidecomputer/opte?rev=04c3d5d37d7b919cbf01019d2a17b93ff2df2eb8)", + "opte-ioctl 0.1.0 (git+https://github.com/oxidecomputer/opte?rev=c570ac2126dbbebbd8e98e73b580c5be6b7e460e)", + "oxide-vpc 0.1.0 (git+https://github.com/oxidecomputer/opte?rev=c570ac2126dbbebbd8e98e73b580c5be6b7e460e)", "oximeter", "oximeter-producer", "oxnet", @@ -3001,7 +3001,7 @@ dependencies = [ [[package]] name = "illumos-sys-hdrs" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=04c3d5d37d7b919cbf01019d2a17b93ff2df2eb8#04c3d5d37d7b919cbf01019d2a17b93ff2df2eb8" +source = "git+https://github.com/oxidecomputer/opte?rev=c570ac2126dbbebbd8e98e73b580c5be6b7e460e#c570ac2126dbbebbd8e98e73b580c5be6b7e460e" dependencies = [ "bitflags 2.11.0", ] @@ -3437,7 +3437,7 @@ dependencies = [ [[package]] name = "kstat-macro" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=04c3d5d37d7b919cbf01019d2a17b93ff2df2eb8#04c3d5d37d7b919cbf01019d2a17b93ff2df2eb8" +source = "git+https://github.com/oxidecomputer/opte?rev=c570ac2126dbbebbd8e98e73b580c5be6b7e460e#c570ac2126dbbebbd8e98e73b580c5be6b7e460e" dependencies = [ "quote", "syn 2.0.117", @@ -4632,14 +4632,14 @@ dependencies = [ [[package]] name = "opte" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=04c3d5d37d7b919cbf01019d2a17b93ff2df2eb8#04c3d5d37d7b919cbf01019d2a17b93ff2df2eb8" +source = "git+https://github.com/oxidecomputer/opte?rev=c570ac2126dbbebbd8e98e73b580c5be6b7e460e#c570ac2126dbbebbd8e98e73b580c5be6b7e460e" dependencies = [ "bitflags 2.11.0", "dyn-clone", - "illumos-sys-hdrs 0.1.0 
(git+https://github.com/oxidecomputer/opte?rev=04c3d5d37d7b919cbf01019d2a17b93ff2df2eb8)", + "illumos-sys-hdrs 0.1.0 (git+https://github.com/oxidecomputer/opte?rev=c570ac2126dbbebbd8e98e73b580c5be6b7e460e)", "ingot", - "kstat-macro 0.1.0 (git+https://github.com/oxidecomputer/opte?rev=04c3d5d37d7b919cbf01019d2a17b93ff2df2eb8)", - "opte-api 0.1.0 (git+https://github.com/oxidecomputer/opte?rev=04c3d5d37d7b919cbf01019d2a17b93ff2df2eb8)", + "kstat-macro 0.1.0 (git+https://github.com/oxidecomputer/opte?rev=c570ac2126dbbebbd8e98e73b580c5be6b7e460e)", + "opte-api 0.1.0 (git+https://github.com/oxidecomputer/opte?rev=c570ac2126dbbebbd8e98e73b580c5be6b7e460e)", "postcard", "ref-cast", "serde", @@ -4670,9 +4670,9 @@ dependencies = [ [[package]] name = "opte-api" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=04c3d5d37d7b919cbf01019d2a17b93ff2df2eb8#04c3d5d37d7b919cbf01019d2a17b93ff2df2eb8" +source = "git+https://github.com/oxidecomputer/opte?rev=c570ac2126dbbebbd8e98e73b580c5be6b7e460e#c570ac2126dbbebbd8e98e73b580c5be6b7e460e" dependencies = [ - "illumos-sys-hdrs 0.1.0 (git+https://github.com/oxidecomputer/opte?rev=04c3d5d37d7b919cbf01019d2a17b93ff2df2eb8)", + "illumos-sys-hdrs 0.1.0 (git+https://github.com/oxidecomputer/opte?rev=c570ac2126dbbebbd8e98e73b580c5be6b7e460e)", "ingot", "ipnetwork", "postcard", @@ -4696,12 +4696,12 @@ dependencies = [ [[package]] name = "opte-ioctl" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=04c3d5d37d7b919cbf01019d2a17b93ff2df2eb8#04c3d5d37d7b919cbf01019d2a17b93ff2df2eb8" +source = "git+https://github.com/oxidecomputer/opte?rev=c570ac2126dbbebbd8e98e73b580c5be6b7e460e#c570ac2126dbbebbd8e98e73b580c5be6b7e460e" dependencies = [ "libc", "libnet", - "opte 0.1.0 (git+https://github.com/oxidecomputer/opte?rev=04c3d5d37d7b919cbf01019d2a17b93ff2df2eb8)", - "oxide-vpc 0.1.0 (git+https://github.com/oxidecomputer/opte?rev=04c3d5d37d7b919cbf01019d2a17b93ff2df2eb8)", + "opte 0.1.0 
(git+https://github.com/oxidecomputer/opte?rev=c570ac2126dbbebbd8e98e73b580c5be6b7e460e)", + "oxide-vpc 0.1.0 (git+https://github.com/oxidecomputer/opte?rev=c570ac2126dbbebbd8e98e73b580c5be6b7e460e)", "postcard", "serde", "thiserror 2.0.18", @@ -4742,11 +4742,11 @@ dependencies = [ [[package]] name = "oxide-vpc" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=04c3d5d37d7b919cbf01019d2a17b93ff2df2eb8#04c3d5d37d7b919cbf01019d2a17b93ff2df2eb8" +source = "git+https://github.com/oxidecomputer/opte?rev=c570ac2126dbbebbd8e98e73b580c5be6b7e460e#c570ac2126dbbebbd8e98e73b580c5be6b7e460e" dependencies = [ "cfg-if", - "illumos-sys-hdrs 0.1.0 (git+https://github.com/oxidecomputer/opte?rev=04c3d5d37d7b919cbf01019d2a17b93ff2df2eb8)", - "opte 0.1.0 (git+https://github.com/oxidecomputer/opte?rev=04c3d5d37d7b919cbf01019d2a17b93ff2df2eb8)", + "illumos-sys-hdrs 0.1.0 (git+https://github.com/oxidecomputer/opte?rev=c570ac2126dbbebbd8e98e73b580c5be6b7e460e)", + "opte 0.1.0 (git+https://github.com/oxidecomputer/opte?rev=c570ac2126dbbebbd8e98e73b580c5be6b7e460e)", "serde", "tabwriter", "uuid", @@ -5256,6 +5256,11 @@ dependencies = [ "universal-hash", ] +[[package]] +name = "poptrie" +version = "0.1.0" +source = "git+https://github.com/oxidecomputer/poptrie?branch=main#5bf62f6b889c61e0608d8463ed11da28e130cb34" + [[package]] name = "portable-atomic" version = "1.13.1" @@ -5944,7 +5949,9 @@ dependencies = [ "itertools 0.14.0", "mg-common", "ndp", + "omicron-common", "oxnet", + "poptrie", "proptest", "rdb-types 0.1.0", "schemars 0.8.22", diff --git a/Cargo.toml b/Cargo.toml index efb5a6d2..ba358134 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -126,6 +126,7 @@ oximeter = { git = "https://github.com/oxidecomputer/omicron", branch = "main"} oximeter-producer = { git = "https://github.com/oxidecomputer/omicron", branch = "main"} oxnet = { version = "0.1.4", default-features = false, features = ["schemars", "serde"] } omicron-common = { git = 
"https://github.com/oxidecomputer/omicron", branch = "main"} +poptrie = { git = "https://github.com/oxidecomputer/poptrie", branch = "main" } gateway-client = { git = "https://github.com/oxidecomputer/omicron", branch = "main" } uuid = { version = "1.21", features = ["serde", "v4"] } smf = { git = "https://github.com/illumos/smf-rs", branch = "main" } @@ -142,11 +143,11 @@ natord = "1.0" [workspace.dependencies.opte-ioctl] git = "https://github.com/oxidecomputer/opte" -rev = "04c3d5d37d7b919cbf01019d2a17b93ff2df2eb8" +rev = "c570ac2126dbbebbd8e98e73b580c5be6b7e460e" [workspace.dependencies.oxide-vpc] git = "https://github.com/oxidecomputer/opte" -rev = "04c3d5d37d7b919cbf01019d2a17b93ff2df2eb8" +rev = "c570ac2126dbbebbd8e98e73b580c5be6b7e460e" [workspace.dependencies.dpd-client] git = "https://github.com/oxidecomputer/dendrite" diff --git a/mg-api/src/lib.rs b/mg-api/src/lib.rs index 1da4cd83..fa28fae4 100644 --- a/mg-api/src/lib.rs +++ b/mg-api/src/lib.rs @@ -2,6 +2,8 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. +// Copyright 2026 Oxide Computer Company + use std::collections::HashMap; use std::net::IpAddr; @@ -17,6 +19,7 @@ use dropshot::{ }; use dropshot_api_manager_types::api_versions; use mg_types_versions::{latest, v1, v2, v5}; +use rdb::types::MulticastRoute; use rdb::{BfdPeerConfig, Prefix}; api_versions!([ @@ -31,6 +34,7 @@ api_versions!([ // | example for the next person. // v // (next_int, IDENT), + (8, MULTICAST_SUPPORT), (7, OPERATION_ID_CLEANUP), (6, RIB_EXPORTED_STRING_KEY), (5, UNNUMBERED), @@ -622,4 +626,56 @@ pub trait MgAdminApi { rqctx: RequestContext, request: Query, ) -> Result, HttpError>; + + // MRIB: Multicast ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + /// Get imported multicast routes from `mrib_in`. + #[endpoint { method = GET, path = "/mrib/status/imported", versions = VERSION_MULTICAST_SUPPORT.. 
}] + async fn get_mrib_imported( + rqctx: RequestContext, + query: Query, + ) -> Result>, HttpError>; + + /// Get selected multicast routes from `mrib_loc` (RPF-validated). + #[endpoint { method = GET, path = "/mrib/status/selected", versions = VERSION_MULTICAST_SUPPORT.. }] + async fn get_mrib_selected( + rqctx: RequestContext, + query: Query, + ) -> Result>, HttpError>; + + /// Add static multicast routes. + #[endpoint { method = PUT, path = "/static/mroute", versions = VERSION_MULTICAST_SUPPORT.. }] + async fn static_add_mcast_route( + rqctx: RequestContext, + request: TypedBody, + ) -> Result; + + /// Remove static multicast routes. + #[endpoint { method = DELETE, path = "/static/mroute", versions = VERSION_MULTICAST_SUPPORT.. }] + async fn static_remove_mcast_route( + rqctx: RequestContext, + request: TypedBody, + ) -> Result; + + /// List all static multicast routes from persistence. + #[endpoint { method = GET, path = "/static/mroute", versions = VERSION_MULTICAST_SUPPORT.. }] + async fn static_list_mcast_routes( + rqctx: RequestContext, + ) -> Result>, HttpError>; + + /// Get the RPF rebuild rate-limit interval. + #[endpoint { method = GET, path = "/mrib/config/rpf/rebuild-interval", versions = VERSION_MULTICAST_SUPPORT.. }] + async fn read_mrib_rpf_rebuild_interval( + rqctx: RequestContext, + ) -> Result< + HttpResponseOk, + HttpError, + >; + + /// Set the RPF rebuild rate-limit interval. + #[endpoint { method = POST, path = "/mrib/config/rpf/rebuild-interval", versions = VERSION_MULTICAST_SUPPORT.. }] + async fn update_mrib_rpf_rebuild_interval( + rqctx: RequestContext, + request: TypedBody, + ) -> Result; } diff --git a/mg-types/src/lib.rs b/mg-types/src/lib.rs index 9e307a20..1d5b1883 100644 --- a/mg-types/src/lib.rs +++ b/mg-types/src/lib.rs @@ -2,8 +2,11 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+// Copyright 2026 Oxide Computer Company + pub mod bfd; pub mod bgp; +pub mod mrib; pub mod ndp; pub mod rib; pub mod static_routes; diff --git a/mg-types/src/mrib.rs b/mg-types/src/mrib.rs new file mode 100644 index 00000000..adfc51ae --- /dev/null +++ b/mg-types/src/mrib.rs @@ -0,0 +1,7 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// Copyright 2026 Oxide Computer Company + +pub use mg_types_versions::latest::mrib::*; diff --git a/mg-types/versions/src/impls/mod.rs b/mg-types/versions/src/impls/mod.rs index 9bf28bda..250ed83f 100644 --- a/mg-types/versions/src/impls/mod.rs +++ b/mg-types/versions/src/impls/mod.rs @@ -2,6 +2,8 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. +// Copyright 2026 Oxide Computer Company + //! Functional code for the latest versions of types. mod bgp; diff --git a/mg-types/versions/src/latest.rs b/mg-types/versions/src/latest.rs index 4587af2f..3dd8487d 100644 --- a/mg-types/versions/src/latest.rs +++ b/mg-types/versions/src/latest.rs @@ -2,6 +2,8 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. +// Copyright 2026 Oxide Computer Company + //! Re-exports of the latest versions of types. pub mod bfd { @@ -27,6 +29,10 @@ pub mod bgp { pub use crate::v5::bgp::UnnumberedNeighborSelector; } +pub mod mrib { + pub use crate::v8::mrib::*; +} + pub mod ndp { pub use crate::v5::ndp::NdpInterface; pub use crate::v5::ndp::NdpInterfaceSelector; diff --git a/mg-types/versions/src/lib.rs b/mg-types/versions/src/lib.rs index 15fa5c87..7825c31a 100644 --- a/mg-types/versions/src/lib.rs +++ b/mg-types/versions/src/lib.rs @@ -2,6 +2,8 @@ // License, v. 2.0. 
If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. +// Copyright 2026 Oxide Computer Company + //! Versioned types for the Maghemite Admin API. //! //! # Adding a new API version @@ -41,3 +43,7 @@ pub mod v3; pub mod v4; #[path = "unnumbered/mod.rs"] pub mod v5; +// v6 (RIB_EXPORTED_STRING_KEY) and v7 (OPERATION_ID_CLEANUP) introduced +// no new types. +#[path = "multicast_support/mod.rs"] +pub mod v8; diff --git a/mg-types/versions/src/multicast_support/mod.rs b/mg-types/versions/src/multicast_support/mod.rs new file mode 100644 index 00000000..19c9eaf5 --- /dev/null +++ b/mg-types/versions/src/multicast_support/mod.rs @@ -0,0 +1,12 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// Copyright 2026 Oxide Computer Company + +//! Version `MULTICAST_SUPPORT` of the Maghemite Admin API. +//! +//! Added MRIB (Multicast Routing Information Base) support with static +//! multicast route management, RPF verification, and query endpoints. + +pub mod mrib; diff --git a/mg-types/versions/src/multicast_support/mrib.rs b/mg-types/versions/src/multicast_support/mrib.rs new file mode 100644 index 00000000..e6779b3a --- /dev/null +++ b/mg-types/versions/src/multicast_support/mrib.rs @@ -0,0 +1,98 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// Copyright 2026 Oxide Computer Company + +//! MRIB (Multicast Routing Information Base) API types. + +use std::net::IpAddr; + +use rdb::types::{ + AddressFamily, MulticastRouteKey, UnderlayMulticastIpv6, Vni, +}; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; + +/// Input for adding static multicast routes. 
+#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema)] +pub struct StaticMulticastRouteInput { + /// The multicast route key (S,G) or (*,G). + pub key: MulticastRouteKey, + /// Underlay multicast group address (ff04::/64). + pub underlay_group: UnderlayMulticastIpv6, +} + +/// Request body for adding static multicast routes to the MRIB. +#[derive(Debug, Deserialize, Serialize, JsonSchema)] +pub struct MribAddStaticRequest { + /// List of static multicast routes to add. + pub routes: Vec, +} + +/// Request body for deleting static multicast routes from the MRIB. +#[derive(Debug, Deserialize, Serialize, JsonSchema)] +pub struct MribDeleteStaticRequest { + /// List of route keys to delete. + pub keys: Vec, +} + +/// Response containing the current RPF rebuild interval. +#[derive(Debug, Deserialize, Serialize, JsonSchema)] +pub struct MribRpfRebuildIntervalResponse { + /// Minimum interval between RPF cache rebuilds in milliseconds. + /// A value of 0 means rate-limiting is disabled. + pub interval_ms: u64, +} + +/// Request body for setting the RPF rebuild interval. +#[derive(Debug, Deserialize, Serialize, JsonSchema)] +pub struct MribRpfRebuildIntervalRequest { + /// Minimum interval between RPF cache rebuilds in milliseconds. + /// A value of 0 disables rate-limiting. + /// Every unicast RIB change triggers an immediate poptrie rebuild. + pub interval_ms: u64, +} + +/// Filter for multicast route origin. +#[derive( + Debug, Clone, Copy, Deserialize, Serialize, JsonSchema, PartialEq, Eq, +)] +#[serde(rename_all = "snake_case")] +pub enum RouteOriginFilter { + /// Static routes only (operator configured). + Static, + /// Dynamic routes only (learned via IGMP, MLD, etc.). + Dynamic, +} + +/// Query parameters for MRIB routes. +/// +/// When `group` is provided, looks up a specific route. +/// When `group` is omitted, lists all routes (with optional filters). 
+#[derive(Debug, Deserialize, Serialize, JsonSchema)] +pub struct MribQuery { + /// Multicast group address. If provided, returns a specific route. + /// If omitted, returns all routes matching the filters. + #[serde(default)] + pub group: Option, + /// Source address (`None` for (*,G) routes). Only used when `group` + /// is set. + #[serde(default)] + pub source: Option, + /// VNI (defaults to 77 for fleet-scoped multicast). Only used when + /// `group` is set. + #[serde(default = "default_multicast_vni")] + pub vni: Vni, + /// Filter by address family. Only used when listing all routes. + #[serde(default)] + pub address_family: Option, + /// Filter by route origin ("static" or "dynamic"). + /// Only used when listing all routes. + #[serde(default)] + pub route_origin: Option, +} + +fn default_multicast_vni() -> Vni { + Vni::DEFAULT_MULTICAST_VNI +} diff --git a/mgadm/src/main.rs b/mgadm/src/main.rs index 3ceec95c..8e72ca1e 100644 --- a/mgadm/src/main.rs +++ b/mgadm/src/main.rs @@ -2,6 +2,8 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. +// Copyright 2026 Oxide Computer Company + #![allow(clippy::large_enum_variant)] use anyhow::Result; @@ -14,6 +16,7 @@ use std::net::{IpAddr, SocketAddr}; mod bfd; mod bgp; +mod mrib; mod ndp; mod rib; mod static_routing; @@ -60,6 +63,10 @@ enum Commands { /// Neighbor Discovery Protocol state for BGP unnumbered #[command(subcommand)] Ndp(ndp::Commands), + + /// Multicast RIB management commands. 
+ #[command(subcommand)] + Mrib(mrib::Commands), } fn main() -> Result<()> { @@ -83,6 +90,7 @@ async fn run() -> Result<()> { Commands::Bfd(command) => bfd::commands(command, client).await?, Commands::Rib(command) => rib::commands(command, client).await?, Commands::Ndp(command) => ndp::commands(command, client).await?, + Commands::Mrib(command) => mrib::commands(command, client).await?, } Ok(()) } diff --git a/mgadm/src/mrib.rs b/mgadm/src/mrib.rs new file mode 100644 index 00000000..402421d2 --- /dev/null +++ b/mgadm/src/mrib.rs @@ -0,0 +1,482 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// Copyright 2026 Oxide Computer Company + +//! MRIB (Multicast RIB) administration commands. +//! +//! This module inspects multicast routing state and tunes the RPF rebuild +//! interval. Omicron is the source of truth for multicast group membership +//! and programs the MRIB via the mg-api, so route writes are not exposed +//! here to avoid conflicts with Omicron-managed state. + +use std::net::IpAddr; + +use anyhow::Result; +use clap::{Args, Subcommand}; + +use mg_admin_client::Client; +use mg_admin_client::types::{ + MribRpfRebuildIntervalRequest, MulticastRoute, MulticastRouteKey, + RouteOriginFilter, Vni, +}; +use rdb::types::{AddressFamily, DEFAULT_MULTICAST_VNI}; + +const DEFAULT_VNI: u32 = DEFAULT_MULTICAST_VNI.as_u32(); + +fn parse_route_origin(s: &str) -> Result { + match s.to_lowercase().as_str() { + "static" => Ok(RouteOriginFilter::Static), + "dynamic" => Ok(RouteOriginFilter::Dynamic), + _ => Err(format!( + "invalid origin: {s} (expected 'static' or 'dynamic')" + )), + } +} + +#[derive(Subcommand, Debug)] +pub enum Commands { + /// View MRIB state. + Status(StatusCommand), + + /// RPF (Reverse Path Forwarding) table configuration and lookup.
+ Rpf(RpfCommand), +} + +#[derive(Debug, Args)] +pub struct StatusCommand { + #[command(subcommand)] + command: StatusCmd, +} + +#[derive(Subcommand, Debug)] +pub enum StatusCmd { + /// Get imported multicast routes (`mrib_in`). + /// + /// Lists all routes, or gets a specific route with `-g`. + /// + /// Usage: `mrib status imported [ipv4|ipv6] [-g group] [-s source] [-v vni]` + Imported { + /// Address family to filter by. + #[arg(value_enum)] + address_family: Option, + + /// Multicast group address (if omitted, lists all routes). + #[arg(short, long)] + group: Option, + + /// Source address (omit for any-source (*,G)). + #[arg(short, long)] + source: Option, + + /// VNI (defaults to DEFAULT_MULTICAST_VNI for fleet-scoped multicast). + #[arg(short, long, default_value_t = DEFAULT_VNI, value_parser = clap::value_parser!(u32).range(0..=(rdb::Vni::MAX_VNI as i64)))] + vni: u32, + + /// Filter by route origin ("static" or "dynamic"). + #[arg(long, value_parser = parse_route_origin)] + origin: Option, + }, + + /// Get selected multicast routes (`mrib_loc`, RPF-validated). + /// + /// Lists all routes, or gets a specific route with `-g`. + /// + /// Usage: `mrib status selected [ipv4|ipv6] [-g group] [-s source] [-v vni]` + Selected { + /// Address family to filter by. + #[arg(value_enum)] + address_family: Option, + + /// Multicast group address (if omitted, lists all routes). + #[arg(short, long)] + group: Option, + + /// Source address (omit for any-source (*,G)). + #[arg(short, long)] + source: Option, + + /// VNI (defaults to DEFAULT_MULTICAST_VNI for fleet-scoped multicast). + #[arg(short, long, default_value_t = DEFAULT_VNI, value_parser = clap::value_parser!(u32).range(0..=(rdb::Vni::MAX_VNI as i64)))] + vni: u32, + + /// Filter by route origin ("static" or "dynamic"). 
+ #[arg(long, value_parser = parse_route_origin)] + origin: Option, + }, +} + +#[derive(Debug, Args)] +pub struct RpfCommand { + #[command(subcommand)] + command: RpfCmd, +} + +#[derive(Subcommand, Debug)] +pub enum RpfCmd { + /// Get RPF rebuild interval. + GetInterval, + + /// Set RPF rebuild interval. + SetInterval { + /// Rebuild interval in milliseconds + interval_ms: u64, + }, +} + +pub async fn commands(command: Commands, c: Client) -> Result<()> { + match command { + Commands::Status(status_cmd) => match status_cmd.command { + StatusCmd::Imported { + group, + source, + vni, + address_family, + origin, + } => { + if let Some(g) = group { + get_route(c, g, source, vni).await? + } else { + get_imported(c, address_family, origin).await? + } + } + StatusCmd::Selected { + group, + source, + vni, + address_family, + origin, + } => { + if let Some(g) = group { + get_route_selected(c, g, source, vni).await? + } else { + get_selected(c, address_family, origin).await? + } + } + }, + Commands::Rpf(rpf_cmd) => match rpf_cmd.command { + RpfCmd::GetInterval => get_rpf_interval(c).await?, + RpfCmd::SetInterval { interval_ms } => { + set_rpf_interval(c, interval_ms).await? + } + }, + } + Ok(()) +} + +async fn get_imported( + c: Client, + address_family: Option, + origin: Option, +) -> Result<()> { + let routes = c + .get_mrib_imported(address_family.as_ref(), None, origin, None, None) + .await? + .into_inner(); + print_routes(&routes); + Ok(()) +} + +async fn get_selected( + c: Client, + address_family: Option, + origin: Option, +) -> Result<()> { + let routes = c + .get_mrib_selected(address_family.as_ref(), None, origin, None, None) + .await? + .into_inner(); + print_routes(&routes); + Ok(()) +} + +async fn get_route( + c: Client, + group: IpAddr, + source: Option, + vni: u32, +) -> Result<()> { + let vni = Vni::from(vni); + let routes = c + .get_mrib_imported( + None, + Some(&group), + None, + source.as_ref(), + Some(&vni), + ) + .await? 
+ .into_inner(); + if let Some(route) = routes.first() { + println!("{route:#?}"); + } else { + anyhow::bail!("route not found"); + } + Ok(()) +} + +async fn get_route_selected( + c: Client, + group: IpAddr, + source: Option, + vni: u32, +) -> Result<()> { + let vni = Vni::from(vni); + let routes = c + .get_mrib_selected( + None, + Some(&group), + None, + source.as_ref(), + Some(&vni), + ) + .await? + .into_inner(); + if let Some(route) = routes.first() { + println!("{route:#?}"); + } else { + anyhow::bail!("route not found in mrib_loc"); + } + Ok(()) +} + +async fn get_rpf_interval(c: Client) -> Result<()> { + let result = c.read_mrib_rpf_rebuild_interval().await?.into_inner(); + println!("RPF rebuild interval: {}ms", result.interval_ms); + Ok(()) +} + +async fn set_rpf_interval(c: Client, interval_ms: u64) -> Result<()> { + c.update_mrib_rpf_rebuild_interval(&MribRpfRebuildIntervalRequest { + interval_ms, + }) + .await?; + println!("Updated RPF rebuild interval to: {interval_ms}ms"); + Ok(()) +} + +fn print_routes(routes: &[MulticastRoute]) { + if routes.is_empty() { + println!("No multicast routes"); + return; + } + for route in routes { + let (source_str, group_str, vni) = match &route.key { + MulticastRouteKey::V4(k) => { + let src = k.source.map_or("*".to_string(), |s| s.to_string()); + let grp = k.group.to_string(); + (src, grp, k.vni.clone()) + } + MulticastRouteKey::V6(k) => { + let src = k.source.map_or("*".to_string(), |s| s.to_string()); + let grp = k.group.to_string(); + (src, grp, k.vni.clone()) + } + }; + println!( + "({source_str},{group_str}) vni={vni} underlay={} rpf={:?} source={:?}", + route.underlay_group, route.rpf_neighbor, route.source, + ); + } +} + +#[cfg(test)] +mod tests { + use super::*; + use clap::Parser; + use std::net::{Ipv4Addr, Ipv6Addr}; + + // Wrapper to test subcommand parsing + #[derive(Parser, Debug)] + struct TestCli { + #[command(subcommand)] + command: Commands, + } + + #[test] + fn test_status_imported_specific_route() { 
+ let cli = TestCli::try_parse_from([ + "test", + "status", + "imported", + "-g", + "225.1.2.3", + ]) + .unwrap(); + + match cli.command { + Commands::Status(cmd) => match cmd.command { + StatusCmd::Imported { + group, source, vni, .. + } => { + assert_eq!( + group, + Some(IpAddr::V4(Ipv4Addr::new(225, 1, 2, 3))) + ); + assert_eq!(source, None); + assert_eq!(vni, DEFAULT_VNI); + } + _ => panic!("expected Imported"), + }, + _ => panic!("expected Status command"), + } + } + + #[test] + fn test_status_imported_specific_route_all_flags() { + let cli = TestCli::try_parse_from([ + "test", + "status", + "imported", + "-g", + "225.1.2.3", + "-s", + "10.0.0.1", + "-v", + "100", + ]) + .unwrap(); + + match cli.command { + Commands::Status(cmd) => match cmd.command { + StatusCmd::Imported { + group, source, vni, .. + } => { + assert_eq!( + group, + Some(IpAddr::V4(Ipv4Addr::new(225, 1, 2, 3))) + ); + assert_eq!( + source, + Some(IpAddr::V4(Ipv4Addr::new(10, 0, 0, 1))) + ); + assert_eq!(vni, 100); + } + _ => panic!("expected Imported"), + }, + _ => panic!("expected Status command"), + } + } + + #[test] + fn test_status_selected_specific_route_ipv6() { + let cli = TestCli::try_parse_from([ + "test", + "status", + "selected", + "--group", + "ff0e::1", + "--source", + "2001:db8::1", + "--vni", + "42", + ]) + .unwrap(); + + match cli.command { + Commands::Status(cmd) => match cmd.command { + StatusCmd::Selected { + group, source, vni, .. 
+ } => { + assert_eq!( + group, + Some(IpAddr::V6(Ipv6Addr::new( + 0xff0e, 0, 0, 0, 0, 0, 0, 1 + ))) + ); + assert_eq!( + source, + Some(IpAddr::V6(Ipv6Addr::new( + 0x2001, 0xdb8, 0, 0, 0, 0, 0, 1 + ))) + ); + assert_eq!(vni, 42); + } + _ => panic!("expected Selected"), + }, + _ => panic!("expected Status command"), + } + } + + #[test] + fn test_status_imported_list_with_af() { + let cli = + TestCli::try_parse_from(["test", "status", "imported", "ipv4"]) + .unwrap(); + + match cli.command { + Commands::Status(cmd) => match cmd.command { + StatusCmd::Imported { + group, + address_family, + .. + } => { + assert_eq!(group, None); + assert_eq!(address_family, Some(AddressFamily::Ipv4)); + } + _ => panic!("expected Imported"), + }, + _ => panic!("expected Status command"), + } + } + + #[test] + fn test_status_imported_list_with_origin() { + let cli = TestCli::try_parse_from([ + "test", "status", "imported", "--origin", "dynamic", + ]) + .unwrap(); + + match cli.command { + Commands::Status(cmd) => match cmd.command { + StatusCmd::Imported { group, origin, .. } => { + assert_eq!(group, None); + assert_eq!(origin, Some(RouteOriginFilter::Dynamic)); + } + _ => panic!("expected Imported"), + }, + _ => panic!("expected Status command"), + } + } + + #[test] + fn test_status_selected_list_all() { + let cli = + TestCli::try_parse_from(["test", "status", "selected"]).unwrap(); + + match cli.command { + Commands::Status(cmd) => match cmd.command { + StatusCmd::Selected { + group, + address_family, + origin, + .. 
+ } => { + assert_eq!(group, None); + assert_eq!(address_family, None); + assert_eq!(origin, None); + } + _ => panic!("expected Selected"), + }, + _ => panic!("expected Status command"), + } + } + + #[test] + fn test_rpf_set_interval() { + let cli = + TestCli::try_parse_from(["test", "rpf", "set-interval", "500"]) + .unwrap(); + + match cli.command { + Commands::Rpf(cmd) => match cmd.command { + RpfCmd::SetInterval { interval_ms } => { + assert_eq!(interval_ms, 500); + } + _ => panic!("expected SetInterval"), + }, + _ => panic!("expected Rpf command"), + } + } +} diff --git a/mgadm/src/static_routing.rs b/mgadm/src/static_routing.rs index 761f187e..01c4dc4a 100644 --- a/mgadm/src/static_routing.rs +++ b/mgadm/src/static_routing.rs @@ -2,6 +2,8 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. +// Copyright 2026 Oxide Computer Company + use anyhow::Result; use clap::{Args, Subcommand}; use mg_admin_client::{Client, types}; @@ -11,12 +13,16 @@ use std::net::{Ipv4Addr, Ipv6Addr}; #[derive(Subcommand, Debug)] pub enum Commands { + // Unicast static routes GetV4Routes, AddV4Route(StaticRoute4), RemoveV4Routes(StaticRoute4), GetV6Routes, AddV6Route(StaticRoute6), RemoveV6Routes(StaticRoute6), + + // Multicast static routes (read-only -> Omicron is source of truth) + GetMroutes, } #[derive(Debug, Args)] @@ -113,10 +119,39 @@ pub async fn commands(command: Commands, client: Client) -> Result<()> { }; client.static_remove_v6_route(&arg).await?; } + Commands::GetMroutes => { + let routes = client.static_list_mcast_routes().await?.into_inner(); + if routes.is_empty() { + println!("No static multicast routes"); + } else { + print_mroutes(&routes); + } + } } Ok(()) } +fn print_mroutes(routes: &[types::MulticastRoute]) { + for route in routes { + let (source_str, group_str, vni) = match &route.key { + types::MulticastRouteKey::V4(k) => { + let src = k.source.map_or("*".to_string(), |s| 
s.to_string()); + let grp = k.group.to_string(); + (src, grp, k.vni.clone()) + } + types::MulticastRouteKey::V6(k) => { + let src = k.source.map_or("*".to_string(), |s| s.to_string()); + let grp = k.group.to_string(); + (src, grp, k.vni.clone()) + } + }; + println!( + "({source_str}, {group_str}) vni={vni} underlay={}", + route.underlay_group, + ); + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/mgd/src/admin.rs b/mgd/src/admin.rs index bae5a04a..668e8368 100644 --- a/mgd/src/admin.rs +++ b/mgd/src/admin.rs @@ -2,7 +2,9 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -use crate::{bfd_admin, bgp_admin, rib_admin, static_admin}; +// Copyright 2026 Oxide Computer Company + +use crate::{bfd_admin, bgp_admin, mrib_admin, rib_admin, static_admin}; use bfd_admin::BfdContext; use bgp::params::*; use bgp_admin::BgpContext; @@ -20,6 +22,10 @@ use mg_types::bgp::{ NeighborSelector, UnnumberedNeighborResetRequest, UnnumberedNeighborSelector, }; +use mg_types::mrib::{ + MribAddStaticRequest, MribDeleteStaticRequest, MribQuery, + MribRpfRebuildIntervalRequest, MribRpfRebuildIntervalResponse, +}; use mg_types::ndp::{NdpInterface, NdpInterfaceSelector, NdpManagerState}; use mg_types::rib::{ BestpathFanoutRequest, BestpathFanoutResponse, GetRibResult, Rib, RibQuery, @@ -621,6 +627,56 @@ impl MgAdminApi for MgAdminApiImpl { ) -> Result, HttpError> { bgp_admin::get_ndp_interface_detail(ctx, request).await } + + async fn get_mrib_imported( + rqctx: RequestContext, + query: Query, + ) -> Result>, HttpError> + { + mrib_admin::get_mrib_imported(rqctx, query).await + } + + async fn get_mrib_selected( + rqctx: RequestContext, + query: Query, + ) -> Result>, HttpError> + { + mrib_admin::get_mrib_selected(rqctx, query).await + } + + async fn static_add_mcast_route( + rqctx: RequestContext, + request: TypedBody, + ) -> Result { + mrib_admin::static_add_mcast_route(rqctx, request).await + } + + 
async fn static_remove_mcast_route( + rqctx: RequestContext, + request: TypedBody, + ) -> Result { + mrib_admin::static_remove_mcast_route(rqctx, request).await + } + + async fn static_list_mcast_routes( + rqctx: RequestContext, + ) -> Result>, HttpError> + { + mrib_admin::static_list_mcast_routes(rqctx).await + } + + async fn read_mrib_rpf_rebuild_interval( + rqctx: RequestContext, + ) -> Result, HttpError> { + mrib_admin::read_mrib_rpf_rebuild_interval(rqctx).await + } + + async fn update_mrib_rpf_rebuild_interval( + rqctx: RequestContext, + request: TypedBody, + ) -> Result { + mrib_admin::update_mrib_rpf_rebuild_interval(rqctx, request).await + } } pub fn api_description() -> ApiDescription> { diff --git a/mgd/src/error.rs b/mgd/src/error.rs index e184c980..7a9fb97b 100644 --- a/mgd/src/error.rs +++ b/mgd/src/error.rs @@ -2,6 +2,8 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. +// Copyright 2026 Oxide Computer Company + use crate::unnumbered_manager::{AddNeighborError, ResolveNeighborError}; use dropshot::{ClientErrorStatusCode, HttpError}; @@ -35,7 +37,15 @@ pub enum Error { impl From for HttpError { fn from(value: Error) -> Self { match value { - Error::Db(_) => Self::for_internal_error(value.to_string()), + Error::Db(ref db_err) => match db_err { + rdb::error::Error::Validation(msg) => { + Self::for_bad_request(None, msg.clone()) + } + rdb::error::Error::NotFound(msg) => { + Self::for_not_found(None, msg.clone()) + } + _ => Self::for_internal_error(value.to_string()), + }, Error::Conflict(_) => Self::for_client_error_with_status( Some(value.to_string()), ClientErrorStatusCode::CONFLICT, diff --git a/mgd/src/main.rs b/mgd/src/main.rs index 02a77ad0..549015c2 100644 --- a/mgd/src/main.rs +++ b/mgd/src/main.rs @@ -2,6 +2,8 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+// Copyright 2026 Oxide Computer Company + use crate::admin::HandlerContext; use crate::bfd_admin::BfdContext; use crate::bgp_admin::BgpContext; @@ -33,6 +35,7 @@ mod bfd_admin; mod bgp_admin; mod error; mod log; +mod mrib_admin; mod oxstats; mod rib_admin; mod signal; diff --git a/mgd/src/mrib_admin.rs b/mgd/src/mrib_admin.rs new file mode 100644 index 00000000..81bdaae2 --- /dev/null +++ b/mgd/src/mrib_admin.rs @@ -0,0 +1,181 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// Copyright 2026 Oxide Computer Company + +use std::sync::Arc; +use std::time::Duration; + +use dropshot::{ + HttpError, HttpResponseDeleted, HttpResponseOk, + HttpResponseUpdatedNoContent, RequestContext, TypedBody, +}; + +use mg_types::mrib::{ + MribAddStaticRequest, MribDeleteStaticRequest, MribQuery, + MribRpfRebuildIntervalRequest, MribRpfRebuildIntervalResponse, + RouteOriginFilter, +}; +use rdb::types::{ + MulticastAddr, MulticastRoute, MulticastRouteKey, MulticastSourceProtocol, +}; + +use crate::admin::HandlerContext; +use crate::error::Error; + +/// Convert [`RouteOriginFilter`] to the `static_only` parameter +/// used by [`rdb::Db::mrib_list`]. 
+fn origin_to_static_only(origin: Option) -> Option { + match origin { + None => None, + Some(RouteOriginFilter::Static) => Some(true), + Some(RouteOriginFilter::Dynamic) => Some(false), + } +} + +pub async fn get_mrib_imported( + rqctx: RequestContext>, + query: dropshot::Query, +) -> Result>, HttpError> { + let ctx = rqctx.context(); + let q = query.into_inner(); + + // If group is provided, look up a specific route + if let Some(group_addr) = q.group { + let group = MulticastAddr::try_from(group_addr).map_err(|e| { + HttpError::for_bad_request( + None, + format!("invalid group address: {e}"), + ) + })?; + let key = MulticastRouteKey::new(q.source, group, q.vni) + .map_err(|e| HttpError::for_bad_request(None, format!("{e}")))?; + let route = ctx.db.get_mcast_route(&key).ok_or_else(|| { + HttpError::for_not_found(None, format!("route {key} not found")) + })?; + return Ok(HttpResponseOk(vec![route])); + } + + // Otherwise, list all routes with filters + let routes = ctx.db.mrib_list( + q.address_family, + origin_to_static_only(q.route_origin), + false, // `mrib_in` + ); + Ok(HttpResponseOk(routes)) +} + +pub async fn get_mrib_selected( + rqctx: RequestContext>, + query: dropshot::Query, +) -> Result>, HttpError> { + let ctx = rqctx.context(); + let q = query.into_inner(); + + // If group is provided, look up a specific route + if let Some(group_addr) = q.group { + let group = MulticastAddr::try_from(group_addr).map_err(|e| { + HttpError::for_bad_request( + None, + format!("invalid group address: {e}"), + ) + })?; + let key = MulticastRouteKey::new(q.source, group, q.vni) + .map_err(|e| HttpError::for_bad_request(None, format!("{e}")))?; + let route = ctx.db.get_selected_mcast_route(&key).ok_or_else(|| { + HttpError::for_not_found( + None, + format!("route {key} not found in mrib_loc"), + ) + })?; + return Ok(HttpResponseOk(vec![route])); + } + + // Otherwise, list all routes with filters + let routes = ctx.db.mrib_list( + q.address_family, + 
origin_to_static_only(q.route_origin), + true, // `mrib_loc` + ); + Ok(HttpResponseOk(routes)) +} + +pub async fn static_add_mcast_route( + rqctx: RequestContext>, + request: TypedBody, +) -> Result { + let ctx = rqctx.context(); + let body = request.into_inner(); + + // Convert input to full `MulticastRoute` with timestamps + let routes: Vec = body + .routes + .into_iter() + .map(|input| { + MulticastRoute::new( + input.key, + input.underlay_group, + MulticastSourceProtocol::Static, + ) + }) + .collect(); + + // Validate routes before adding + for route in &routes { + route.validate().map_err(|e| { + HttpError::for_bad_request(None, format!("validation error: {e}")) + })?; + } + + ctx.db + .add_static_mcast_routes(&routes) + .map_err(Error::from)?; + Ok(HttpResponseUpdatedNoContent()) +} + +pub async fn static_remove_mcast_route( + rqctx: RequestContext>, + request: TypedBody, +) -> Result { + let ctx = rqctx.context(); + let body = request.into_inner(); + ctx.db + .remove_static_mcast_routes(&body.keys) + .map_err(Error::from)?; + Ok(HttpResponseDeleted()) +} + +pub async fn static_list_mcast_routes( + rqctx: RequestContext>, +) -> Result>, HttpError> { + let ctx = rqctx.context(); + let routes = ctx.db.get_static_mcast_routes().map_err(Error::from)?; + Ok(HttpResponseOk(routes)) +} + +pub async fn read_mrib_rpf_rebuild_interval( + rqctx: RequestContext>, +) -> Result, HttpError> { + let ctx = rqctx.context(); + let interval = ctx + .db + .get_mrib_rpf_rebuild_interval() + .map_err(|e| HttpError::for_internal_error(format!("{e}")))?; + Ok(HttpResponseOk(MribRpfRebuildIntervalResponse { + interval_ms: u64::try_from(interval.as_millis()).unwrap_or(u64::MAX), + })) +} + +pub async fn update_mrib_rpf_rebuild_interval( + rqctx: RequestContext>, + request: TypedBody, +) -> Result { + let ctx = rqctx.context(); + let body = request.into_inner(); + let interval = Duration::from_millis(body.interval_ms); + ctx.db + .set_mrib_rpf_rebuild_interval(interval) + .map_err(|e| 
HttpError::for_internal_error(format!("{e}")))?; + Ok(HttpResponseUpdatedNoContent()) +} diff --git a/openapi/mg-admin/mg-admin-7.0.0-fba416.json.gitstub b/openapi/mg-admin/mg-admin-7.0.0-fba416.json.gitstub new file mode 100644 index 00000000..fa8c6377 --- /dev/null +++ b/openapi/mg-admin/mg-admin-7.0.0-fba416.json.gitstub @@ -0,0 +1 @@ +d20fd61909d7a4c06ec322962321cb61f2f4e252:openapi/mg-admin/mg-admin-7.0.0-fba416.json diff --git a/openapi/mg-admin/mg-admin-7.0.0-fba416.json b/openapi/mg-admin/mg-admin-8.0.0-fc8d9c.json similarity index 90% rename from openapi/mg-admin/mg-admin-7.0.0-fba416.json rename to openapi/mg-admin/mg-admin-8.0.0-fc8d9c.json index 5231dda4..b264b109 100644 --- a/openapi/mg-admin/mg-admin-7.0.0-fba416.json +++ b/openapi/mg-admin/mg-admin-8.0.0-fc8d9c.json @@ -6,7 +6,7 @@ "url": "https://oxide.computer", "email": "api@oxide.computer" }, - "version": "7.0.0" + "version": "8.0.0" }, "paths": { "/bfd/peers": { @@ -1234,6 +1234,203 @@ } } }, + "/mrib/config/rpf/rebuild-interval": { + "get": { + "summary": "Get the RPF rebuild rate-limit interval.", + "operationId": "read_mrib_rpf_rebuild_interval", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/MribRpfRebuildIntervalResponse" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + }, + "post": { + "summary": "Set the RPF rebuild rate-limit interval.", + "operationId": "update_mrib_rpf_rebuild_interval", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/MribRpfRebuildIntervalRequest" + } + } + }, + "required": true + }, + "responses": { + "204": { + "description": "resource updated" + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/mrib/status/imported": { + "get": { + 
"summary": "Get imported multicast routes from `mrib_in`.", + "operationId": "get_mrib_imported", + "parameters": [ + { + "in": "query", + "name": "address_family", + "description": "Filter by address family. Only used when listing all routes.", + "schema": { + "$ref": "#/components/schemas/AddressFamily" + } + }, + { + "in": "query", + "name": "group", + "description": "Multicast group address. If provided, returns a specific route. If omitted, returns all routes matching the filters.", + "schema": { + "nullable": true, + "type": "string", + "format": "ip" + } + }, + { + "in": "query", + "name": "route_origin", + "description": "Filter by route origin (\"static\" or \"dynamic\"). Only used when listing all routes.", + "schema": { + "$ref": "#/components/schemas/RouteOriginFilter" + } + }, + { + "in": "query", + "name": "source", + "description": "Source address (`None` for (*,G) routes). Only used when `group` is set.", + "schema": { + "nullable": true, + "type": "string", + "format": "ip" + } + }, + { + "in": "query", + "name": "vni", + "description": "VNI (defaults to 77 for fleet-scoped multicast). Only used when `group` is set.", + "schema": { + "$ref": "#/components/schemas/Vni" + } + } + ], + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "title": "Array_of_MulticastRoute", + "type": "array", + "items": { + "$ref": "#/components/schemas/MulticastRoute" + } + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/mrib/status/selected": { + "get": { + "summary": "Get selected multicast routes from `mrib_loc` (RPF-validated).", + "operationId": "get_mrib_selected", + "parameters": [ + { + "in": "query", + "name": "address_family", + "description": "Filter by address family. 
Only used when listing all routes.", + "schema": { + "$ref": "#/components/schemas/AddressFamily" + } + }, + { + "in": "query", + "name": "group", + "description": "Multicast group address. If provided, returns a specific route. If omitted, returns all routes matching the filters.", + "schema": { + "nullable": true, + "type": "string", + "format": "ip" + } + }, + { + "in": "query", + "name": "route_origin", + "description": "Filter by route origin (\"static\" or \"dynamic\"). Only used when listing all routes.", + "schema": { + "$ref": "#/components/schemas/RouteOriginFilter" + } + }, + { + "in": "query", + "name": "source", + "description": "Source address (`None` for (*,G) routes). Only used when `group` is set.", + "schema": { + "nullable": true, + "type": "string", + "format": "ip" + } + }, + { + "in": "query", + "name": "vni", + "description": "VNI (defaults to 77 for fleet-scoped multicast). Only used when `group` is set.", + "schema": { + "$ref": "#/components/schemas/Vni" + } + } + ], + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "title": "Array_of_MulticastRoute", + "type": "array", + "items": { + "$ref": "#/components/schemas/MulticastRoute" + } + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, "/ndp/interface": { "get": { "operationId": "get_ndp_interface_detail", @@ -1484,6 +1681,84 @@ } } }, + "/static/mroute": { + "get": { + "summary": "List all static multicast routes from persistence.", + "operationId": "static_list_mcast_routes", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "title": "Array_of_MulticastRoute", + "type": "array", + "items": { + "$ref": "#/components/schemas/MulticastRoute" + } + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": 
"#/components/responses/Error" + } + } + }, + "put": { + "summary": "Add static multicast routes.", + "operationId": "static_add_mcast_route", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/MribAddStaticRequest" + } + } + }, + "required": true + }, + "responses": { + "204": { + "description": "resource updated" + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + }, + "delete": { + "summary": "Remove static multicast routes.", + "operationId": "static_remove_mcast_route", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/MribDeleteStaticRequest" + } + } + }, + "required": true + }, + "responses": { + "204": { + "description": "successful deletion" + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, "/static/route4": { "get": { "operationId": "static_list_v4_routes", @@ -3725,6 +4000,231 @@ } ] }, + "MribAddStaticRequest": { + "description": "Request body for adding static multicast routes to the MRIB.", + "type": "object", + "properties": { + "routes": { + "description": "List of static multicast routes to add.", + "type": "array", + "items": { + "$ref": "#/components/schemas/StaticMulticastRouteInput" + } + } + }, + "required": [ + "routes" + ] + }, + "MribDeleteStaticRequest": { + "description": "Request body for deleting static multicast routes from the MRIB.", + "type": "object", + "properties": { + "keys": { + "description": "List of route keys to delete.", + "type": "array", + "items": { + "$ref": "#/components/schemas/MulticastRouteKey" + } + } + }, + "required": [ + "keys" + ] + }, + "MribRpfRebuildIntervalRequest": { + "description": "Request body for setting the RPF rebuild interval.", + "type": "object", + "properties": { + "interval_ms": { + "description": "Minimum interval between RPF cache 
rebuilds in milliseconds. A value of 0 disables rate-limiting. Every unicast RIB change triggers an immediate poptrie rebuild.", + "type": "integer", + "format": "uint64", + "minimum": 0 + } + }, + "required": [ + "interval_ms" + ] + }, + "MribRpfRebuildIntervalResponse": { + "description": "Response containing the current RPF rebuild interval.", + "type": "object", + "properties": { + "interval_ms": { + "description": "Minimum interval between RPF cache rebuilds in milliseconds. A value of 0 means rate-limiting is disabled.", + "type": "integer", + "format": "uint64", + "minimum": 0 + } + }, + "required": [ + "interval_ms" + ] + }, + "MulticastRoute": { + "description": "Multicast route entry containing replication groups and metadata.", + "type": "object", + "properties": { + "created": { + "description": "Creation timestamp.", + "type": "string", + "format": "date-time" + }, + "key": { + "description": "The multicast route key (S,G) or (*,G).", + "allOf": [ + { + "$ref": "#/components/schemas/MulticastRouteKey" + } + ] + }, + "rpf_neighbor": { + "nullable": true, + "description": "Expected RPF neighbor for the source (for RPF checks).", + "type": "string", + "format": "ip" + }, + "source": { + "description": "Route source (static, IGMP, etc.).", + "allOf": [ + { + "$ref": "#/components/schemas/MulticastSourceProtocol" + } + ] + }, + "underlay_group": { + "description": "Underlay multicast group address (ff04::/64).\n\nOverlay multicast addresses are mapped 1:1 to admin-local scope underlay addresses. Switches replicate to this address via the PRE (with tofino_asic).\n\nOPTE handles the overlay/underlay translation at sled boundaries, while sled membership is managed by Omicron and programmed to DPD/OPTE directly.", + "type": "string", + "format": "ipv6" + }, + "updated": { + "description": "Last updated timestamp.\n\nOnly updated when route fields change semantically (rpf_neighbor, underlay_group, source). 
An idempotent upsert with an identical value does not update this timestamp.", + "type": "string", + "format": "date-time" + } + }, + "required": [ + "created", + "key", + "source", + "underlay_group", + "updated" + ] + }, + "MulticastRouteKey": { + "description": "Multicast route key: (Source, Group) pair for source-specific multicast, or (*, Group) for any-source multicast.\n\nUses type-enforced address family matching: IPv4 sources can only be paired with IPv4 groups, and IPv6 sources with IPv6 groups.", + "oneOf": [ + { + "type": "object", + "properties": { + "V4": { + "$ref": "#/components/schemas/MulticastRouteKeyV4" + } + }, + "required": [ + "V4" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "V6": { + "$ref": "#/components/schemas/MulticastRouteKeyV6" + } + }, + "required": [ + "V6" + ], + "additionalProperties": false + } + ] + }, + "MulticastRouteKeyV4": { + "description": "IPv4 multicast route key with type-enforced address family matching.", + "type": "object", + "properties": { + "group": { + "description": "Multicast group address.", + "type": "string", + "format": "ipv4" + }, + "source": { + "nullable": true, + "description": "Source address (`None` for (*,G) routes).", + "type": "string", + "format": "ipv4" + }, + "vni": { + "description": "VNI (Virtual Network Identifier).", + "default": 77, + "allOf": [ + { + "$ref": "#/components/schemas/Vni" + } + ] + } + }, + "required": [ + "group" + ] + }, + "MulticastRouteKeyV6": { + "description": "IPv6 multicast route key with type-enforced address family matching.", + "type": "object", + "properties": { + "group": { + "description": "Multicast group address.", + "type": "string", + "format": "ipv6" + }, + "source": { + "nullable": true, + "description": "Source address (`None` for (*,G) routes).", + "type": "string", + "format": "ipv6" + }, + "vni": { + "description": "VNI (Virtual Network Identifier).", + "default": 77, + "allOf": [ + { + "$ref": 
"#/components/schemas/Vni" + } + ] + } + }, + "required": [ + "group" + ] + }, + "MulticastSourceProtocol": { + "description": "Source of a multicast route entry.", + "oneOf": [ + { + "description": "Static route configured via API.", + "type": "string", + "enum": [ + "Static" + ] + }, + { + "description": "Learned via IGMP snooping (future).", + "type": "string", + "enum": [ + "Igmp" + ] + }, + { + "description": "Learned via MLD snooping (future).", + "type": "string", + "enum": [ + "Mld" + ] + } + ] + }, "NdpInterface": { "description": "NDP state for an interface", "type": "object", @@ -5244,6 +5744,29 @@ "code" ] }, + "StaticMulticastRouteInput": { + "description": "Input for adding static multicast routes.", + "type": "object", + "properties": { + "key": { + "description": "The multicast route key (S,G) or (*,G).", + "allOf": [ + { + "$ref": "#/components/schemas/MulticastRouteKey" + } + ] + }, + "underlay_group": { + "description": "Underlay multicast group address (ff04::/64).", + "type": "string", + "format": "ipv6" + } + }, + "required": [ + "key", + "underlay_group" + ] + }, "StaticRoute4": { "type": "object", "properties": { @@ -5733,6 +6256,12 @@ "withdrawn" ] }, + "Vni": { + "description": "A Geneve Virtual Network Identifier", + "type": "integer", + "format": "uint32", + "minimum": 0 + }, "AddressFamily": { "description": "Represents the address family (protocol version) for network routes.\n\nThis is the canonical source of truth for address family definitions across the entire codebase. 
All routing-related components (RIB operations, BGP messages, API filtering, CLI tools) use this single enum rather than defining their own.\n\n# Semantics\n\nWhen used in filtering contexts (e.g., database queries or API parameters), `Option` is preferred: - `None` = no filter (match all address families) - `Some(Ipv4)` = IPv4 routes only - `Some(Ipv6)` = IPv6 routes only\n\n# Examples\n\n``` use rdb_types::AddressFamily;\n\nlet ipv4 = AddressFamily::Ipv4; let ipv6 = AddressFamily::Ipv6;\n\n// For filtering, use Option let filter: Option = Some(AddressFamily::Ipv4); let no_filter: Option = None; // matches all families ```", "oneOf": [ @@ -5752,6 +6281,25 @@ } ] }, + "RouteOriginFilter": { + "description": "Filter for multicast route origin.", + "oneOf": [ + { + "description": "Static routes only (operator configured).", + "type": "string", + "enum": [ + "static" + ] + }, + { + "description": "Dynamic routes only (learned via IGMP, MLD, etc.).", + "type": "string", + "enum": [ + "dynamic" + ] + } + ] + }, "ProtocolFilter": { "oneOf": [ { diff --git a/openapi/mg-admin/mg-admin-latest.json b/openapi/mg-admin/mg-admin-latest.json index 0858fdf0..329966e4 120000 --- a/openapi/mg-admin/mg-admin-latest.json +++ b/openapi/mg-admin/mg-admin-latest.json @@ -1 +1 @@ -mg-admin-7.0.0-fba416.json \ No newline at end of file +mg-admin-8.0.0-fc8d9c.json \ No newline at end of file diff --git a/rdb-types/src/lib.rs b/rdb-types/src/lib.rs index 4705985c..af365e51 100644 --- a/rdb-types/src/lib.rs +++ b/rdb-types/src/lib.rs @@ -2,6 +2,8 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. +// Copyright 2026 Oxide Computer Company + //! Core types for routing database operations, shared across maghemite components. //! //! 
This crate provides the fundamental types used for representing network prefixes @@ -59,22 +61,20 @@ impl Prefix4 { new } - pub fn host_bits_are_unset(&self) -> bool { - let mask = match self.length { + /// Return the network mask as a `u32` with the top `length` bits set. + pub fn mask(&self) -> u32 { + match self.length { 0 => 0, _ => u32::MAX << (Self::HOST_MASK - self.length), - }; + } + } - self.value.to_bits() & mask == self.value.to_bits() + pub fn host_bits_are_unset(&self) -> bool { + self.value.to_bits() & self.mask() == self.value.to_bits() } pub fn unset_host_bits(&mut self) { - let mask = match self.length { - 0 => 0, - _ => u32::MAX << (Self::HOST_MASK - self.length), - }; - - self.value = Ipv4Addr::from_bits(self.value.to_bits() & mask) + self.value = Ipv4Addr::from_bits(self.value.to_bits() & self.mask()) } /// Check if this prefix is contained within another prefix. @@ -187,22 +187,20 @@ impl Prefix6 { new } - pub fn host_bits_are_unset(&self) -> bool { - let mask = match self.length { + /// Return the network mask as a `u128` with the top `length` bits set. + pub fn mask(&self) -> u128 { + match self.length { 0 => 0, _ => u128::MAX << (Self::HOST_MASK - self.length), - }; + } + } - self.value.to_bits() & mask == self.value.to_bits() + pub fn host_bits_are_unset(&self) -> bool { + self.value.to_bits() & self.mask() == self.value.to_bits() } pub fn unset_host_bits(&mut self) { - let mask = match self.length { - 0 => 0, - _ => u128::MAX << (Self::HOST_MASK - self.length), - }; - - self.value = Ipv6Addr::from_bits(self.value.to_bits() & mask) + self.value = Ipv6Addr::from_bits(self.value.to_bits() & self.mask()) } /// Check if this prefix is contained within another prefix. 
diff --git a/rdb/Cargo.toml b/rdb/Cargo.toml index 7cce9782..2b7c5e7a 100644 --- a/rdb/Cargo.toml +++ b/rdb/Cargo.toml @@ -18,6 +18,8 @@ chrono.workspace = true clap = { workspace = true, optional = true } oxnet.workspace = true rdb-types = { workspace = true, features = ["clap"] } +poptrie.workspace = true +omicron-common.workspace = true ndp.workspace = true [dev-dependencies] diff --git a/rdb/src/db.rs b/rdb/src/db.rs index a0ad6708..e4e52f03 100644 --- a/rdb/src/db.rs +++ b/rdb/src/db.rs @@ -2,6 +2,8 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. +// Copyright 2026 Oxide Computer Company + //! The routing database (rdb). //! //! This is the maghmite routing database. The routing database holds both @@ -9,19 +11,37 @@ //! in a sled key-value store that is persisted to disk via flush operations. //! Volatile information is stored in in-memory data structures such as hash //! sets. +//! +//! ## Lock Ordering +//! +//! The Db struct contains multiple locks, which we acquire +//! in this ordering: +//! +//! 1. Unicast RIB locks (`rib4_in`, `rib4_loc`, `rib6_in`, `rib6_loc`) +//! 2. MRIB locks (see [`mrib`] module: `mrib_in` → `mrib_loc` → `watchers`) +//! 3. RpfTable poptrie caches (`cache_v4`, `cache_v6`) +//! +//! **RPF lookup exception**: RPF lookups ([`mrib::rpf::RpfTable::lookup`]) hold +//! at most one lock at a time. They first try the poptrie cache (read lock), +//! release it, then fall back to linear scan (RIB lock) if needed. This avoids +//! deadlocks since no path holds cache + RIB locks simultaneously. 
+ use crate::bestpath::bestpaths; use crate::error::Error; use crate::log::rdb_log; +use crate::mrib; +use crate::mrib::rpf::RpfTable; +use crate::mrib::{Mrib, spawn_rpf_revalidator}; use crate::types::*; use chrono::Utc; use mg_common::{lock, read_lock, write_lock}; use sled::Tree; -use slog::{Logger, error}; +use slog::{Logger, debug, error}; use std::cmp::Ordering as CmpOrdering; use std::collections::{BTreeMap, BTreeSet}; use std::net::{IpAddr, Ipv6Addr}; use std::num::NonZeroU8; -use std::sync::atomic::{AtomicU64, Ordering}; +use std::sync::atomic::{AtomicU8, AtomicU64, Ordering}; use std::sync::mpsc::Sender; use std::sync::{Arc, Mutex, RwLock}; use std::thread::{sleep, spawn}; @@ -59,6 +79,10 @@ const STATIC4_ROUTES: &str = "static4_routes"; /// The handle used to open a persistent key-value tree for IPv6 static routes. const STATIC6_ROUTES: &str = "static6_routes"; +/// The handle used to open a persistent key-value tree for multicast static +/// routes. +const STATIC_MCAST_ROUTES: &str = "static_mcast_routes"; + /// Key used in settings tree for tunnel endpoint setting const TEP_KEY: &str = "tep"; @@ -72,10 +96,39 @@ const BESTPATH_FANOUT: &str = "bestpath_fanout"; /// Default bestpath fanout value. Maximum number of ECMP paths in RIB. const DEFAULT_BESTPATH_FANOUT: u8 = 1; +/// Key used in settings tree for MRIB RPF rebuild interval. +const MRIB_RPF_REBUILD_INTERVAL: &str = "mrib_rpf_rebuild_interval"; + +/// Key used in settings tree for MRIB RPF revalidation interval. +const MRIB_RPF_REVALIDATION_INTERVAL: &str = "mrib_rpf_revalidation_interval"; + +/// Default MRIB RPF rebuild interval in milliseconds. +/// +/// Multicast route additions can be bursty, and poptrie rebuilds can be +/// expensive. During rebuilds, RPF verification falls back to linear scan. +const DEFAULT_MRIB_RPF_REBUILD_INTERVAL_MS: u64 = 1000; + pub type Rib = BTreeMap>; pub type Rib4 = BTreeMap>; pub type Rib6 = BTreeMap>; +/// Cached configuration values for low-overhead reads. 
+/// +/// These atomic values are read frequently by hot paths (bestpath selection, +/// RPF revalidation) and cached here to avoid locking the persistent store. +#[derive(Debug, Clone)] +struct CachedConfig { + /// Bestpath fanout for ECMP. + /// + /// This controls how many equal-cost paths are selected for unicast routing + /// and considered for multicast RPF lookup. + bestpath_fanout: Arc, + + /// Periodic RPF revalidation sweep interval in milliseconds. + /// 0 means use [`mrib::DEFAULT_REVALIDATION_INTERVAL`]. + rpf_revalidation_interval_ms: Arc, +} + /// The central routing information base. Both persistent an volatile route /// information is managed through this structure. #[derive(Clone)] @@ -99,6 +152,17 @@ pub struct Db { /// added to the lower half forwarding plane. rib6_loc: Arc>, + /// Multicast routing information base (MRIB). + mrib: Mrib, + + /// [RPF] (Reverse Path Forwarding) table for multicast route verification. + /// + /// [RPF]: https://datatracker.ietf.org/doc/html/rfc5110 + rpf_table: RpfTable, + + /// Cached configuration for low-overhead reads on (possibly) hot paths. + config: CachedConfig, + /// A generation number for the overall data store. generation: Arc, @@ -114,6 +178,7 @@ pub struct Db { log: Logger, } + unsafe impl Sync for Db {} unsafe impl Send for Db {} @@ -123,23 +188,85 @@ struct Watcher { sender: Sender, } -//TODO we need bulk operations with atomic semantics here. +// TODO we need bulk operations with atomic semantics here. impl Db { /// Create a new routing database that stores persistent data at `path`. 
pub fn new(path: &str, log: Logger) -> Result { let rib_loc = Arc::new(Mutex::new(Rib::new())); - Ok(Self { - persistent: sled::open(path)?, + let persistent = sled::open(path)?; + + let config = CachedConfig { + bestpath_fanout: Arc::new(AtomicU8::new(DEFAULT_BESTPATH_FANOUT)), + rpf_revalidation_interval_ms: Arc::new(AtomicU64::new( + u64::try_from(mrib::DEFAULT_REVALIDATION_INTERVAL.as_millis()) + .unwrap(), + )), + }; + + let db = Self { + persistent, rib4_in: Arc::new(Mutex::new(BTreeMap::new())), rib4_loc: Arc::new(Mutex::new(BTreeMap::new())), rib6_in: Arc::new(Mutex::new(BTreeMap::new())), rib6_loc: Arc::new(Mutex::new(BTreeMap::new())), + mrib: Mrib::new(log.clone()), + rpf_table: RpfTable::new(log.clone()), + config, generation: Arc::new(AtomicU64::new(0)), watchers: Arc::new(RwLock::new(Vec::new())), reaper: Reaper::new(rib_loc), slot: Arc::new(RwLock::new(None)), log, - }) + }; + + // Load persisted static multicast routes into `mrib_in` + db.load_mcast_static_routes(); + + // Load bestpath fanout from settings. + let fanout = db.get_bestpath_fanout().unwrap_or_else(|e| { + error!(db.log, "failed to load bestpath_fanout from settings: {e}"); + NonZeroU8::new(DEFAULT_BESTPATH_FANOUT).unwrap() + }); + db.config + .bestpath_fanout + .store(fanout.get(), Ordering::Relaxed); + + // Load RPF rebuild interval from settings and apply to `RpfTable` + let rebuild_interval = db.get_mrib_rpf_rebuild_interval().unwrap_or_else(|e| { + error!( + db.log, + "failed to load mrib_rpf_rebuild_interval from settings: {e}" + ); + std::time::Duration::from_millis( + DEFAULT_MRIB_RPF_REBUILD_INTERVAL_MS, + ) + }); + + db.rpf_table.set_rebuild_interval(rebuild_interval); + + // Load RPF revalidation interval from settings. 
+ let revalidation_interval = + db.get_mrib_rpf_revalidation_interval().unwrap_or_else(|e| { + error!( + db.log, + "failed to load mrib_rpf_revalidation_interval \ + from settings: {e}" + ); + mrib::DEFAULT_REVALIDATION_INTERVAL + }); + + db.config.rpf_revalidation_interval_ms.store( + u64::try_from(revalidation_interval.as_millis()).unwrap(), + Ordering::Relaxed, + ); + + // Start RPF revalidator to handle unicast route changes. + // When the poptrie cache rebuilds, revalidate all (S,G) multicast routes. + if let Some(tx) = spawn_rpf_revalidator(db.clone()) { + db.rpf_table.set_rebuild_notifier(tx); + } + + Ok(db) } pub fn set_reaper_interval(&self, interval: std::time::Duration) { @@ -150,6 +277,103 @@ impl Db { *lock!(self.reaper.stale_max) = stale_max; } + pub fn slot(&self) -> Option { + match self.slot.read() { + Ok(v) => *v, + Err(e) => { + error!(self.log, "unable to read switch slot"; "error" => %e); + None + } + } + } + + pub fn set_slot(&mut self, slot: Option) { + let mut value = self.slot.write().unwrap(); + *value = slot; + } + + // ------------------------------------------------------------------------ + // MRIB / RPF revalidator gettings/setters + // ------------------------------------------------------------------------ + + /// Set the interval for periodic RPF revalidation sweeps. + /// + /// This controls how often the revalidator thread walks the MRIB to + /// re-check (S,G) routes even without explicit unicast RIB changes. + /// This is separate from the poptrie rebuild interval (see + /// [`Self::set_mrib_rpf_rebuild_interval`]). + /// + /// This is persisted to the settings tree. 
+    pub fn set_mrib_rpf_revalidation_interval(
+        &self,
+        interval: std::time::Duration,
+    ) -> Result<(), Error> {
+        // `Duration::as_millis` returns a `u128`, but the setting is stored
+        // as 8 big-endian bytes. Report an oversized interval to the caller
+        // as a validation error instead of panicking; nothing has been
+        // written to the settings tree at this point.
+        let interval_ms =
+            u64::try_from(interval.as_millis()).map_err(|_| {
+                Error::Validation(format!(
+                    "mrib_rpf_revalidation_interval {interval:?} \
+                     does not fit in 64 bits of milliseconds"
+                ))
+            })?;
+
+        let tree = self.persistent.open_tree(SETTINGS)?;
+        tree.insert(
+            MRIB_RPF_REVALIDATION_INTERVAL,
+            &interval_ms.to_be_bytes(),
+        )?;
+        tree.flush()?;
+
+        // Update the cached atomic only after the insert and flush succeeded.
+        self.config
+            .rpf_revalidation_interval_ms
+            .store(interval_ms, Ordering::Relaxed);
+        Ok(())
+    }
+
+    /// Get the RPF revalidation sweep interval from the persistent store.
+    ///
+    /// Falls back to [`mrib::DEFAULT_REVALIDATION_INTERVAL`] (in whole
+    /// milliseconds) when the setting has never been written.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`Error::DbValue`] if the stored value is not exactly 8 bytes,
+    /// and propagates any error from opening or reading the settings tree.
+    pub fn get_mrib_rpf_revalidation_interval(
+        &self,
+    ) -> Result<std::time::Duration, Error> {
+        let tree = self.persistent.open_tree(SETTINGS)?;
+        let interval_ms = match tree.get(MRIB_RPF_REVALIDATION_INTERVAL)? {
+            None => {
+                u64::try_from(mrib::DEFAULT_REVALIDATION_INTERVAL.as_millis())
+                    .unwrap()
+            }
+            Some(value) => {
+                // The setter stores the interval as 8 big-endian bytes.
+                let bytes: [u8; 8] = (*value).try_into().map_err(|_| {
+                    Error::DbValue(format!(
+                        "invalid mrib_rpf_revalidation_interval \
+                         value in db: expected 8 bytes, found {}",
+                        value.len()
+                    ))
+                })?;
+                u64::from_be_bytes(bytes)
+            }
+        };
+        Ok(std::time::Duration::from_millis(interval_ms))
+    }
+
+    /// Get the RPF revalidation sweep interval (atomic, for the
+    /// revalidator thread).
+    ///
+    /// Returns a new handle to the shared atomic, not a snapshot of its value.
+    pub fn get_mrib_rpf_revalidation_interval_ms(&self) -> Arc<AtomicU64> {
+        Arc::clone(&self.config.rpf_revalidation_interval_ms)
+    }
+
+    /// Get the IPv4 loc-rib mutex (for revalidation).
+    pub fn rib4_loc(&self) -> Arc<Mutex<Rib4>> {
+        Arc::clone(&self.rib4_loc)
+    }
+
+    /// Get the IPv6 loc-rib mutex (for revalidation).
+    pub fn rib6_loc(&self) -> Arc<Mutex<Rib6>> {
+        Arc::clone(&self.rib6_loc)
+    }
+
+    /// Get the bestpath fanout atomic (for revalidation).
+    pub fn bestpath_fanout_atomic(&self) -> Arc<AtomicU8> {
+        Arc::clone(&self.config.bestpath_fanout)
+    }
+
+    /// Borrow the in-memory multicast RIB owned by this database.
+    pub fn mrib(&self) -> &Mrib {
+        &self.mrib
+    }
+
+    /// Borrow the logger this database writes to.
+    pub fn log(&self) -> &Logger {
+        &self.log
+    }
+
     /// Register a routing databse watcher.
pub fn watch(&self, tag: String, sender: Sender) { write_lock!(self.watchers).push(Watcher { tag, sender }); @@ -651,20 +875,12 @@ impl Db { rib_loc: &mut Rib4, prefix: &Prefix4, ) { - let fanout = self.get_bestpath_fanout().unwrap_or_else(|e| { - rdb_log!( - self, - error, - "failed to get bestpath fanout: {e}"; - "unit" => UNIT_PERSISTENT - ); - NonZeroU8::new(DEFAULT_BESTPATH_FANOUT).unwrap() - }); + let fanout = self.config.bestpath_fanout.load(Ordering::Relaxed); match rib_in.get(prefix) { // rib-in has paths worth evaluating for loc-rib Some(paths) => { - match bestpaths(paths, fanout.get().into()) { + match bestpaths(paths, fanout as usize) { // bestpath found at least 1 path for loc-rib Some(bp) => { rib_loc.insert(*prefix, bp.clone()); @@ -680,6 +896,11 @@ impl Db { rib_loc.remove(prefix); } } + + // Request RPF table rebuild (may be rate-limited). + // Pass the specific prefix for targeted (S,G) revalidation. + self.rpf_table + .trigger_rebuild_v4(Arc::clone(&self.rib4_loc), Some(*prefix)); } pub fn update_rib6_loc( @@ -688,20 +909,12 @@ impl Db { rib_loc: &mut Rib6, prefix: &Prefix6, ) { - let fanout = self.get_bestpath_fanout().unwrap_or_else(|e| { - rdb_log!( - self, - error, - "failed to get bestpath fanout: {e}"; - "unit" => UNIT_PERSISTENT - ); - NonZeroU8::new(DEFAULT_BESTPATH_FANOUT).unwrap() - }); + let fanout = self.config.bestpath_fanout.load(Ordering::Relaxed); match rib_in.get(prefix) { // rib-in has paths worth evaluating for loc-rib Some(paths) => { - match bestpaths(paths, fanout.get().into()) { + match bestpaths(paths, fanout as usize) { // bestpath found at least 1 path for loc-rib Some(bp) => { rib_loc.insert(*prefix, bp.clone()); @@ -717,6 +930,11 @@ impl Db { rib_loc.remove(prefix); } } + + // Request RPF table rebuild (may be rate-limited). + // Pass the specific prefix for targeted (S,G) revalidation. 
+ self.rpf_table + .trigger_rebuild_v6(Arc::clone(&self.rib6_loc), Some(*prefix)); } // generic helper function to kick off a bestpath run for some @@ -1351,10 +1569,60 @@ impl Db { let tree = self.persistent.open_tree(SETTINGS)?; tree.insert(BESTPATH_FANOUT, &[fanout.get()])?; tree.flush()?; + + // Update cached atomic for RPF revalidator + self.config + .bestpath_fanout + .store(fanout.get(), Ordering::Relaxed); + self.trigger_bestpath_when(|_pfx, _paths| true); Ok(()) } + /// Get the minimum interval between poptrie cache rebuilds. + /// + /// This rate-limits how often the poptrie is rebuilt in response to + /// unicast RIB changes. When rate-limited, lookups fall back to linear + /// scan. This is separate from the revalidation sweep interval (see + /// [`Self::set_mrib_rpf_revalidation_interval`]). + pub fn get_mrib_rpf_rebuild_interval( + &self, + ) -> Result { + let tree = self.persistent.open_tree(SETTINGS)?; + let interval_ms = match tree.get(MRIB_RPF_REBUILD_INTERVAL)? { + None => DEFAULT_MRIB_RPF_REBUILD_INTERVAL_MS, + Some(value) => { + let bytes: [u8; 8] = (*value).try_into().map_err(|_| { + Error::DbValue(format!( + "invalid mrib_rpf_rebuild_interval value in db: expected 8 bytes, found {}", + value.len() + )) + })?; + u64::from_be_bytes(bytes) + } + }; + Ok(std::time::Duration::from_millis(interval_ms)) + } + + /// Set the minimum interval between poptrie cache rebuilds. + /// + /// This rate-limits how often the poptrie is rebuilt in response to + /// unicast RIB changes. When rate-limited, lookups fall back to linear + /// scan. + /// + /// This is persisted to the settings tree. 
+    pub fn set_mrib_rpf_rebuild_interval(
+        &self,
+        interval: std::time::Duration,
+    ) -> Result<(), Error> {
+        // `Duration::as_millis` returns a `u128`, but the setting is stored
+        // as 8 big-endian bytes. Report an oversized interval to the caller
+        // as a validation error instead of panicking; nothing has been
+        // written to the settings tree at this point.
+        let interval_ms =
+            u64::try_from(interval.as_millis()).map_err(|_| {
+                Error::Validation(format!(
+                    "mrib_rpf_rebuild_interval {interval:?} \
+                     does not fit in 64 bits of milliseconds"
+                ))
+            })?;
+
+        let tree = self.persistent.open_tree(SETTINGS)?;
+        tree.insert(MRIB_RPF_REBUILD_INTERVAL, &interval_ms.to_be_bytes())?;
+        tree.flush()?;
+
+        // Apply to the live RPF table only after the insert and flush
+        // succeeded.
+        self.rpf_table.set_rebuild_interval(interval);
+        Ok(())
+    }
+
     pub fn mark_bgp_peer_stale4(&self, peer: PeerId) {
         let mut rib = lock!(self.rib4_loc);
         rib.iter_mut().for_each(|(_prefix, path)| {
@@ -1399,19 +1667,300 @@ impl Db {
         });
     }
 
-    pub fn slot(&self) -> Option {
-        match self.slot.read() {
-            Ok(v) => *v,
+    // ========================================================================
+    // MRIB (Multicast RIB) functionality
+    // ========================================================================
+
+    /// Update `mrib_loc` by performing RPF verification for a multicast route.
+    ///
+    /// For a route to be promoted from `mrib_in` to `mrib_loc`, it must pass
+    /// Reverse Path Forwarding (RPF) checks:
+    /// - For (*,G) routes: always promoted (no source to verify)
+    /// - For (S,G) routes: derive the RPF neighbor from the unicast RIB.
+    ///   If a route to the source exists, install with the derived neighbor.
+    ///   Otherwise, remove from `mrib_loc`.
+    ///
+    /// Both cases use atomic operations to avoid races with concurrent route
+    /// updates (e.g., adding replication targets).
+    pub fn update_mrib_loc(&self, key: &MulticastRouteKey) {
+        match key.source() {
+            // (*,G): there is no source to verify, so promote directly.
+            None => {
+                self.mrib.promote_any_source(key);
+            }
+            // (S,G): the unicast RIB decides which neighbor (if any) is the
+            // reverse path towards the source.
+            Some(src) => {
+                let max_paths = usize::from(
+                    self.config.bestpath_fanout.load(Ordering::Relaxed),
+                );
+                let neighbor = self.rpf_table.lookup(
+                    src,
+                    &self.rib4_loc,
+                    &self.rib6_loc,
+                    max_paths,
+                );
+
+                if neighbor.is_none() {
+                    debug!(
+                        self.log,
+                        "deselecting (S,G) route: no unicast path to source";
+                        "key" => %key
+                    );
+                }
+
+                // Record the neighbor in mrib_in and install or withdraw the
+                // route in mrib_loc in a single step.
+                self.mrib.apply_rpf_result(key, neighbor);
+            }
+        }
+    }
+
+    /// Revalidate (S,G) routes against the unicast RIB.
+    ///
+    /// When the unicast RIB changes, re-derive `rpf_neighbor` for affected
+    /// routes. If `event` is provided with a specific prefix, only routes
+    /// whose source falls within that prefix are revalidated (targeted
+    /// revalidation). Otherwise, all (S,G) routes are revalidated (full
+    /// sweep).
+    ///
+    /// Uses atomic operations to avoid races with concurrent route updates.
+ pub(crate) fn revalidate_mrib( + &self, + event: Option, + ) { + let fanout = self.bestpath_fanout_atomic().load(Ordering::Relaxed); + let rib4_loc = self.rib4_loc(); + let rib6_loc = self.rib6_loc(); + + // Get all (S,G) route keys for revalidation + let keys: Vec<_> = self + .mrib + .get_source_specific_keys() + .into_iter() + .filter_map(|key| { + let source = key.source()?; + // Targeted revalidation (skip routes not affected) + if let Some(ref evt) = event + && !evt.matches_source(source) + { + return None; + } + Some((key, source)) + }) + .collect(); + + for (key, source) in keys { + // Re-derive rpf_neighbor from current unicast RIB + let rpf_neighbor = self.rpf_table.lookup( + source, + &rib4_loc, + &rib6_loc, + fanout as usize, + ); + + if rpf_neighbor.is_none() { + debug!( + self.log, + "revalidation: deselecting (S,G) route, no unicast path"; + "key" => %key + ); + } + + // Atomically update mrib_in and mrib_loc + self.mrib.apply_rpf_result(&key, rpf_neighbor); + } + } + + /// Load persisted static multicast routes into `mrib_in` at startup. + /// + /// After loading each route, we perform RPF verification to promote + /// eligible routes to `mrib_loc`. This ensures routes are installed + /// immediately at startup rather than waiting for the next periodic sweep. 
+ fn load_mcast_static_routes(&self) { + let tree = match self.persistent.open_tree(STATIC_MCAST_ROUTES) { + Ok(t) => t, Err(e) => { - error!(self.log, "unable to read switch slot"; "error" => %e); - None + error!( + self.log, + "failed to open static mcast routes tree: {e}" + ); + return; + } + }; + + for result in tree.iter() { + let (_, value) = match result { + Ok(kv) => kv, + Err(e) => { + error!(self.log, "failed to read mcast route: {e}"); + continue; + } + }; + + let value = String::from_utf8_lossy(&value); + let route = match serde_json::from_str::(&value) { + Ok(r) => r, + Err(e) => { + error!(self.log, "failed to deserialize mcast route: {e}"); + continue; + } + }; + + let key = route.key; + if let Err(e) = self.mrib.add_route(route) { + error!( + self.log, + "failed to load mcast route: {e}"; + "key" => %key + ); + continue; } + + // Perform RPF verification and promote to mrib_loc if eligible + self.update_mrib_loc(&key); } } - pub fn set_slot(&mut self, slot: Option) { - let mut value = self.slot.write().unwrap(); - *value = slot; + /// Add static multicast routes to the MRIB. + /// + /// Routes are persisted to disk and added to `mrib_in`. Then + /// `update_mrib_loc` derives `rpf_neighbor` from the unicast RIB and + /// promotes routes to `mrib_loc` if a valid path exists. + /// + /// Uses upsert semantics: existing routes with the same key are updated. + /// This enables idempotent calls from Nexus RPWs. + pub fn add_static_mcast_routes( + &self, + routes: &[MulticastRoute], + ) -> Result<(), Error> { + let tree = self.persistent.open_tree(STATIC_MCAST_ROUTES)?; + + // Pre-serialize keys and values outside the transaction to + // keep fallible serde operations out of the closure. 
+ let entries: Vec<(Vec, String)> = routes + .iter() + .map(|route| { + Ok((route.key.db_key()?, serde_json::to_string(route)?)) + }) + .collect::>()?; + + // Persist atomically before updating the in-memory MRIB, so a + // partial write never leaves config and state out of sync. + tree.transaction(|tx_db| { + for (key, value) in &entries { + tx_db.insert(key.as_slice(), value.as_str())?; + } + Ok(()) + })?; + + tree.flush()?; + + for route in routes { + self.mrib.add_route(route.clone())?; + } + + // Derive rpf_neighbor and promote to `mrib_loc` + for route in routes { + self.update_mrib_loc(&route.key); + } + + Ok(()) + } + + /// Remove static multicast routes from the MRIB. + /// + /// Routes are removed from persistence and both `mrib_in` and `mrib_loc`. + pub fn remove_static_mcast_routes( + &self, + keys: &[MulticastRouteKey], + ) -> Result<(), Error> { + let tree = self.persistent.open_tree(STATIC_MCAST_ROUTES)?; + + let key_bytes: Vec> = keys + .iter() + .map(|key| key.db_key()) + .collect::>()?; + + // Remove from persistence atomically first. + tree.transaction(|tx_db| { + for kb in &key_bytes { + tx_db.remove(kb.as_slice())?; + } + Ok(()) + })?; + + tree.flush()?; + + for key in keys { + self.mrib.remove_route(key)?; + } + Ok(()) + } + + /// Get all static multicast routes from persistence. + pub fn get_static_mcast_routes( + &self, + ) -> Result, Error> { + let tree = self.persistent.open_tree(STATIC_MCAST_ROUTES)?; + let mut routes = Vec::new(); + + for result in tree.iter() { + let (_, value) = result?; + let value = String::from_utf8_lossy(&value); + let route: MulticastRoute = serde_json::from_str(&value)?; + routes.push(route); + } + + Ok(routes) + } + + /// Get a specific multicast route. + pub fn get_mcast_route( + &self, + key: &MulticastRouteKey, + ) -> Option { + self.mrib.get_route(key) + } + + /// Get the full MRIB input table (all routes from all sources). 
+ pub fn full_mrib(&self) -> crate::mrib::MribTable { + self.mrib.full_mrib() + } + + /// Get the local MRIB table (selected/installed routes). + pub fn loc_mrib(&self) -> crate::mrib::MribTable { + self.mrib.loc_mrib() + } + + /// List MRIB routes with filtering, cloning only matching entries. + /// + /// This is more efficient than `full_mrib()`/`loc_mrib()` when filtering + /// is needed, as it clones only the routes that match the filter. + /// + /// Parameters: + /// - `af`: Filter by address family (`None = all`) + /// - `static_only`: Filter by origin (`None = all`, `Some(true) = static`, + /// `Some(false) = dynamic`) + /// - `installed`: If true, query `mrib_loc`; otherwise `mrib_in` + pub fn mrib_list( + &self, + af: Option, + static_only: Option, + installed: bool, + ) -> Vec { + self.mrib.list_routes(af, static_only, installed) + } + + /// Get a specific multicast route from `mrib_loc` (selected/installed). + pub fn get_selected_mcast_route( + &self, + key: &MulticastRouteKey, + ) -> Option { + self.mrib.get_selected_route(key) + } + + /// Register a watcher for MRIB changes. 
+ pub fn watch_mrib( + &self, + tag: String, + sender: Sender, + ) { + self.mrib.watch(tag, sender); } pub fn mark_bgp_peer_stale(&self, peer: PeerId, af: AddressFamily) { @@ -1450,38 +1999,49 @@ impl Reaper { } fn reap(self: &Arc) { - self.rib - .lock() - .unwrap() - .iter_mut() - .for_each(|(_prefix, paths)| { - paths.retain(|p| { - p.bgp - .as_ref() - .map(|b| { - b.stale - .map(|s| { - Utc::now().signed_duration_since(s) - < *lock!(self.stale_max) - }) - .unwrap_or(true) - }) - .unwrap_or(true) - }) - }); + lock!(self.rib).iter_mut().for_each(|(_prefix, paths)| { + paths.retain(|p| { + p.bgp + .as_ref() + .map(|b| { + b.stale + .map(|s| { + Utc::now().signed_duration_since(s) + < *lock!(self.stale_max) + }) + .unwrap_or(true) + }) + .unwrap_or(true) + }) + }); } } #[cfg(test)] mod test { use crate::{ - AddressFamily, DEFAULT_RIB_PRIORITY_STATIC, Path, Prefix, Prefix4, - Prefix6, StaticRouteKey, db::Db, test::TestDb, types::PrefixDbKey, - types::test_helpers::path_vecs_equal, + AddressFamily, DEFAULT_MULTICAST_VNI, DEFAULT_RIB_PRIORITY_STATIC, + Path, Prefix, Prefix4, Prefix6, StaticRouteKey, + db::Db, + test::{TEST_WAIT_ITERATIONS, TestDb}, + types::{ + MulticastAddr, MulticastAddrV4, MulticastAddrV6, MulticastRoute, + MulticastRouteKey, MulticastSourceProtocol, PrefixDbKey, + UnderlayMulticastIpv6, UnicastAddrV4, UnicastAddrV6, + test_helpers::path_vecs_equal, + }, }; use mg_common::log::*; + use mg_common::test::DEFAULT_INTERVAL; + use mg_common::wait_for; use std::net::{IpAddr, Ipv4Addr, Ipv6Addr}; use std::str::FromStr; + use std::time::Duration; + + fn test_underlay() -> UnderlayMulticastIpv6 { + UnderlayMulticastIpv6::new(Ipv6Addr::new(0xff04, 0, 0, 0, 0, 0, 0, 1)) + .expect("valid test underlay address") + } fn get_test_db() -> TestDb { let log = init_file_logger("rib.log"); @@ -1801,6 +2361,297 @@ mod test { assert!(db.loc_rib(None).is_empty()); } + #[test] + fn test_mrib_revalidation_on_rib_change() { + // Inlined helper to test revalidation for a 
given address family. + // `rpf_neighbor` is derived from the unicast RIB. A route is + // selected when the unicast path exists and deselected when + // a unicast path is removed + fn test_af + Copy>( + db: &Db, + s_ip: IpAddr, + prefix: P, + nexthop: IpAddr, + group: MulticastAddr, + ) { + let srk = StaticRouteKey { + prefix: prefix.into(), + nexthop, + vlan_id: None, + rib_priority: DEFAULT_RIB_PRIORITY_STATIC, + }; + db.add_static_routes(&[srk]).unwrap(); + + let key = MulticastRouteKey::new( + Some(s_ip), + group, + DEFAULT_MULTICAST_VNI, + ) + .expect("AF match"); + let route = MulticastRoute::new( + key, + test_underlay(), + MulticastSourceProtocol::Static, + ); + db.add_static_mcast_routes(&[route]).unwrap(); + + // Initially should be selected + wait_for!( + db.get_selected_mcast_route(&key).is_some(), + DEFAULT_INTERVAL, + TEST_WAIT_ITERATIONS, + "(S,G) was not selected initially" + ); + + // Verify `rpf_neighbor` was derived + let selected = db.get_selected_mcast_route(&key).unwrap(); + assert_eq!( + selected.rpf_neighbor, + Some(nexthop), + "rpf_neighbor should be derived from unicast RIB" + ); + + // Remove unicast route; MRIB should be de-selected + db.remove_static_routes(&[srk]).unwrap(); + + wait_for!( + db.get_selected_mcast_route(&key).is_none(), + DEFAULT_INTERVAL, + TEST_WAIT_ITERATIONS, + "(S,G) remained selected after unicast route removed" + ); + + // Re-add unicast route + db.add_static_routes(&[srk]).unwrap(); + + // MRIB should be selected again + wait_for!( + db.get_selected_mcast_route(&key).is_some(), + DEFAULT_INTERVAL, + TEST_WAIT_ITERATIONS, + "(S,G) not re-selected after unicast route restored" + ); + + // Cleanup + db.remove_static_routes(&[srk]).unwrap(); + db.remove_static_mcast_routes(&[key]).unwrap(); + } + + let log = init_file_logger("mrib_reval.log"); + let db = + crate::test::get_test_db("mrib_reval", log).expect("create db"); + db.set_mrib_rpf_rebuild_interval(std::time::Duration::ZERO) + .unwrap(); + + // IPv4 + test_af( 
+ &db, + IpAddr::V4(Ipv4Addr::new(192, 0, 2, 10)), + "192.0.2.0/24".parse::().unwrap(), + IpAddr::V4(Ipv4Addr::new(198, 51, 100, 1)), + MulticastAddr::new_v4(225, 1, 1, 1).expect("valid mcast"), + ); + + // IPv6 + test_af( + &db, + IpAddr::V6(Ipv6Addr::new(0x2001, 0xdb8, 0, 0, 0, 0, 0, 10)), + "2001:db8::/32".parse::().unwrap(), + IpAddr::V6(Ipv6Addr::new(0xfe80, 0, 0, 0, 0, 0, 0, 1)), + MulticastAddr::new_v6([0xff0e, 0, 0, 0, 0, 0, 0, 1]) + .expect("valid mcast"), + ); + } + + /// Test (*,G) vs (S,G) selection behavior. + /// + /// - (*,G) routes are always selected (no RPF check needed) + /// - (S,G) routes require a unicast route to the source for RPF + #[test] + fn test_mrib_any_source_vs_source_specific() { + let log = init_file_logger("mrib_asm_ssm.log"); + let db = + crate::test::get_test_db("mrib_asm_ssm", log).expect("create db"); + db.set_mrib_rpf_rebuild_interval(Duration::ZERO).unwrap(); + + // Case: (*,G) with ASM address goes to `mrib_loc` immediately + // (no unicast route needed) + let asm_group = + MulticastAddr::new_v4(225, 5, 5, 5).expect("valid mcast"); + let star_g_key = MulticastRouteKey::any_source(asm_group); + let star_g_route = MulticastRoute::new( + star_g_key, + test_underlay(), + MulticastSourceProtocol::Static, + ); + + db.add_static_mcast_routes(&[star_g_route]).unwrap(); + + // (*,G) should be in both `mrib_in` AND `mrib_loc` immediately + wait_for!( + db.get_selected_mcast_route(&star_g_key).is_some(), + DEFAULT_INTERVAL, + TEST_WAIT_ITERATIONS, + "(*,G) should be in mrib_loc immediately" + ); + assert!( + db.get_mcast_route(&star_g_key).is_some(), + "(*,G) should also be in mrib_in" + ); + + // Case: (S,G) with SSM address (232.x) - requires unicast route + let ssm_group = MulticastAddrV4::new(Ipv4Addr::new(232, 1, 1, 1)) + .expect("valid mcast"); // SSM range + let source = UnicastAddrV4::new(Ipv4Addr::new(10, 0, 0, 100)) + .expect("valid unicast"); + let sg_key = MulticastRouteKey::source_specific_v4(source, ssm_group); + let 
sg_route = MulticastRoute::new( + sg_key, + test_underlay(), + MulticastSourceProtocol::Static, + ); + + db.add_static_mcast_routes(&[sg_route]).unwrap(); + + // (S,G) should be in `mrib_in` but NOT in `mrib_loc` yet + assert!( + db.get_mcast_route(&sg_key).is_some(), + "(S,G) should be in mrib_in" + ); + assert!( + db.get_selected_mcast_route(&sg_key).is_none(), + "(S,G) should NOT be in mrib_loc without unicast route" + ); + + // Add unicast route to source, now (S,G) should be selected + let srk = StaticRouteKey { + prefix: "10.0.0.0/24".parse::().unwrap().into(), + nexthop: IpAddr::V4(Ipv4Addr::new(198, 51, 100, 1)), + vlan_id: None, + rib_priority: DEFAULT_RIB_PRIORITY_STATIC, + }; + db.add_static_routes(&[srk]).unwrap(); + + wait_for!( + db.get_selected_mcast_route(&sg_key).is_some(), + DEFAULT_INTERVAL, + TEST_WAIT_ITERATIONS, + "(S,G) should be selected after adding unicast route" + ); + + // Case: IPv6 (*,G) with global scope - goes to `mrib_loc` immediately + let v6_group = + MulticastAddr::new_v6([0xff0e, 0, 0, 0, 0, 0, 0, 0x5555]) + .expect("valid mcast"); + let v6_star_g_key = MulticastRouteKey::any_source(v6_group); + let v6_star_g_route = MulticastRoute::new( + v6_star_g_key, + test_underlay(), + MulticastSourceProtocol::Static, + ); + + db.add_static_mcast_routes(&[v6_star_g_route]).unwrap(); + + wait_for!( + db.get_selected_mcast_route(&v6_star_g_key).is_some(), + DEFAULT_INTERVAL, + TEST_WAIT_ITERATIONS, + "IPv6 (*,G) should be selected immediately" + ); + + // Case: IPv6 (S,G) with SSM address (ff3e::) + let v6_ssm_group = MulticastAddrV6::new(Ipv6Addr::new( + 0xff3e, 0, 0, 0, 0, 0, 0, 0x1234, + )) + .expect("valid mcast"); + let v6_source = UnicastAddrV6::new(Ipv6Addr::new( + 0x2001, 0xdb8, 0, 0, 0, 0, 0, 0x100, + )) + .expect("valid unicast"); + let v6_sg_key = + MulticastRouteKey::source_specific_v6(v6_source, v6_ssm_group); + let v6_sg_route = MulticastRoute::new( + v6_sg_key, + test_underlay(), + MulticastSourceProtocol::Static, + ); + + 
db.add_static_mcast_routes(&[v6_sg_route]).unwrap(); + + // IPv6 (S,G) should be in `mrib_in` but NOT in `mrib_loc` yet + // (operations are synchronous, no sleep needed) + assert!( + db.get_mcast_route(&v6_sg_key).is_some(), + "IPv6 (S,G) should be in mrib_in" + ); + assert!( + db.get_selected_mcast_route(&v6_sg_key).is_none(), + "IPv6 (S,G) should NOT be in mrib_loc without unicast route" + ); + + // Add unicast route + let v6_srk = StaticRouteKey { + prefix: "2001:db8::/32".parse::().unwrap().into(), + nexthop: IpAddr::V6(Ipv6Addr::new(0xfe80, 0, 0, 0, 0, 0, 0, 1)), + vlan_id: None, + rib_priority: DEFAULT_RIB_PRIORITY_STATIC, + }; + db.add_static_routes(&[v6_srk]).unwrap(); + + wait_for!( + db.get_selected_mcast_route(&v6_sg_key).is_some(), + DEFAULT_INTERVAL, + TEST_WAIT_ITERATIONS, + "IPv6 (S,G) should be selected after adding unicast route" + ); + + // Cleanup + db.remove_static_routes(&[srk, v6_srk]).unwrap(); + db.remove_static_mcast_routes(&[ + star_g_key, + sg_key, + v6_star_g_key, + v6_sg_key, + ]) + .unwrap(); + } + + #[test] + fn test_mrib_static_persistence() { + let db_path = "/tmp/mrib_persist_test.db"; + let _ = std::fs::remove_dir_all(db_path); + + let group = MulticastAddr::new_v4(225, 2, 2, 2).expect("valid mcast"); + let key = MulticastRouteKey::any_source(group); + let route = MulticastRoute::new( + key, + test_underlay(), + MulticastSourceProtocol::Static, + ); + + // Create Db and add static multicast route + { + let log = init_file_logger("mrib_persist1.log"); + let db = Db::new(db_path, log).expect("create db"); + db.add_static_mcast_routes(std::slice::from_ref(&route)) + .expect("add static mcast route"); + assert_eq!(db.get_static_mcast_routes().unwrap().len(), 1); + assert!(db.get_mcast_route(&key).is_some()); + } + + // Reopen Db and verify route was loaded from persistence + { + let log = init_file_logger("mrib_persist2.log"); + let db = Db::new(db_path, log).expect("reopen db"); + assert_eq!(db.full_mrib().len(), 1); + 
assert!(db.get_mcast_route(&key).is_some()); + assert_eq!(db.get_static_mcast_routes().unwrap().len(), 1); + } + + // Cleanup + let _ = std::fs::remove_dir_all(db_path); + } + #[test] fn test_static_routing_ipv4_basic() { let db = get_test_db(); diff --git a/rdb/src/error.rs b/rdb/src/error.rs index e562a36e..fab696e2 100644 --- a/rdb/src/error.rs +++ b/rdb/src/error.rs @@ -2,6 +2,8 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. +// Copyright 2026 Oxide Computer Company + #[derive(thiserror::Error, Debug)] pub enum Error { #[error("datastore error {0}")] @@ -13,6 +15,9 @@ pub enum Error { #[error("serialization error {0}")] Serialization(#[from] serde_json::Error), + #[error("io error {0}")] + Io(#[from] std::io::Error), + #[error("db key error {0}")] DbKey(String), @@ -24,4 +29,10 @@ pub enum Error { #[error("Parsing error {0}")] Parsing(String), + + #[error("Not found: {0}")] + NotFound(String), + + #[error("Validation error: {0}")] + Validation(String), } diff --git a/rdb/src/lib.rs b/rdb/src/lib.rs index 58a494f0..bd606767 100644 --- a/rdb/src/lib.rs +++ b/rdb/src/lib.rs @@ -2,10 +2,14 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. +// Copyright 2026 Oxide Computer Company + pub mod db; +pub mod mrib; pub mod types; pub use db::Db; +pub use mrib::Mrib; pub use types::*; pub mod bestpath; pub mod error; diff --git a/rdb/src/mrib/mod.rs b/rdb/src/mrib/mod.rs new file mode 100644 index 00000000..87caf8b6 --- /dev/null +++ b/rdb/src/mrib/mod.rs @@ -0,0 +1,637 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// Copyright 2026 Oxide Computer Company + +//! Multicast Routing Information Base (MRIB). +//! +//! 
The MRIB manages in-memory multicast routing state, including: +//! - (*,G) entries (any-source multicast) +//! - (S,G) entries (source-specific multicast) +//! - Replication targets (local interfaces and remote nexthops) +//! - TODO: IGMP/MLD-learned routes (dynamic) +//! +//! ## Lock Ordering +//! +//! When acquiring multiple locks, we acquire them in this ordering: +//! 1. `mrib_in` +//! 2. `mrib_loc` +//! 3. `watchers` + +use std::collections::BTreeMap; +use std::collections::btree_map::Entry; +use std::net::IpAddr; +use std::sync::atomic::Ordering; +use std::sync::mpsc::{self, RecvTimeoutError, Sender}; +use std::sync::{Arc, Mutex, RwLock}; +use std::thread; +use std::time::Duration; + +use slog::{Logger, error, info}; + +use mg_common::{lock, read_lock, write_lock}; + +use crate::error::Error; +use crate::types::{ + AddressFamily, MribChangeNotification, MulticastAddr, MulticastRoute, + MulticastRouteKey, MulticastSourceProtocol, +}; + +pub mod rpf; + +// Re-export from rpf module +pub use rpf::DEFAULT_REVALIDATION_INTERVAL; + +/// The MRIB table type: maps multicast route keys to route entries. +/// Each entry maps a [MulticastRouteKey] to a [MulticastRoute]. +pub type MribTable = BTreeMap; + +/// The Multicast Routing Information Base. +/// +/// Pure in-memory multicast routing tables, matching the unicast RIB pattern. +/// Persistence is handled by [`crate::Db`]. +/// +/// The MRIB maintains two tables: +/// - `mrib_in`: All multicast routes from all sources (static, IGMP) +/// - `mrib_loc`: Selected routes that pass Reverse Path Forwarding (RPF) +/// checks and are installed in the data plane. +/// +/// Note: `(*,G)` routes have no source address, so they always pass +/// to `mrib_loc` immediately (RPF only applies to `(S,G)` routes). +#[derive(Clone)] +pub struct Mrib { + /// All multicast routes from all sources (static, IGMP). + mrib_in: Arc>, + + /// Selected multicast routes that have passed RPF verification. 
+ mrib_loc: Arc>, + + /// Watchers notified of MRIB changes. + watchers: Arc>>, + + log: Logger, +} + +#[derive(Clone)] +struct MribWatcher { + tag: String, + sender: Sender, +} + +impl Mrib { + pub fn new(log: Logger) -> Self { + Self { + mrib_in: Arc::new(Mutex::new(MribTable::new())), + mrib_loc: Arc::new(Mutex::new(MribTable::new())), + watchers: Arc::new(RwLock::new(Vec::new())), + log, + } + } + + /// Register a watcher for MRIB changes. + pub fn watch(&self, tag: String, sender: Sender) { + write_lock!(self.watchers).push(MribWatcher { tag, sender }); + } + + /// Remove a watcher by tag. + pub fn unwatch(&self, tag: &str) { + write_lock!(self.watchers).retain(|w| w.tag != tag); + } + + /// Notify all watchers of MRIB changes. + /// + /// Automatically removes watchers whose channels have been closed. + /// + /// This function releases the lock before sending to avoid potential + /// deadlocks if a watcher's receiver calls back into the MRIB. + fn notify(&self, n: MribChangeNotification) { + // Snapshot watchers under lock, then release before sending + let snapshot: Vec<_> = + read_lock!(self.watchers).iter().cloned().collect(); + + // Send to all watchers (lock released, no deadlock risk) + let mut dead_tags = Vec::new(); + for MribWatcher { tag, sender } in &snapshot { + if let Err(e) = sender.send(n.clone()) { + error!(self.log, "watcher '{tag}' disconnected, removing: {e}"); + dead_tags.push(tag.clone()); + } + } + + // Remove dead watchers + if !dead_tags.is_empty() { + write_lock!(self.watchers).retain(|w| !dead_tags.contains(&w.tag)); + } + } + + /// Get a copy of the full MRIB input table (all routes from all sources). + pub fn full_mrib(&self) -> MribTable { + lock!(self.mrib_in).clone() + } + + /// Get a copy of the local MRIB table (selected/installed routes). + pub fn loc_mrib(&self) -> MribTable { + lock!(self.mrib_loc).clone() + } + + /// List routes with filtering, cloning only matching entries. 
+ /// + /// Arguments: + /// - `af`: Filter by address family (`None = all`) + /// - `static_only`: Filter by origin (`None = all`, `Some(true) = static`, + /// Some(false) = dynamic) + /// - `installed`: If true, query `mrib_loc`; otherwise `mrib_in` + pub fn list_routes( + &self, + af: Option, + static_only: Option, + installed: bool, + ) -> Vec { + let filter = |route: &&MulticastRoute| -> bool { + // Address family filter + let af_match = match af { + None => true, + Some(AddressFamily::Ipv4) => { + matches!(route.key.group(), MulticastAddr::V4(_)) + } + Some(AddressFamily::Ipv6) => { + matches!(route.key.group(), MulticastAddr::V6(_)) + } + }; + // Origin filter + let origin_match = match static_only { + None => true, + Some(true) => { + matches!(route.source, MulticastSourceProtocol::Static) + } + Some(false) => { + !matches!(route.source, MulticastSourceProtocol::Static) + } + }; + af_match && origin_match + }; + + if installed { + lock!(self.mrib_loc) + .values() + .filter(filter) + .cloned() + .collect() + } else { + lock!(self.mrib_in) + .values() + .filter(filter) + .cloned() + .collect() + } + } + + /// Get a specific multicast route from `mrib_in`. + /// + /// Returns a cloned [MulticastRoute], if present. + pub fn get_route(&self, key: &MulticastRouteKey) -> Option { + lock!(self.mrib_in).get(key).cloned() + } + + /// Get a specific multicast route from `mrib_loc` (selected/installed). + /// + /// Returns a cloned [MulticastRoute], if present. + pub fn get_selected_route( + &self, + key: &MulticastRouteKey, + ) -> Option { + lock!(self.mrib_loc).get(key).cloned() + } + + /// Atomically promote a (*,G) route from `mrib_in` to `mrib_loc`. + /// + /// (*,G) routes have no source address, so they always pass RPF checks. + /// This method atomically copies the fresh route data to `mrib_loc`, + /// avoiding races with concurrent route updates. + /// + /// Returns `true` if the route was found and promoted. 
+    pub(crate) fn promote_any_source(&self, key: &MulticastRouteKey) -> bool {
+        let loc_changed = {
+            // Lock order: `mrib_in` first, then `mrib_loc`. Both guards are
+            // released at the end of this block, before watchers are
+            // notified.
+            let in_table = lock!(self.mrib_in);
+            let mut loc_table = lock!(self.mrib_loc);
+
+            // Nothing to promote if the route is not in mrib_in.
+            let fresh = match in_table.get(key) {
+                Some(route) => route,
+                None => return false,
+            };
+
+            match loc_table.entry(*key) {
+                Entry::Vacant(slot) => {
+                    slot.insert(fresh.clone());
+                    true
+                }
+                Entry::Occupied(mut slot) => {
+                    let installed = slot.get();
+                    let same = installed.rpf_neighbor == fresh.rpf_neighbor
+                        && installed.underlay_group == fresh.underlay_group
+                        && installed.source == fresh.source;
+                    if same {
+                        false
+                    } else {
+                        slot.insert(fresh.clone());
+                        true
+                    }
+                }
+            }
+        };
+
+        // Watchers only hear about the route when mrib_loc was modified.
+        if loc_changed {
+            self.notify(MribChangeNotification::from(*key));
+        }
+        true
+    }
+
+    /// Apply an RPF verification result atomically for (S,G) routes.
+    ///
+    /// Updates `rpf_neighbor` in `mrib_in` (so API queries show the derived
+    /// neighbor) and then promotes/removes the fresh route to/from `mrib_loc`.
+    ///
+    /// By holding both locks and re-fetching from `mrib_in`, we avoid a race
+    /// where concurrent route updates (e.g., adding underlay nexthops) could
+    /// be lost if we used a stale snapshot.
+    ///
+    /// Only notifies watchers if `mrib_loc` actually changed.
+ pub(crate) fn apply_rpf_result( + &self, + key: &MulticastRouteKey, + neighbor: Option, + ) { + let changed = { + let mut mrib_in = lock!(self.mrib_in); + let mut mrib_loc = lock!(self.mrib_loc); + + match mrib_in.get_mut(key) { + None => { + // Route removed from mrib_in, ensure gone from mrib_loc + mrib_loc.remove(key).is_some() + } + Some(route) => { + // Update rpf_neighbor in mrib_in + route.rpf_neighbor = neighbor; + + // Promote or remove from mrib_loc based on RPF result + if neighbor.is_some() { + match mrib_loc.entry(*key) { + Entry::Occupied(mut e) => { + let unchanged = e.get().rpf_neighbor + == route.rpf_neighbor + && e.get().underlay_group + == route.underlay_group + && e.get().source == route.source; + if !unchanged { + e.insert(route.clone()); + } + !unchanged + } + Entry::Vacant(e) => { + e.insert(route.clone()); + true + } + } + } else { + // No unicast route to source -> remove from mrib_loc + mrib_loc.remove(key).is_some() + } + } + } + }; + + if changed { + self.notify(MribChangeNotification::from(*key)); + } + } + + /// Add or update a multicast route in `mrib_in`. + /// + /// The route is added to `mrib_in` only. The caller (`Db`) is responsible + /// for calling [`crate::Db::update_mrib_loc()`] to perform RPF verification + /// and potentially promote the route to `mrib_loc`. + /// + /// Accepts a full [MulticastRoute]. + pub fn add_route(&self, route: MulticastRoute) -> Result<(), Error> { + let key = route.key; + let changed = { + let mut mrib_in = lock!(self.mrib_in); + let changed = match mrib_in.get(&key) { + Some(existing) => { + // Check if route actually changed + existing.rpf_neighbor != route.rpf_neighbor + || existing.source != route.source + || existing.underlay_group != route.underlay_group + } + None => true, // New route + }; + mrib_in.insert(key, route); + changed + }; + + if changed { + self.notify(MribChangeNotification::from(key)); + } + Ok(()) + } + + /// Remove a multicast route from both `mrib_in` and `mrib_loc`. 
+ pub fn remove_route(&self, key: &MulticastRouteKey) -> Result { + // Acquire both locks following documented order to ensure atomicity + let removed = { + let mut mrib_in = lock!(self.mrib_in); + let mut mrib_loc = lock!(self.mrib_loc); + let removed_in = mrib_in.remove(key).is_some(); + let removed_loc = mrib_loc.remove(key).is_some(); + removed_in || removed_loc + }; + + if removed { + self.notify(MribChangeNotification::from(*key)); + } + Ok(removed) + } + + /// Get all routes for a specific multicast group from `mrib_in`. + pub fn get_routes_for_group( + &self, + group: &MulticastAddr, + ) -> Vec { + lock!(self.mrib_in) + .values() + .filter(|route| &route.key.group() == group) + .cloned() + .collect() + } + + /// Get all routes with a specific source from `mrib_in`. + pub fn get_routes_for_source( + &self, + source: &IpAddr, + ) -> Vec { + lock!(self.mrib_in) + .values() + .filter(|route| route.key.source().as_ref() == Some(source)) + .cloned() + .collect() + } + + /// Get all any-source (*,G) routes from `mrib_in`. + pub fn get_any_source_routes(&self) -> Vec { + lock!(self.mrib_in) + .values() + .filter(|route| route.key.source().is_none()) + .cloned() + .collect() + } + + /// Get keys for all source-specific (S,G) routes. + pub fn get_source_specific_keys(&self) -> Vec { + lock!(self.mrib_in) + .keys() + .filter(|key| key.source().is_some()) + .copied() + .collect() + } +} + +/// Spawn the RPF revalidator background thread. +/// +/// Listens for RPF cache rebuild events and re-checks RPF validity for all +/// source-specific (S,G) multicast routes. Routes that pass RPF validation +/// are installed in `mrib_loc`, while routes that fail are removed. +/// +/// Returns the sender for rebuild events if spawn succeeded, `None` if failed. +/// The caller should only install the notifier in `RpfTable` if this returns +/// `Some`, ensuring the channel receiver is actually running. 
+pub(crate) fn spawn_rpf_revalidator( + db: crate::Db, +) -> Option> { + let err_log = db.log().clone(); + let sweep_interval_ms = db.get_mrib_rpf_revalidation_interval_ms(); + let (tx, rx) = mpsc::channel::(); + + match thread::Builder::new() + .name("rpf-revalidator".to_string()) + .spawn(move || { + loop { + let ms = sweep_interval_ms.load(Ordering::Relaxed); + let timeout = if ms == 0 { + DEFAULT_REVALIDATION_INTERVAL + } else { + Duration::from_millis(ms) + }; + + // Wait for an event or timeout + let first_event = match rx.recv_timeout(timeout) { + Ok(evt) => Some(evt), + Err(RecvTimeoutError::Timeout) => None, + Err(RecvTimeoutError::Disconnected) => break, + }; + + // Drain any queued events to avoid redundant full MRIB scans + // when many events arrive in quick succession. + let mut extra_events = 0usize; + while rx.try_recv().is_ok() { + extra_events += 1; + } + + // If we coalesced multiple events, do a full sweep. + // Otherwise use the specific event for targeted revalidation. 
+ let event = if extra_events > 0 { None } else { first_event }; + db.revalidate_mrib(event); + } + info!(db.log(), "rpf revalidator shutting down"); + }) { + Ok(_) => Some(tx), + Err(e) => { + error!(err_log, "failed to spawn rpf-revalidator: {e}"); + None + } + } +} + +#[cfg(test)] +mod test { + use super::*; + + use std::net::{Ipv4Addr, Ipv6Addr}; + + use mg_common::log::*; + + use crate::types::{ + MulticastAddrV4, MulticastAddrV6, UnderlayMulticastIpv6, UnicastAddrV4, + UnicastAddrV6, + }; + + fn test_underlay() -> UnderlayMulticastIpv6 { + UnderlayMulticastIpv6::new(Ipv6Addr::new(0xff04, 0, 0, 0, 0, 0, 0, 1)) + .expect("valid test underlay address") + } + + #[test] + fn test_mrib_basic() { + let log = init_file_logger("mrib_test.log"); + let mrib = Mrib::new(log); + + // Test ASM route (*,G) + let group = MulticastAddr::new_v4(225, 1, 1, 1).expect("valid mcast"); + let key = MulticastRouteKey::any_source(group); + let route = MulticastRoute::new( + key, + test_underlay(), + MulticastSourceProtocol::Static, + ); + + mrib.add_route(route.clone()).expect("add route"); + assert!(mrib.get_route(&key).is_some()); + + // Test source-specific multicast route (S,G) + let source = UnicastAddrV4::new(Ipv4Addr::new(10, 0, 0, 1)) + .expect("valid unicast"); + let group_v4 = MulticastAddrV4::new(Ipv4Addr::new(225, 1, 1, 1)) + .expect("valid mcast"); + let key_sg = MulticastRouteKey::source_specific_v4(source, group_v4); + let route_sg = MulticastRoute::new( + key_sg, + test_underlay(), + MulticastSourceProtocol::Static, + ); + + mrib.add_route(route_sg.clone()).expect("add S,G route"); + assert_eq!(mrib.full_mrib().len(), 2); + + // Test queries + let group_routes = mrib.get_routes_for_group(&group); + assert_eq!(group_routes.len(), 2); + + let any_source = mrib.get_any_source_routes(); + assert_eq!(any_source.len(), 1); + + let source_specific = mrib.get_source_specific_keys(); + assert_eq!(source_specific.len(), 1); + + // Test removal + 
mrib.remove_route(&key).expect("remove *,G route"); + assert_eq!(mrib.full_mrib().len(), 1); + assert!(mrib.get_route(&key).is_none()); + } + + #[test] + fn test_mrib_watchers() { + use std::sync::mpsc::channel; + + let log = init_file_logger("mrib_watcher_test.log"); + let mrib = Mrib::new(log); + + // Register watcher + let (tx, rx) = channel(); + mrib.watch("test-watcher".to_string(), tx); + + // Add a route and verify notification + let group = MulticastAddr::new_v4(225, 3, 3, 3).expect("valid mcast"); + let key = MulticastRouteKey::any_source(group); + let route = MulticastRoute::new( + key, + test_underlay(), + MulticastSourceProtocol::Static, + ); + + mrib.add_route(route.clone()).expect("add route"); + + // Should receive notification + let notification = rx.recv().expect("receive notification"); + assert_eq!(notification.changed.len(), 1); + assert!(notification.changed.contains(&key)); + + // Remove route and verify notification + mrib.remove_route(&key).expect("remove route"); + + let notification = rx.recv().expect("receive notification"); + assert_eq!(notification.changed.len(), 1); + assert!(notification.changed.contains(&key)); + } + + #[test] + fn test_mrib_in_vs_loc() { + let log = init_file_logger("mrib_in_loc_test.log"); + let mrib = Mrib::new(log); + + // Add a (*,G) route to mrib_in only + let group = MulticastAddr::new_v4(225, 4, 4, 4).expect("valid mcast"); + let key = MulticastRouteKey::any_source(group); + let route = MulticastRoute::new( + key, + test_underlay(), + MulticastSourceProtocol::Static, + ); + + mrib.add_route(route.clone()).expect("add route"); + + // Verify route exists in `mrib_in` but not in `mrib_loc` + assert_eq!(mrib.full_mrib().len(), 1); + assert_eq!(mrib.loc_mrib().len(), 0); + assert!(mrib.get_route(&key).is_some()); + assert!(mrib.get_selected_route(&key).is_none()); + + // Promote (*,G) route to `mrib_loc` + assert!(mrib.promote_any_source(&key)); + + // Now verify route exists in both tables + 
assert_eq!(mrib.full_mrib().len(), 1); + assert_eq!(mrib.loc_mrib().len(), 1); + assert!(mrib.get_route(&key).is_some()); + assert!(mrib.get_selected_route(&key).is_some()); + + // Remove route completely (from both tables) + mrib.remove_route(&key).expect("remove route"); + assert_eq!(mrib.full_mrib().len(), 0); + assert_eq!(mrib.loc_mrib().len(), 0); + assert!(mrib.get_route(&key).is_none()); + assert!(mrib.get_selected_route(&key).is_none()); + } + + #[test] + fn test_mrib_ipv6_groups() { + let log = init_file_logger("mrib_v6_test.log"); + let mrib = Mrib::new(log); + + // IPv6 ASM route (*,G) + let group = MulticastAddr::new_v6([0xff0e, 0, 0, 0, 0, 0, 0, 1]) + .expect("valid mcast"); + let key = MulticastRouteKey::any_source(group); + let route = MulticastRoute::new( + key, + test_underlay(), + MulticastSourceProtocol::Static, + ); + + mrib.add_route(route.clone()).expect("add v6 route"); + assert!(mrib.get_route(&key).is_some()); + + // IPv6 source-specific multicast route (S,G) + let source = + UnicastAddrV6::new(Ipv6Addr::new(0x2001, 0xdb8, 0, 0, 0, 0, 0, 1)) + .expect("valid unicast"); + let group_v6 = + MulticastAddrV6::new(Ipv6Addr::new(0xff0e, 0, 0, 0, 0, 0, 0, 1)) + .expect("valid mcast"); + let key_sg = MulticastRouteKey::source_specific_v6(source, group_v6); + let route_sg = MulticastRoute::new( + key_sg, + test_underlay(), + MulticastSourceProtocol::Static, + ); + + mrib.add_route(route_sg).expect("add v6 S,G route"); + assert_eq!(mrib.full_mrib().len(), 2); + + // Verify address family filtering + let v6_routes: Vec<_> = + mrib.get_routes_for_group(&group).into_iter().collect(); + assert_eq!(v6_routes.len(), 2); + + // Cleanup + mrib.remove_route(&key).expect("remove *,G"); + mrib.remove_route(&key_sg).expect("remove S,G"); + assert_eq!(mrib.full_mrib().len(), 0); + } +} diff --git a/rdb/src/mrib/rpf.rs b/rdb/src/mrib/rpf.rs new file mode 100644 index 00000000..76b8870d --- /dev/null +++ b/rdb/src/mrib/rpf.rs @@ -0,0 +1,1274 @@ +// This Source Code 
Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// Copyright 2026 Oxide Computer Company + +//! [Reverse Path Forwarding][RPF] (RPF) verification for multicast routing. +//! +//! RPF verification ensures that multicast packets arrive from the expected +//! upstream direction, preventing loops in multicast distribution trees. +//! See [RFD 488] for the overall multicast routing design. +//! +//! This module provides an optimized RPF implementation using Oxide's +//! [poptrie] implementation for O(1) longest-prefix matching (LPM), with a lazy +//! rebuild strategy and fallback to linear scan during rebuilds. RPF lookups +//! happen frequently during multicast route installation and unicast RIB +//! changes, requiring LPM against the unicast RIB. +//! +//! ## (S,G) vs (*,G) Routes +//! +//! RPF verification only applies to (S,G) routes where a specific source +//! address is known. The source address is looked up in the unicast RIB to +//! find the expected upstream neighbor(s). +//! +//! (*,G) routes have no source address to verify, so RPF is skipped entirely +//! and routes are directly "installed." +//! +//! ## Revalidator Integration +//! +//! The RPF revalidator (spawned in `db.rs`) listens for rebuild events and +//! re-checks (S,G) routes when unicast RIB changes. Lock ordering: +//! +//! 1. Revalidator reads unicast RIB (`rib4_loc`/`rib6_loc`) +//! 2. Revalidator writes MRIB (`mrib_in`/`mrib_loc`) +//! +//! This matches the lock order in `mrib/mod.rs`. RPF lookups hold at most one +//! lock at a time: they try poptrie first (read lock), release it, then fall +//! back to linear scan (RIB lock) if needed. No path holds both locks. +//! +//! ## Lock Poisoning +//! +//! The poptrie cache uses asymmetric poison handling: +//! +//! - **Write side** (background rebuild thread): Panics on poison via +//! `write_lock!`. +//! +//! 
- **Read side**: Uses `.ok()` for graceful fallback to linear +//! scan if the cache is poisoned. This avoids crashing during RPF checks +//! while the linear scan fallback remains intact. +//! +//! Once an `RwLock` is poisoned, it **cannot be unpoisoned**. +//! Subsequent rebuild attempts will also panic on `write_lock!`, so the cache +//! remains permanently disabled. Reads continue to work via the +//! linear-scan fallback, keeping the system functional until SMF restarts the +//! daemon (which is the normal recovery path here). +//! +//! ## Threading Model +//! +//! Poptrie cache rebuilds run in short-lived, named background threads +//! ("rpf-poptrie-v4"/"rpf-poptrie-v6"). These threads are fire-and-forget: +//! we deliberately drop their `JoinHandle`s. If a rebuild thread panics, the +//! cache is simply not updated and RPF verification transparently falls back +//! to the linear-scan path until the next successful rebuild. +//! +//! [RPF]: https://datatracker.ietf.org/doc/html/rfc5110 +//! [RFD 488]: https://rfd.shared.oxide.computer/rfd/0488 +//! [poptrie]: https://conferences.sigcomm.org/sigcomm/2015/pdf/papers/p57.pdf + +use std::collections::{BTreeMap, BTreeSet}; +use std::net::{IpAddr, Ipv4Addr, Ipv6Addr}; +use std::sync::atomic::{AtomicU8, AtomicU64, Ordering}; +use std::sync::{Arc, Mutex, RwLock, mpsc}; +use std::thread; +use std::time::{Duration, Instant}; + +use poptrie::Poptrie; +use slog::{Logger, debug, error}; + +use mg_common::{lock, write_lock}; + +use crate::bestpath::bestpaths; +use crate::db::{Rib4, Rib6}; +use crate::types::{Path, Prefix, PrefixContains}; +use crate::{Prefix4, Prefix6}; + +/// Default interval for periodic RPF revalidation sweeps. +pub const DEFAULT_REVALIDATION_INTERVAL: Duration = Duration::from_secs(60); + +/// Event emitted when RPF revalidation is needed. 
+/// +/// This is emitted when a poptrie rebuild completes, or when a rebuild +/// request was rate-limited but multicast RPF revalidation should still +/// proceed using the linear-scan fallback. +/// +/// The optional prefix ([`Prefix4`]/[`Prefix6`]) indicates which unicast route +/// changed, enabling targeted (S,G) revalidation. If `None`, a full sweep is +/// performed. +#[derive(Clone, Copy, Debug)] +pub(crate) enum RebuildEvent { + /// IPv4 unicast routing changed. If a prefix is provided, only (S,G) + /// routes with sources matching that prefix need revalidation. + V4(Option), + /// IPv6 unicast routing changed. If a prefix is provided, only (S,G) + /// routes with sources matching that prefix need revalidation. + V6(Option), +} + +impl RebuildEvent { + /// Convert to a full-sweep event (no specific prefix). + /// + /// Used when multiple prefixes may have changed during pending rebuilds. + fn to_full_sweep(self) -> Self { + match self { + Self::V4(_) => Self::V4(None), + Self::V6(_) => Self::V6(None), + } + } + + /// Check if a source address is potentially affected by this event. + /// + /// Returns true if the source falls within the changed prefix (targeted), + /// or if no specific prefix is provided (full sweep). + pub(crate) fn matches_source(&self, source: IpAddr) -> bool { + match (source, self) { + (src, RebuildEvent::V4(Some(prefix))) => { + Prefix::V4(*prefix).contains(src).is_some() + } + (src, RebuildEvent::V6(Some(prefix))) => { + Prefix::V6(*prefix).contains(src).is_some() + } + // No specific prefix = full sweep for this AF + (IpAddr::V4(_), RebuildEvent::V4(None)) => true, + (IpAddr::V6(_), RebuildEvent::V6(None)) => true, + // Wrong AF = skip + (IpAddr::V4(_), RebuildEvent::V6(_)) => false, + (IpAddr::V6(_), RebuildEvent::V4(_)) => false, + } + } +} + +/// Set of paths for a prefix, stored in the poptrie cache. +/// +/// We store full [`Path`] objects (not just nexthops) so that we can apply +/// bestpath selection at lookup time. 
This ensures consistent behavior +/// between the poptrie fast path and linear scan fallback, regardless of +/// the configured fanout value. +pub(crate) type CachedPaths = BTreeSet; + +/// State machine for coordinating poptrie rebuilds. +#[repr(u8)] +#[derive(Clone, Copy, PartialEq, Eq, Debug)] +enum RebuildState { + /// No rebuild in progress. + Idle = 0, + /// Rebuild thread is running. + Running = 1, + /// Rebuild thread is running and more changes arrived. + RunningPending = 2, +} + +/// Coordinator for poptrie rebuild threads. +/// +/// Provides atomic state transitions to prevent race conditions where +/// pending work could be missed between checking the pending flag and +/// releasing the in-progress lock. +#[derive(Debug)] +struct RebuildCoordinator(AtomicU8); + +impl RebuildCoordinator { + /// Create a new coordinator in the idle state. + fn new() -> Self { + Self(AtomicU8::new(RebuildState::Idle as u8)) + } + + /// Try to start a rebuild. Returns `true` if this thread should do work. + /// + /// Atomically transitions `Idle → Running`. + fn try_start(&self) -> bool { + self.0 + .compare_exchange( + RebuildState::Idle as u8, + RebuildState::Running as u8, + Ordering::AcqRel, + Ordering::Acquire, + ) + .is_ok() + } + + /// Signal that more work arrived while a rebuild is in progress. + /// + /// Atomically transitions `Running → RunningPending`. If already + /// `RunningPending` or `Idle`, this is a no-op. + fn signal_pending(&self) { + // Only transition Running → RunningPending + let _ = self.0.compare_exchange( + RebuildState::Running as u8, + RebuildState::RunningPending as u8, + Ordering::AcqRel, + Ordering::Relaxed, + ); + } + + /// Check if more work is pending and atomically clear the pending flag. + /// + /// Returns `true` if we should continue working (previously `RunningPending`). + /// Atomically transitions `RunningPending → Running`. 
+ fn check_pending(&self) -> bool { + self.0 + .compare_exchange( + RebuildState::RunningPending as u8, + RebuildState::Running as u8, + Ordering::AcqRel, + Ordering::Acquire, + ) + .is_ok() + } + + /// Mark the rebuild as complete. + /// + /// Transitions to `Idle`. Should only be called by the rebuild thread. + fn finish(&self) { + self.0.store(RebuildState::Idle as u8, Ordering::Release); + } +} + +/// Scope guard to mark rebuild as finished on drop. +/// +/// This ensures the coordinator transitions to `Idle` even if the rebuild +/// thread panics, preventing deadlock. +struct RebuildGuard(Arc); + +impl Drop for RebuildGuard { + fn drop(&mut self) { + self.0.finish(); + } +} + +/// Address-family related rebuild job. +/// +/// Encapsulates the RIB source and cache destination for a poptrie rebuild, +/// allowing the rebuild logic to be shared between IPv4 and IPv6. +enum RebuildJob { + V4 { + rib: Arc>, + cache: Arc>>>, + }, + V6 { + rib: Arc>, + cache: Arc>>>, + }, +} + +impl RebuildJob { + /// Take a snapshot of the RIB and build a fresh poptrie cache. + fn rebuild(&self) { + match self { + Self::V4 { rib, cache } => { + let snapshot = { + let r = lock!(rib); + RpfTable::snapshot_rib(&r, |p| (p.value.octets(), p.length)) + }; + let mut table = poptrie::Ipv4RoutingTable::default(); + for (addr, len, paths) in snapshot { + table.insert((addr, len), paths); + } + *write_lock!(cache) = Some(Poptrie::from(table)); + } + Self::V6 { rib, cache } => { + let snapshot = { + let r = lock!(rib); + RpfTable::snapshot_rib(&r, |p| (p.value.octets(), p.length)) + }; + let mut table = poptrie::Ipv6RoutingTable::default(); + for (addr, len, paths) in snapshot { + table.insert((addr, len), paths); + } + *write_lock!(cache) = Some(Poptrie::from(table)); + } + } + } + + /// Thread name for debugging. + fn thread_name(&self) -> &'static str { + match self { + Self::V4 { .. } => "rpf-poptrie-v4", + Self::V6 { .. 
} => "rpf-poptrie-v6", + } + } +} + +/// RPF verification table using poptrie for O(1) LPM (longest-prefix matching). +/// +/// This table maintains a poptrie-based cache of the RIB for fast RPF lookups. +/// The cache is rebuilt asynchronously in the background when triggered, with +/// rate limiting to avoid excessive rebuilds. Falls back to linear scan during +/// rebuilds or if poptrie is unavailable. +/// +/// The poptrie stores full [`Path`] objects (not just nexthops) so that +/// bestpath selection can be applied at lookup time. This ensures consistent +/// RPF verification behavior regardless of whether poptrie or linear scan is +/// used. +#[derive(Clone)] +pub(crate) struct RpfTable { + /// IPv4 poptrie cache. + cache_v4: Arc>>>, + /// IPv6 poptrie cache. + cache_v6: Arc>>>, + /// Last rebuild completion time for rate limiting. + /// + /// Shared between v4 and v6 rebuilds. During route updates + /// affecting both address families, this prevents simultaneous + /// rebuilds and spreads the CPU load. The fallback is a linear scan. + last_rebuild: Arc>>, + /// Configurable minimum interval between rebuilds (milliseconds). + /// A value of 0 disables rate-limiting. + rebuild_interval_ms: Arc, + /// Optional notifier for rebuild-complete events. + rebuild_notifier: Arc>>>, + /// Coordinator for IPv4 poptrie rebuilds. + rebuild_v4: Arc, + /// Coordinator for IPv6 poptrie rebuilds. + rebuild_v6: Arc, + /// Logger for error reporting. + log: Logger, +} + +impl RpfTable { + /// Default minimum time between rebuilds (milliseconds). + const DEFAULT_REBUILD_INTERVAL_MS: u64 = 1000; + + /// Create a new empty RPF table with default rebuild interval. 
+ pub fn new(log: Logger) -> Self { + Self { + cache_v4: Arc::new(RwLock::new(None)), + cache_v6: Arc::new(RwLock::new(None)), + last_rebuild: Arc::new(Mutex::new(None)), + rebuild_interval_ms: Arc::new(AtomicU64::new( + Self::DEFAULT_REBUILD_INTERVAL_MS, + )), + rebuild_notifier: Arc::new(Mutex::new(None)), + rebuild_v4: Arc::new(RebuildCoordinator::new()), + rebuild_v6: Arc::new(RebuildCoordinator::new()), + log, + } + } + + /// Set the minimum interval between rebuilds. A zeroed duration + /// disables rate-limiting, causing every trigger to rebuild + /// immediately. + pub fn set_rebuild_interval(&self, interval: Duration) { + self.rebuild_interval_ms.store( + u64::try_from(interval.as_millis()).unwrap(), + Ordering::Relaxed, + ); + } + + /// Check if enough time has passed since the last rebuild. + /// Returns `true` if rebuild should proceed, `false` if rate limited. + fn check_rate_limit(&self) -> bool { + let min_interval_ms = self.rebuild_interval_ms.load(Ordering::Relaxed); + let min_interval = Duration::from_millis(min_interval_ms); + + if let Ok(last) = self.last_rebuild.lock() + && let Some(last_instant) = *last + && last_instant.elapsed() < min_interval + { + return false; // Skip rebuild, too soon + } + true + } + + /// Send a rebuild event notification if configured. + fn notify(&self, event: RebuildEvent) { + if let Ok(guard) = self.rebuild_notifier.lock() + && let Some(tx) = &*guard + && tx.send(event).is_err() + { + debug!(self.log, "rpf revalidator not running"); + } + } + + /// Snapshot a RIB for poptrie rebuild. + /// + /// Extracts (addr_bits, prefix_len, paths) tuples from the RIB. + /// The `to_bits` closure converts the prefix to address bits. 
+ fn snapshot_rib( + rib: &BTreeMap>, + to_bits: F, + ) -> Vec<(A, u8, BTreeSet)> + where + F: Fn(&P) -> (A, u8), + { + rib.iter() + .filter(|(_, paths)| !paths.is_empty()) + .map(|(prefix, paths)| { + let (bits, len) = to_bits(prefix); + (bits, len, paths.clone()) + }) + .collect() + } + + /// Install a notifier to be called on rebuild completion. + pub fn set_rebuild_notifier(&self, tx: mpsc::Sender) { + if let Ok(mut guard) = self.rebuild_notifier.lock() { + *guard = Some(tx); + } + } + + /// Spawn a background thread to execute a rebuild job. + /// + /// This is the shared implementation for both IPv4 and IPv6 rebuilds. + /// The job encapsulates the address-family-specific parts (RIB, cache), + /// while this method handles the shared logic (coordinator, timing, notify). + /// + /// The `event` parameter is used for targeted revalidation: if we complete + /// without looping, we send the original prefix so only affected (S,G) + /// routes are re-checked. If pending changes caused us to loop, we send + /// a full-sweep event (`None` prefix) since multiple prefixes may have + /// changed. + fn spawn_rebuild( + &self, + job: RebuildJob, + coordinator: Arc, + event: RebuildEvent, + ) { + let last_rebuild = self.last_rebuild.clone(); + let notifier = self.rebuild_notifier.clone(); + let log = self.log.clone(); + let thread_name = job.thread_name().to_string(); + let thread_coord = Arc::clone(&coordinator); + + if let Err(e) = + thread::Builder::new() + .name(thread_name.clone()) + .spawn(move || { + let _guard = RebuildGuard(Arc::clone(&thread_coord)); + + // Track whether we looped due to pending changes. + let mut looped = false; + + // Loop while there are pending rebuilds. This ensures we + // capture all RIB changes that occurred during the rebuild. + loop { + job.rebuild(); + + if let Ok(mut last) = last_rebuild.lock() { + *last = Some(Instant::now()); + } + + // Atomically check if more changes arrived during rebuild. + // If so, loop again to capture them. 
+ if !thread_coord.check_pending() { + break; + } + looped = true; + } + + // Notify revalidator. If we looped, multiple prefixes may + // have changed so we send a full-sweep event. + if let Ok(guard) = notifier.lock() + && let Some(tx) = &*guard + { + let final_event = + if looped { event.to_full_sweep() } else { event }; + let _ = tx.send(final_event); + } + }) + { + error!(log, "failed to spawn {thread_name}: {e}"); + coordinator.finish(); + self.notify(event); + } + } + + /// Trigger a background rebuild of the IPv4 RPF cache. + /// + /// The RIB snapshot is taken lazily in the background thread, reducing + /// lock contention during RIB updates. + /// + /// The `changed_prefix` ([`Prefix4`]) parameter enables targeted + /// revalidation: only (S,G) routes whose source falls within this prefix + /// need RPF re-checking. + /// + /// This trigger is rate limited based on configured interval. Only one + /// rebuild can be in progress at a time per address family. + pub fn trigger_rebuild_v4( + &self, + rib4_loc: Arc>, + changed_prefix: Option, + ) { + if !self.check_rate_limit() { + // Clear cache to force linear-scan fallback until next rebuild. + // This ensures lookups use fresh RIB data rather than stale cache. + if let Ok(mut guard) = self.cache_v4.write() { + *guard = None; + } + self.notify(RebuildEvent::V4(changed_prefix)); + return; + } + + if !self.rebuild_v4.try_start() { + self.rebuild_v4.signal_pending(); + return; + } + + let job = RebuildJob::V4 { + rib: rib4_loc, + cache: self.cache_v4.clone(), + }; + + self.spawn_rebuild( + job, + Arc::clone(&self.rebuild_v4), + RebuildEvent::V4(changed_prefix), + ); + } + + /// Trigger a background rebuild of the IPv6 RPF cache. + /// + /// The RIB snapshot is taken lazily in the background thread, reducing + /// lock contention during RIB updates. 
+ /// + /// The `changed_prefix` ([`Prefix6`]) parameter enables targeted + /// revalidation: only (S,G) routes whose source falls within this prefix + /// need RPF re-checking. + /// + /// This trigger is rate limited based on configured interval. Only one + /// rebuild can be in progress at a time per address family. + pub fn trigger_rebuild_v6( + &self, + rib6_loc: Arc>, + changed_prefix: Option, + ) { + if !self.check_rate_limit() { + // Clear cache to force linear-scan fallback until next rebuild. + // This ensures lookups use fresh RIB data rather than stale cache. + if let Ok(mut guard) = self.cache_v6.write() { + *guard = None; + } + self.notify(RebuildEvent::V6(changed_prefix)); + return; + } + + if !self.rebuild_v6.try_start() { + self.rebuild_v6.signal_pending(); + return; + } + + let job = RebuildJob::V6 { + rib: rib6_loc, + cache: self.cache_v6.clone(), + }; + + self.spawn_rebuild( + job, + Arc::clone(&self.rebuild_v6), + RebuildEvent::V6(changed_prefix), + ); + } + + /// Look up the RPF neighbor for a multicast source address. + /// + /// Returns the best nexthop from the unicast RIB for reaching the source, + /// which is the valid RPF neighbor for (S,G) routes. Returns `None` if + /// no route exists for the source. + /// + /// Uses poptrie for O(1) lookup with linear scan fallback. 
+ pub fn lookup( + &self, + source: IpAddr, + rib4_loc: &Arc>, + rib6_loc: &Arc>, + fanout: usize, + ) -> Option { + // Try poptrie lookup first + let cached_paths = match source { + IpAddr::V4(addr) => self.cache_v4.read().ok().and_then(|cache| { + cache.as_ref().and_then(|pt| pt.match_v4(u32::from(addr))) + }), + IpAddr::V6(addr) => self.cache_v6.read().ok().and_then(|cache| { + cache.as_ref().and_then(|pt| pt.match_v6(u128::from(addr))) + }), + }; + + if let Some(paths) = cached_paths { + return Self::get_rpf_neighbor(&paths, fanout); + } + + // Fallback to linear scan + match source { + IpAddr::V4(addr) => Self::lookup_v4(addr, rib4_loc, fanout), + IpAddr::V6(addr) => Self::lookup_v6(addr, rib6_loc, fanout), + } + } + + /// IPv4 RPF lookup (linear scan fallback when poptrie unavailable). + /// + /// This O(n) scan is acceptable for deployments where the + /// unicast RIB is small. + fn lookup_v4( + source: Ipv4Addr, + rib4_loc: &Arc>, + fanout: usize, + ) -> Option { + let rib = rib4_loc.lock().ok()?; + + // Find best matching prefix (longest-prefix match) + let mut best_paths: Option<&BTreeSet> = None; + let mut best_len = 0u8; + + let source_bits = u32::from(source); + for (prefix, paths) in rib.iter() { + let prefix_bits = u32::from(prefix.value); + let mask = prefix.mask(); + if (prefix_bits & mask) == (source_bits & mask) + && prefix.length > best_len + { + best_len = prefix.length; + best_paths = Some(paths); + } + } + + best_paths.and_then(|paths| Self::get_rpf_neighbor(paths, fanout)) + } + + /// IPv6 RPF lookup (linear scan fallback when poptrie unavailable). + /// + /// This O(n) scan is acceptable for deployments where the + /// unicast RIB is small. 
+ fn lookup_v6( + source: Ipv6Addr, + rib6_loc: &Arc>, + fanout: usize, + ) -> Option { + let rib = rib6_loc.lock().ok()?; + + // Find best matching prefix (longest-prefix match) + let mut best_paths: Option<&BTreeSet> = None; + let mut best_len = 0u8; + + let source_bits = u128::from(source); + for (prefix, paths) in rib.iter() { + let prefix_bits = u128::from(prefix.value); + let mask = prefix.mask(); + if (prefix_bits & mask) == (source_bits & mask) + && prefix.length > best_len + { + best_len = prefix.length; + best_paths = Some(paths); + } + } + + best_paths.and_then(|paths| Self::get_rpf_neighbor(paths, fanout)) + } + + /// Extract the RPF neighbor from a set of paths. + /// + /// For fanout == 1, returns the single bestpath nexthop. + /// For fanout > 1, returns the first active nexthop. All paths in loc-rib + /// are valid ECMP paths (already bestpath-selected), so any one suffices + /// for RPF verification. + fn get_rpf_neighbor( + paths: &BTreeSet, + fanout: usize, + ) -> Option { + let active_paths: BTreeSet = + paths.iter().filter(|p| !p.shutdown).cloned().collect(); + + if active_paths.is_empty() { + return None; + } + + if fanout == 1 { + bestpaths(&active_paths, 1) + .and_then(|selected| selected.iter().next().map(|p| p.nexthop)) + } else { + active_paths.iter().next().map(|p| p.nexthop) + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + use std::collections::BTreeMap; + + use mg_common::log::*; + use mg_common::test::DEFAULT_INTERVAL; + use mg_common::wait_for; + + use crate::test::TEST_WAIT_ITERATIONS; + use crate::{DEFAULT_RIB_PRIORITY_BGP, DEFAULT_RIB_PRIORITY_STATIC}; + + /// Helper to create empty Rib4 for tests + fn empty_rib4() -> Arc> { + Arc::new(Mutex::new(BTreeMap::new())) + } + + /// Helper to create empty Rib6 for tests + fn empty_rib6() -> Arc> { + Arc::new(Mutex::new(BTreeMap::new())) + } + + /// Extract nexthops from paths (filters out shutdown paths). 
+ fn nexthops_from_paths(paths: &BTreeSet) -> BTreeSet { + paths + .iter() + .filter(|p| !p.shutdown) + .map(|p| p.nexthop) + .collect() + } + + #[test] + fn test_nexthops_from_paths() { + let mut paths = BTreeSet::new(); + let path1 = Path { + nexthop: IpAddr::V4(Ipv4Addr::new(192, 0, 2, 1)), + rib_priority: 1, + shutdown: false, + bgp: None, + vlan_id: None, + nexthop_interface: None, + }; + let path2 = Path { + nexthop: IpAddr::V4(Ipv4Addr::new(192, 0, 2, 2)), + rib_priority: 1, + shutdown: false, + bgp: None, + vlan_id: None, + nexthop_interface: None, + }; + paths.insert(path1); + paths.insert(path2); + + let next_hops = nexthops_from_paths(&paths); + assert_eq!(next_hops.len(), 2); + assert!(next_hops.contains(&IpAddr::V4(Ipv4Addr::new(192, 0, 2, 1)))); + assert!(next_hops.contains(&IpAddr::V4(Ipv4Addr::new(192, 0, 2, 2)))); + assert!(!next_hops.contains(&IpAddr::V4(Ipv4Addr::new(192, 0, 2, 3)))); + } + + #[test] + fn test_rpf_table_linear_scan() { + let mut rib4_inner: Rib4 = BTreeMap::new(); + let prefix: Prefix4 = "192.0.2.0/24".parse().unwrap(); + + let mut paths = BTreeSet::new(); + let path = Path { + nexthop: IpAddr::V4(Ipv4Addr::new(198, 51, 100, 1)), + rib_priority: 1, + shutdown: false, + bgp: None, + vlan_id: None, + nexthop_interface: None, + }; + paths.insert(path); + rib4_inner.insert(prefix, paths); + + let rib4_loc = Arc::new(Mutex::new(rib4_inner)); + let rib6_loc = empty_rib6(); + let log = init_file_logger("rpf_linear_scan.log"); + let rpf_table = RpfTable::new(log); + + // Without poptrie cache, should use linear scan + let source = IpAddr::V4(Ipv4Addr::new(192, 0, 2, 50)); + let expected = IpAddr::V4(Ipv4Addr::new(198, 51, 100, 1)); + assert_eq!( + rpf_table.lookup(source, &rib4_loc, &rib6_loc, 1), + Some(expected) + ); + } + + #[test] + fn test_rpf_table_with_poptrie() { + let mut rib4_inner: Rib4 = BTreeMap::new(); + let prefix: Prefix4 = "192.0.2.0/24".parse().unwrap(); + + let mut paths = BTreeSet::new(); + let path = Path { + 
nexthop: IpAddr::V4(Ipv4Addr::new(198, 51, 100, 1)), + rib_priority: 1, + shutdown: false, + bgp: None, + vlan_id: None, + nexthop_interface: None, + }; + paths.insert(path); + rib4_inner.insert(prefix, paths.clone()); + + let rib4_loc = Arc::new(Mutex::new(rib4_inner)); + let rib6_loc = empty_rib6(); + + let log = init_file_logger("rpf_poptrie.log"); + let rpf_table = RpfTable::new(log); + rpf_table.trigger_rebuild_v4(Arc::clone(&rib4_loc), None); + + // Wait for rebuild to complete + wait_for!( + rpf_table.cache_v4.read().unwrap().is_some(), + DEFAULT_INTERVAL, + TEST_WAIT_ITERATIONS, + "poptrie v4 rebuild timed out" + ); + + // Should now use poptrie cache + let source = IpAddr::V4(Ipv4Addr::new(192, 0, 2, 50)); + let expected = IpAddr::V4(Ipv4Addr::new(198, 51, 100, 1)); + assert_eq!( + rpf_table.lookup(source, &rib4_loc, &rib6_loc, 1), + Some(expected) + ); + } + + #[test] + fn test_rpf_table_shutdown_paths() { + // Test that shutdown paths are filtered out + let mut rib4_inner: Rib4 = BTreeMap::new(); + let prefix: Prefix4 = "192.0.2.0/24".parse().unwrap(); + + let mut paths = BTreeSet::new(); + // Active path + let active_path = Path { + nexthop: IpAddr::V4(Ipv4Addr::new(198, 51, 100, 1)), + rib_priority: 10, + shutdown: false, + bgp: None, + vlan_id: None, + nexthop_interface: None, + }; + // Shutdown path + let shutdown_path = Path { + nexthop: IpAddr::V4(Ipv4Addr::new(198, 51, 100, 2)), + rib_priority: 20, + shutdown: true, + bgp: None, + vlan_id: None, + nexthop_interface: None, + }; + paths.insert(active_path); + paths.insert(shutdown_path); + rib4_inner.insert(prefix, paths); + + let log = init_file_logger("rpf_shutdown.log"); + let rpf_table = RpfTable::new(log); + let source = IpAddr::V4(Ipv4Addr::new(192, 0, 2, 50)); + let rib4_loc = Arc::new(Mutex::new(rib4_inner)); + let rib6_loc = empty_rib6(); + let active_neighbor = IpAddr::V4(Ipv4Addr::new(198, 51, 100, 1)); + + // Linear scan should return active path, not shutdown + assert_eq!( + 
rpf_table.lookup(source, &rib4_loc, &rib6_loc, 1), + Some(active_neighbor) + ); + + // Rebuild poptrie and test again + rpf_table.trigger_rebuild_v4(Arc::clone(&rib4_loc), None); + wait_for!( + rpf_table.cache_v4.read().unwrap().is_some(), + DEFAULT_INTERVAL, + TEST_WAIT_ITERATIONS, + "poptrie v4 rebuild timed out" + ); + + // Poptrie should also return active path + assert_eq!( + rpf_table.lookup(source, &rib4_loc, &rib6_loc, 1), + Some(active_neighbor) + ); + } + + #[test] + fn test_rpf_table_all_shutdown() { + // Test that a prefix with ALL paths shutdown returns None + let mut rib4_inner: Rib4 = BTreeMap::new(); + let prefix: Prefix4 = "192.0.2.0/24".parse().unwrap(); + + let mut paths = BTreeSet::new(); + let shutdown_path = Path { + nexthop: IpAddr::V4(Ipv4Addr::new(198, 51, 100, 1)), + rib_priority: 1, + shutdown: true, + bgp: None, + vlan_id: None, + nexthop_interface: None, + }; + paths.insert(shutdown_path); + rib4_inner.insert(prefix, paths); + + let log = init_file_logger("rpf_all_shutdown.log"); + let rpf_table = RpfTable::new(log); + let source = IpAddr::V4(Ipv4Addr::new(192, 0, 2, 50)); + let rib4_loc = Arc::new(Mutex::new(rib4_inner)); + let rib6_loc = empty_rib6(); + + // Linear scan - should return `None` (all paths shutdown) + assert_eq!(rpf_table.lookup(source, &rib4_loc, &rib6_loc, 1), None); + + // Rebuild poptrie + rpf_table.trigger_rebuild_v4(Arc::clone(&rib4_loc), None); + wait_for!( + rpf_table.cache_v4.read().unwrap().is_some(), + DEFAULT_INTERVAL, + TEST_WAIT_ITERATIONS, + "poptrie v4 rebuild timed out" + ); + + // Poptrie finds the route but all paths shutdown, still `None` + assert_eq!(rpf_table.lookup(source, &rib4_loc, &rib6_loc, 1), None); + } + + #[test] + fn test_rpf_ecmp_different_priorities() { + // Test that bestpath selection prefers lower rib_priority + + let mut rib4_inner: Rib4 = BTreeMap::new(); + let prefix: Prefix4 = "192.0.2.0/24".parse().unwrap(); + + // Static route (priority 1) + let static_nexthop = 
IpAddr::V4(Ipv4Addr::new(198, 51, 100, 1)); + let static_path = Path { + nexthop: static_nexthop, + rib_priority: DEFAULT_RIB_PRIORITY_STATIC, + shutdown: false, + bgp: None, + vlan_id: None, + nexthop_interface: None, + }; + + // BGP route (priority 20) + let bgp_nexthop = IpAddr::V4(Ipv4Addr::new(198, 51, 100, 2)); + let bgp_path = Path { + nexthop: bgp_nexthop, + rib_priority: DEFAULT_RIB_PRIORITY_BGP, + shutdown: false, + bgp: None, + vlan_id: None, + nexthop_interface: None, + }; + + let mut paths = BTreeSet::new(); + paths.insert(static_path); + paths.insert(bgp_path); + rib4_inner.insert(prefix, paths); + + let log = init_file_logger("rpf_ecmp_priority.log"); + let rpf_table = RpfTable::new(log); + let rib4_loc = Arc::new(Mutex::new(rib4_inner)); + let rib6_loc = empty_rib6(); + let source = IpAddr::V4(Ipv4Addr::new(192, 0, 2, 50)); + + // fanout=1: returns static (best priority) + assert_eq!( + rpf_table.lookup(source, &rib4_loc, &rib6_loc, 1), + Some(static_nexthop) + ); + + rpf_table.trigger_rebuild_v4(Arc::clone(&rib4_loc), None); + wait_for!( + rpf_table.cache_v4.read().unwrap().is_some(), + DEFAULT_INTERVAL, + TEST_WAIT_ITERATIONS, + "poptrie v4 rebuild timed out" + ); + + // Same with poptrie + assert_eq!( + rpf_table.lookup(source, &rib4_loc, &rib6_loc, 1), + Some(static_nexthop) + ); + } + + #[test] + fn test_rpf_table_linear_scan_v6() { + let mut rib6_inner: Rib6 = BTreeMap::new(); + let prefix: Prefix6 = "2001:db8::/32".parse().unwrap(); + + let mut paths = BTreeSet::new(); + let path = Path { + nexthop: IpAddr::V6(Ipv6Addr::new(0xfe80, 0, 0, 0, 0, 0, 0, 1)), + rib_priority: 1, + shutdown: false, + bgp: None, + vlan_id: None, + nexthop_interface: None, + }; + paths.insert(path); + rib6_inner.insert(prefix, paths); + + let rib4_loc = empty_rib4(); + let rib6_loc = Arc::new(Mutex::new(rib6_inner)); + let log = init_file_logger("rpf_linear_scan_v6.log"); + let rpf_table = RpfTable::new(log); + + // Without poptrie cache, should use linear scan + let 
source = + IpAddr::V6(Ipv6Addr::new(0x2001, 0xdb8, 0, 0, 0, 0, 0, 50)); + let expected = IpAddr::V6(Ipv6Addr::new(0xfe80, 0, 0, 0, 0, 0, 0, 1)); + assert_eq!( + rpf_table.lookup(source, &rib4_loc, &rib6_loc, 1), + Some(expected) + ); + } + + #[test] + fn test_rpf_table_with_poptrie_v6() { + let mut rib6_inner: Rib6 = BTreeMap::new(); + let prefix: Prefix6 = "2001:db8::/32".parse().unwrap(); + + let mut paths = BTreeSet::new(); + let path = Path { + nexthop: IpAddr::V6(Ipv6Addr::new(0xfe80, 0, 0, 0, 0, 0, 0, 1)), + rib_priority: 1, + shutdown: false, + bgp: None, + vlan_id: None, + nexthop_interface: None, + }; + paths.insert(path); + rib6_inner.insert(prefix, paths.clone()); + + let rib4_loc = empty_rib4(); + let rib6_loc = Arc::new(Mutex::new(rib6_inner)); + + let log = init_file_logger("rpf_poptrie_v6.log"); + let rpf_table = RpfTable::new(log); + rpf_table.trigger_rebuild_v6(Arc::clone(&rib6_loc), None); + + // Wait for rebuild to complete + wait_for!( + rpf_table.cache_v6.read().unwrap().is_some(), + DEFAULT_INTERVAL, + TEST_WAIT_ITERATIONS, + "poptrie v6 rebuild timed out" + ); + + // Should now use poptrie cache + let source = + IpAddr::V6(Ipv6Addr::new(0x2001, 0xdb8, 0, 0, 0, 0, 0, 50)); + let expected = IpAddr::V6(Ipv6Addr::new(0xfe80, 0, 0, 0, 0, 0, 0, 1)); + assert_eq!( + rpf_table.lookup(source, &rib4_loc, &rib6_loc, 1), + Some(expected) + ); + } + + #[test] + fn test_rpf_lpm() { + // Test longest-prefix match -> the more specific route wins + let mut rib4_inner: Rib4 = BTreeMap::new(); + + // Less specific: 192.0.2.0/24 -> nexthop1 + let prefix_24: Prefix4 = "192.0.2.0/24".parse().unwrap(); + let nexthop1 = IpAddr::V4(Ipv4Addr::new(198, 51, 100, 1)); + let mut paths1 = BTreeSet::new(); + paths1.insert(Path { + nexthop: nexthop1, + rib_priority: 1, + shutdown: false, + bgp: None, + vlan_id: None, + nexthop_interface: None, + }); + rib4_inner.insert(prefix_24, paths1); + + // More specific: 192.0.2.128/25 -> nexthop2 + let prefix_25: Prefix4 = 
"192.0.2.128/25".parse().unwrap(); + let nexthop2 = IpAddr::V4(Ipv4Addr::new(198, 51, 100, 2)); + let mut paths2 = BTreeSet::new(); + paths2.insert(Path { + nexthop: nexthop2, + rib_priority: 1, + shutdown: false, + bgp: None, + vlan_id: None, + nexthop_interface: None, + }); + rib4_inner.insert(prefix_25, paths2); + + let log = init_file_logger("rpf_lpm.log"); + let rpf_table = RpfTable::new(log); + let rib4_loc = Arc::new(Mutex::new(rib4_inner.clone())); + let rib6_loc = empty_rib6(); + + // Source in /25 should match more specific route + let source_in_25 = IpAddr::V4(Ipv4Addr::new(192, 0, 2, 200)); + assert_eq!( + rpf_table.lookup(source_in_25, &rib4_loc, &rib6_loc, 1), + Some(nexthop2) + ); + + // Source in /24 but not /25 should match less specific route + let source_in_24 = IpAddr::V4(Ipv4Addr::new(192, 0, 2, 50)); + assert_eq!( + rpf_table.lookup(source_in_24, &rib4_loc, &rib6_loc, 1), + Some(nexthop1) + ); + + // Test with poptrie too + rpf_table.trigger_rebuild_v4(Arc::clone(&rib4_loc), None); + wait_for!( + rpf_table.cache_v4.read().unwrap().is_some(), + DEFAULT_INTERVAL, + TEST_WAIT_ITERATIONS, + "poptrie v4 rebuild timed out" + ); + + assert_eq!( + rpf_table.lookup(source_in_25, &rib4_loc, &rib6_loc, 1), + Some(nexthop2) + ); + assert_eq!( + rpf_table.lookup(source_in_24, &rib4_loc, &rib6_loc, 1), + Some(nexthop1) + ); + } + + #[test] + fn test_rpf_ecmp_v6() { + // Test IPv6 ECMP: lookup returns one of the equal-priority paths + let mut rib6_inner: Rib6 = BTreeMap::new(); + let prefix: Prefix6 = "2001:db8::/32".parse().unwrap(); + + let nexthop1 = IpAddr::V6(Ipv6Addr::new(0xfe80, 0, 0, 0, 0, 0, 0, 1)); + let nexthop2 = IpAddr::V6(Ipv6Addr::new(0xfe80, 0, 0, 0, 0, 0, 0, 2)); + + let path1 = Path { + nexthop: nexthop1, + rib_priority: 1, + shutdown: false, + bgp: None, + vlan_id: None, + nexthop_interface: None, + }; + let path2 = Path { + nexthop: nexthop2, + rib_priority: 1, + shutdown: false, + bgp: None, + vlan_id: None, + nexthop_interface: None, + 
}; + + let mut paths = BTreeSet::new(); + paths.insert(path1); + paths.insert(path2); + rib6_inner.insert(prefix, paths); + + let log = init_file_logger("rpf_ecmp_v6.log"); + let rpf_table = RpfTable::new(log); + let rib4_loc = empty_rib4(); + let rib6_loc = Arc::new(Mutex::new(rib6_inner)); + let source = + IpAddr::V6(Ipv6Addr::new(0x2001, 0xdb8, 0, 0, 0, 0, 0, 50)); + + // fanout=1: returns one of the equal-priority paths + let result = rpf_table.lookup(source, &rib4_loc, &rib6_loc, 1); + assert!( + result == Some(nexthop1) || result == Some(nexthop2), + "expected one of the ECMP nexthops" + ); + } + + #[test] + fn test_rpf_v6_with_nexthop_interface() { + // RPF with link-local nexthops and interface binding + // (BGP unnumbered underlay for multicast). + // + // This verifies both linear scan and poptrie paths return the correct + // nexthop. + let mut rib6_inner: Rib6 = BTreeMap::new(); + let prefix: Prefix6 = "2001:db8:1::/48".parse().unwrap(); + + let nexthop = IpAddr::V6(Ipv6Addr::new(0xfe80, 0, 0, 0, 0, 0, 0, 1)); + let path = Path { + nexthop, + rib_priority: 1, + shutdown: false, + bgp: None, + vlan_id: None, + nexthop_interface: Some("qsfp0".to_string()), + }; + + let mut paths = BTreeSet::new(); + paths.insert(path); + rib6_inner.insert(prefix, paths); + + let log = init_file_logger("rpf_v6_interface.log"); + let rpf_table = RpfTable::new(log); + let rib4_loc = empty_rib4(); + let rib6_loc = Arc::new(Mutex::new(rib6_inner)); + + let source = + IpAddr::V6(Ipv6Addr::new(0x2001, 0xdb8, 1, 0, 0, 0, 0, 100)); + + // Linear scan + assert_eq!( + rpf_table.lookup(source, &rib4_loc, &rib6_loc, 1), + Some(nexthop), + ); + + // Poptrie + rpf_table.trigger_rebuild_v6(Arc::clone(&rib6_loc), None); + wait_for!( + rpf_table.cache_v6.read().unwrap().is_some(), + DEFAULT_INTERVAL, + TEST_WAIT_ITERATIONS, + "poptrie v6 rebuild timed out" + ); + + assert_eq!( + rpf_table.lookup(source, &rib4_loc, &rib6_loc, 1), + Some(nexthop), + ); + } + + #[test] + fn test_rpf_v6_lpm() 
{ + const NEXTHOP1: IpAddr = + IpAddr::V6(Ipv6Addr::new(0x2001, 0xdb8, 0xff, 0, 0, 0, 0, 1)); + const NEXTHOP2: IpAddr = + IpAddr::V6(Ipv6Addr::new(0x2001, 0xdb8, 0xff, 0, 0, 0, 0, 2)); + + let mut rib6_inner: Rib6 = BTreeMap::new(); + + // Less specific: 2001:db8::/32 -> NEXTHOP1 + let prefix_32: Prefix6 = "2001:db8::/32".parse().unwrap(); + let mut paths1 = BTreeSet::new(); + paths1.insert(Path { + nexthop: NEXTHOP1, + rib_priority: 1, + shutdown: false, + bgp: None, + vlan_id: None, + nexthop_interface: None, + }); + rib6_inner.insert(prefix_32, paths1); + + // More specific: 2001:db8:1::/48 -> NEXTHOP2 + let prefix_48: Prefix6 = "2001:db8:1::/48".parse().unwrap(); + let mut paths2 = BTreeSet::new(); + paths2.insert(Path { + nexthop: NEXTHOP2, + rib_priority: 1, + shutdown: false, + bgp: None, + vlan_id: None, + nexthop_interface: None, + }); + rib6_inner.insert(prefix_48, paths2); + + let log = init_file_logger("rpf_v6_lpm.log"); + let rpf_table = RpfTable::new(log); + let rib4_loc = empty_rib4(); + let rib6_loc = Arc::new(Mutex::new(rib6_inner)); + + // Source in /48 should match more specific route + let source_in_48 = + IpAddr::V6(Ipv6Addr::new(0x2001, 0xdb8, 1, 0, 0, 0, 0, 50)); + assert_eq!( + rpf_table.lookup(source_in_48, &rib4_loc, &rib6_loc, 1), + Some(NEXTHOP2) + ); + + // Source in /32 but not /48 should match less specific route + let source_in_32 = + IpAddr::V6(Ipv6Addr::new(0x2001, 0xdb8, 2, 0, 0, 0, 0, 50)); + assert_eq!( + rpf_table.lookup(source_in_32, &rib4_loc, &rib6_loc, 1), + Some(NEXTHOP1) + ); + } +} diff --git a/rdb/src/proptest.rs b/rdb/src/proptest.rs index a4bba03f..aef6f023 100644 --- a/rdb/src/proptest.rs +++ b/rdb/src/proptest.rs @@ -2,6 +2,8 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. +// Copyright 2026 Oxide Computer Company + //! Property-based tests for Prefix types using proptest //! //! 
These tests verify key invariants of the Prefix types to ensure @@ -11,13 +13,38 @@ use crate::{ BgpNeighborParameters, types::{ - BgpNeighborInfo, ImportExportPolicy4, ImportExportPolicy6, Prefix, - Prefix4, Prefix6, StaticRouteKey, + BgpNeighborInfo, ImportExportPolicy4, ImportExportPolicy6, + MulticastAddr, MulticastAddrV4, MulticastAddrV6, MulticastRoute, + MulticastRouteKey, MulticastRouteKeyV4, MulticastRouteKeyV6, + MulticastSourceProtocol, Prefix, Prefix4, Prefix6, StaticRouteKey, + UnderlayMulticastIpv6, UnicastAddrV4, UnicastAddrV6, }, }; +use omicron_common::address::{ + IPV4_MULTICAST_RANGE, IPV4_SSM_SUBNET, IPV6_ADMIN_SCOPED_MULTICAST_PREFIX, + IPV6_INTERFACE_LOCAL_MULTICAST_SUBNET, IPV6_LINK_LOCAL_MULTICAST_SUBNET, + IPV6_MULTICAST_PREFIX, IPV6_SSM_SUBNET, +}; +use omicron_common::api::external::Vni; use proptest::{prelude::*, strategy::Just}; use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr}; +/// Minimum valid IPv6 multicast scope for proptest strategies. +/// +/// Scopes 0 (reserved), 1 (interface-local), and 2 (link-local) are +/// rejected by `MulticastAddrV6::new`, so generated addresses start +/// at scope 3 (realm-local). +const MIN_MULTICAST_SCOPE: u8 = 0x3; + +/// Maximum IPv6 multicast scope value (4 bits). +const MAX_MULTICAST_SCOPE: u8 = 0xf; + +/// Maximum IPv6 multicast flags value (4 bits). +const MAX_MULTICAST_FLAGS: u8 = 0xf; + +/// SSM flags nibble (RFC 4607). SSM addresses have flags = 3. +const SSM_FLAGS: u8 = 0x3; + // Strategy for generating valid IPv4 prefixes fn ipv4_prefix_strategy() -> impl Strategy { (any::(), 0u8..=32u8).prop_map(|(addr_bits, length)| { @@ -520,3 +547,693 @@ proptest! 
{ ); } } + +// ============================================================================ +// Multicast address and route-key property tests and setup +// ============================================================================ + +// Strategy for generating IPv4 unicast addresses (non-multicast, non-loopback) +// Generates directly in valid ranges to avoid filter rejection limits +fn ipv4_unicast_strategy() -> impl Strategy { + prop_oneof![ + // 1.x.x.x - 126.x.x.x (skip 0.x.x.x and 127.x.x.x loopback) + (1u8..=126, any::(), any::(), any::()) + .prop_map(|(a, b, c, d)| Ipv4Addr::new(a, b, c, d)), + // 128.x.x.x - 223.x.x.x (before multicast range) + (128u8..=223, any::(), any::(), any::()) + .prop_map(|(a, b, c, d)| Ipv4Addr::new(a, b, c, d)), + ] + .prop_filter_map("must be valid unicast", |addr| { + UnicastAddrV4::new(addr).ok() + }) +} + +// Strategy for generating IPv6 addresses that are not multicast or loopback. +// +// Returns raw Ipv6Addr, not UnicastAddrV6. Use this for tests that need +// a non-multicast address but don't require a routable unicast source +// (e.g., AF mismatch tests, RPF neighbor fields, multicast addr rejection). +// +// For multicast route key sources, use routable_ipv6_unicast_strategy(). 
+fn ipv6_unicast_strategy() -> impl Strategy { + // Generate any address except ff00::/8 (multicast) and ::1 (loopback) + // Multicast is only 1/256 of address space, so filter rejection is fine + any::().prop_filter_map("skip multicast/loopback", |bits| { + let addr = Ipv6Addr::from(bits); + if addr.is_multicast() || addr.is_loopback() { + None + } else { + Some(addr) + } + }) +} + +// Strategy for generating valid VNIs (0 to Vni::MAX_VNI) +fn valid_vni_strategy() -> impl Strategy { + (0u32..=Vni::MAX_VNI).prop_map(|v| Vni::try_from(v).unwrap()) +} + +// Strategy for generating invalid VNIs (> Vni::MAX_VNI) +fn invalid_vni_strategy() -> impl Strategy { + (Vni::MAX_VNI + 1)..=u32::MAX +} + +// Strategy for underlay multicast addresses within ff04::/64. +fn admin_local_multicast_strategy() +-> impl Strategy { + any::().prop_map(|bits| { + let addr = Ipv6Addr::new( + IPV6_ADMIN_SCOPED_MULTICAST_PREFIX, + 0, + 0, + 0, + (bits >> 48) as u16, + (bits >> 32) as u16, + (bits >> 16) as u16, + bits as u16, + ); + UnderlayMulticastIpv6::new(addr).expect("valid underlay address") + }) +} + +// Strategy for generating IPv6 multicast addresses that are not admin-local +// Admin-local scope is derived from IPV6_ADMIN_SCOPED_MULTICAST_PREFIX +// Scopes 0-2 are rejected by MulticastAddrV6::new (reserved, interface-local, +// link-local), so we use scope 3 or 5-15. 
+fn non_admin_local_multicast_strategy() -> impl Strategy +{ + // Extract admin-local scope from the constant (0xff04 -> 4) + let admin_local_scope = (IPV6_ADMIN_SCOPED_MULTICAST_PREFIX & 0xf) as u8; + // Scope must be valid (3+) and not admin-local + let scope = prop_oneof![ + Just(MIN_MULTICAST_SCOPE), + (admin_local_scope + 1)..=MAX_MULTICAST_SCOPE + ]; + (any::(), scope, any::<[u16; 7]>()).prop_map(|(flags, scope, segs)| { + let first = IPV6_MULTICAST_PREFIX + | ((flags as u16 & MAX_MULTICAST_FLAGS as u16) << 4) + | (scope as u16); + MulticastAddrV6::new(Ipv6Addr::new( + first, segs[0], segs[1], segs[2], segs[3], segs[4], segs[5], + segs[6], + )) + .expect("non-admin-local multicast is valid") + }) +} + +// Strategy for routable IPv6 unicast (not link-local, loopback, unspecified) +fn routable_ipv6_unicast_strategy() -> impl Strategy { + any::().prop_filter_map("must be valid unicast", |bits| { + UnicastAddrV6::new(Ipv6Addr::from(bits)).ok() + }) +} + +// ============================================================================ +// Arbitrary implementations for multicast types +// ============================================================================ +// +// These allow using `any::()` etc. 
in property tests, +// generating only valid instances of each type + +impl Arbitrary for MulticastAddrV4 { + type Parameters = (); + type Strategy = BoxedStrategy; + + fn arbitrary_with(_: Self::Parameters) -> Self::Strategy { + // Derive range boundaries from constants + let mcast_base = IPV4_MULTICAST_RANGE.addr().octets()[0]; + let mcast_end = mcast_base + 15; // /4 prefix = 16 values + let ssm_first = IPV4_SSM_SUBNET.addr().octets()[0]; + + // Generate directly in valid multicast ranges for efficiency + // Valid: 224.0.1.0 - 239.255.255.255 (excluding 224.0.0.x link-local) + prop_oneof![ + // mcast_base.0.1.0 - mcast_base.0.255.255 (skip link-local) + (1u8..=u8::MAX, any::()).prop_map(move |(c, d)| { + MulticastAddrV4::new(Ipv4Addr::new(mcast_base, 0, c, d)) + .expect("mcast_base.0.1+ is valid multicast") + }), + // mcast_base.1.0.0 - mcast_base.255.255.255 + (1u8..=u8::MAX, any::(), any::()).prop_map( + move |(b, c, d)| { + MulticastAddrV4::new(Ipv4Addr::new(mcast_base, b, c, d)) + .expect("mcast_base.1+ is valid multicast") + } + ), + // (mcast_base+1).x.x.x - (ssm_first-1).x.x.x (globally routable) + ( + (mcast_base + 1)..=ssm_first - 1, + any::(), + any::(), + any::() + ) + .prop_map(|(a, b, c, d)| { + MulticastAddrV4::new(Ipv4Addr::new(a, b, c, d)) + .expect("pre-SSM range is valid multicast") + }), + // ssm_first.x.x.x (SSM range) + (any::(), any::(), any::()).prop_map( + move |(b, c, d)| { + MulticastAddrV4::new(Ipv4Addr::new(ssm_first, b, c, d)) + .expect("SSM is valid") + } + ), + // (ssm_first+1).x.x.x - (mcast_end-1).x.x.x (GLOP, etc.) 
+ ( + (ssm_first + 1)..=mcast_end - 1, + any::(), + any::(), + any::() + ) + .prop_map(|(a, b, c, d)| { + MulticastAddrV4::new(Ipv4Addr::new(a, b, c, d)) + .expect("post-SSM range is valid multicast") + }), + // mcast_end.x.x.x (admin-scoped) + (any::(), any::(), any::()).prop_map( + move |(b, c, d)| { + MulticastAddrV4::new(Ipv4Addr::new(mcast_end, b, c, d)) + .expect("admin-scoped is valid") + } + ), + ] + .boxed() + } +} + +impl Arbitrary for MulticastAddrV6 { + type Parameters = (); + type Strategy = BoxedStrategy; + + fn arbitrary_with(_: Self::Parameters) -> Self::Strategy { + // Generate with all valid flag/scope combinations + // Format: ff:: + // Valid scopes: 3-f (excluding 0=reserved, 1=if-local, 2=link-local) + // Flags: 0-f (all combinations valid) + ( + 0x0u8..=MAX_MULTICAST_FLAGS, + MIN_MULTICAST_SCOPE..=MAX_MULTICAST_SCOPE, + any::<[u16; 7]>(), + ) + .prop_map(|(flags, scope, segs)| { + let first_segment = IPV6_MULTICAST_PREFIX + | ((flags as u16) << 4) + | (scope as u16); + let addr = Ipv6Addr::new( + first_segment, + segs[0], + segs[1], + segs[2], + segs[3], + segs[4], + segs[5], + segs[6], + ); + MulticastAddrV6::new(addr) + .expect("scope 3-f with any flags is valid") + }) + .boxed() + } +} + +impl Arbitrary for MulticastAddr { + type Parameters = (); + type Strategy = BoxedStrategy; + + fn arbitrary_with(_: Self::Parameters) -> Self::Strategy { + prop_oneof![ + any::().prop_map(crate::types::MulticastAddr::V4), + any::().prop_map(crate::types::MulticastAddr::V6), + ] + .boxed() + } +} + +// Strategy for generating ASM (non-SSM) IPv4 multicast addresses directly +fn ipv4_asm_group_strategy() -> impl Strategy { + // Derive range boundaries from constants + let mcast_base = IPV4_MULTICAST_RANGE.addr().octets()[0]; + let mcast_end = mcast_base + 15; // /4 prefix = 16 values + let ssm_first = IPV4_SSM_SUBNET.addr().octets()[0]; + + // ASM ranges: mcast_base.0.1+ through (ssm_first-1), plus (ssm_first+1)-mcast_end + prop_oneof![ + // mcast_base.0.1.0 
- mcast_base.0.255.255 (skip link-local) + (1u8..=u8::MAX, any::()).prop_map(move |(c, d)| { + MulticastAddrV4::new(Ipv4Addr::new(mcast_base, 0, c, d)) + .expect("mcast_base.0.1+ is valid") + }), + // mcast_base.1.0.0 - mcast_base.255.255.255 + (1u8..=u8::MAX, any::(), any::()).prop_map(move |(b, c, d)| { + MulticastAddrV4::new(Ipv4Addr::new(mcast_base, b, c, d)) + .expect("mcast_base.1+ is valid") + }), + // (mcast_base+1).x.x.x - (ssm_first-1).x.x.x + ( + (mcast_base + 1)..=ssm_first - 1, + any::(), + any::(), + any::() + ) + .prop_map(|(a, b, c, d)| { + MulticastAddrV4::new(Ipv4Addr::new(a, b, c, d)) + .expect("pre-SSM ASM is valid") + }), + // (ssm_first+1).x.x.x - mcast_end.x.x.x (skip SSM) + ( + (ssm_first + 1)..=mcast_end, + any::(), + any::(), + any::() + ) + .prop_map(|(a, b, c, d)| { + MulticastAddrV4::new(Ipv4Addr::new(a, b, c, d)) + .expect("post-SSM ASM is valid") + }), + ] +} + +// Strategy for generating SSM IPv4 multicast addresses directly (232.x.x.x) +fn ipv4_ssm_group_strategy() -> impl Strategy { + let ssm_first_octet = IPV4_SSM_SUBNET.addr().octets()[0]; + (any::(), any::(), any::()).prop_map(move |(b, c, d)| { + MulticastAddrV4::new(Ipv4Addr::new(ssm_first_octet, b, c, d)) + .expect("SSM range is valid multicast") + }) +} + +// Strategy for generating ASM (non-SSM) IPv6 multicast addresses directly +fn ipv6_asm_group_strategy() -> impl Strategy { + // ASM: ff:: where flags != SSM_FLAGS, scope in 3-f + let flags = prop_oneof![ + Just(0x0u8), + Just(0x1u8), + Just(0x2u8), + ((SSM_FLAGS + 1)..=MAX_MULTICAST_FLAGS), + ]; + ( + flags, + MIN_MULTICAST_SCOPE..=MAX_MULTICAST_SCOPE, + any::<[u16; 7]>(), + ) + .prop_map(|(f, s, segs)| { + let first = IPV6_MULTICAST_PREFIX | ((f as u16) << 4) | (s as u16); + MulticastAddrV6::new(Ipv6Addr::new( + first, segs[0], segs[1], segs[2], segs[3], segs[4], segs[5], + segs[6], + )) + .expect("ASM is valid") + }) +} + +// Strategy for generating SSM IPv6 multicast addresses directly (ff3x::) +fn 
ipv6_ssm_group_strategy() -> impl Strategy { + // SSM: ff3:: where scope in 3-f (link-local and above) + // IPV6_SSM_SUBNET is ff30::/12, so base segment is 0xff30 + let ssm_base = IPV6_SSM_SUBNET.addr().segments()[0]; + (MIN_MULTICAST_SCOPE..=MAX_MULTICAST_SCOPE, any::<[u16; 7]>()).prop_map( + move |(scope, segs)| { + let first = ssm_base | (scope as u16); + MulticastAddrV6::new(Ipv6Addr::new( + first, segs[0], segs[1], segs[2], segs[3], segs[4], segs[5], + segs[6], + )) + .expect("SSM is valid") + }, + ) +} + +impl Arbitrary for MulticastRouteKey { + type Parameters = (); + type Strategy = BoxedStrategy; + + fn arbitrary_with(_: Self::Parameters) -> Self::Strategy { + // Generate directly without filtering for efficiency with high case counts + let vni = (0u32..=Vni::MAX_VNI).prop_map(|v| Vni::try_from(v).unwrap()); + + prop_oneof![ + // V4 ASM (*,G) + (ipv4_asm_group_strategy(), vni.clone()).prop_map(|(grp, vni)| { + MulticastRouteKey::V4(MulticastRouteKeyV4 { + source: None, + group: grp, + vni, + }) + }), + // V4 ASM (S,G) + ( + ipv4_unicast_strategy(), + ipv4_asm_group_strategy(), + vni.clone() + ) + .prop_map(|(src, grp, vni)| { + MulticastRouteKey::V4(MulticastRouteKeyV4 { + source: Some(src), + group: grp, + vni, + }) + }), + // V4 SSM (S,G) - SSM requires source + ( + ipv4_unicast_strategy(), + ipv4_ssm_group_strategy(), + vni.clone() + ) + .prop_map(|(src, grp, vni)| { + MulticastRouteKey::V4(MulticastRouteKeyV4 { + source: Some(src), + group: grp, + vni, + }) + }), + // V6 ASM (*,G) + (ipv6_asm_group_strategy(), vni.clone()).prop_map(|(grp, vni)| { + MulticastRouteKey::V6(MulticastRouteKeyV6 { + source: None, + group: grp, + vni, + }) + }), + // V6 ASM (S,G) + ( + routable_ipv6_unicast_strategy(), + ipv6_asm_group_strategy(), + vni.clone() + ) + .prop_map(|(src, grp, vni)| { + MulticastRouteKey::V6(MulticastRouteKeyV6 { + source: Some(src), + group: grp, + vni, + }) + }), + // V6 SSM (S,G) - SSM requires source + ( + routable_ipv6_unicast_strategy(), + 
ipv6_ssm_group_strategy(), + vni + ) + .prop_map(|(src, grp, vni)| { + MulticastRouteKey::V6(MulticastRouteKeyV6 { + source: Some(src), + group: grp, + vni, + }) + }), + ] + .boxed() + } +} + +proptest! { + /// Property: Arbitrary `MulticastAddrV4` always validates + #[test] + fn prop_multicast_addr_v4_arbitrary_valid(addr in any::()) { + // Arbitrary impl only generates valid addresses + prop_assert!(addr.ip().is_multicast()); + } + + /// Property: Arbitrary `MulticastAddrV6` always validates + #[test] + fn prop_multicast_addr_v6_arbitrary_valid(addr in any::()) { + // Arbitrary impl only generates valid addresses + prop_assert!(addr.ip().is_multicast()); + } + + /// Property: IPv4 unicast addresses are rejected as multicast + #[test] + fn prop_multicast_addr_v4_rejects_unicast(addr in ipv4_unicast_strategy()) { + let result = MulticastAddrV4::new(addr.ip()); + prop_assert!( + result.is_err(), + "unicast {addr} should be rejected as multicast" + ); + } + + /// Property: IPv6 unicast addresses are rejected as multicast + #[test] + fn prop_multicast_addr_v6_rejects_unicast(addr in ipv6_unicast_strategy()) { + let result = MulticastAddrV6::new(addr); + prop_assert!( + result.is_err(), + "unicast {addr} should be rejected as multicast" + ); + } + + /// Property: IPv4 link-local multicast (224.0.0.x) is rejected + #[test] + fn prop_multicast_addr_v4_rejects_link_local(last_octet in 0u8..=u8::MAX) { + let mcast_base = IPV4_MULTICAST_RANGE.addr().octets()[0]; + let addr = Ipv4Addr::new(mcast_base, 0, 0, last_octet); + let result = MulticastAddrV4::new(addr); + prop_assert!( + result.is_err(), + "link-local {addr} should be rejected" + ); + } + + /// Property: IPv6 link-local multicast (ff02::/16) is rejected + #[test] + fn prop_multicast_addr_v6_rejects_link_local(segs in any::<[u16; 7]>()) { + let prefix = IPV6_LINK_LOCAL_MULTICAST_SUBNET.addr().segments()[0]; + let link_local = Ipv6Addr::new( + prefix, segs[0], segs[1], segs[2], segs[3], segs[4], segs[5], segs[6], + ); 
+ let result = MulticastAddrV6::new(link_local); + prop_assert!( + result.is_err(), + "link-local {link_local} should be rejected" + ); + } + + /// Property: IPv6 interface-local multicast (ff01::/16) is rejected + #[test] + fn prop_multicast_addr_v6_rejects_interface_local(segs in any::<[u16; 7]>()) { + let prefix = IPV6_INTERFACE_LOCAL_MULTICAST_SUBNET.addr().segments()[0]; + let if_local = Ipv6Addr::new( + prefix, segs[0], segs[1], segs[2], segs[3], segs[4], segs[5], segs[6], + ); + let result = MulticastAddrV6::new(if_local); + prop_assert!( + result.is_err(), + "interface-local {if_local} should be rejected" + ); + } + + /// Property: `MulticastAddrV4` roundtrip through ip() preserves address + #[test] + fn prop_multicast_addr_ip_roundtrip_v4(mcast in any::()) { + let ip = mcast.ip(); + let roundtrip = MulticastAddrV4::new(ip).expect("valid"); + prop_assert_eq!(mcast, roundtrip); + } + + /// Property: `MulticastAddrV6` roundtrip through ip() preserves address + #[test] + fn prop_multicast_addr_ip_roundtrip_v6(mcast in any::()) { + let ip = mcast.ip(); + let roundtrip = MulticastAddrV6::new(ip).expect("valid"); + prop_assert_eq!(mcast, roundtrip); + } + + /// Property: Arbitrary `MulticastRouteKey` always validates + #[test] + fn prop_route_key_arbitrary_valid(key in any::()) { + prop_assert!( + key.validate().is_ok(), + "arbitrary key should validate: {key:?}" + ); + } + + /// Property: (*,G) with ASM group validates (source optional for ASM) + #[test] + fn prop_route_key_asm_star_g_valid_v4(group in ipv4_asm_group_strategy()) { + let key = MulticastRouteKey::any_source(group.into()); + prop_assert!( + key.validate().is_ok(), + "(*,G) with ASM {group} should be valid"); + } + + /// Property: (*,G) with ASM group validates (source optional for ASM) + #[test] + fn prop_route_key_asm_star_g_valid_v6(group in ipv6_asm_group_strategy()) { + let key = MulticastRouteKey::any_source(group.into()); + prop_assert!( + key.validate().is_ok(), + "(*,G) with ASM {group} 
should be valid" + ); + } + + /// Property: (S,G) with unicast source validates (covers ASM and SSM) + #[test] + fn prop_route_key_sg_valid_v4( + src in ipv4_unicast_strategy(), + group in any::(), + ) { + let key = MulticastRouteKey::source_specific_v4(src, group); + prop_assert!( + key.validate().is_ok(), + "(S,G) with unicast source {src} should be valid" + ); + } + + /// Property: (S,G) with unicast source validates (covers ASM and SSM) + #[test] + fn prop_route_key_sg_valid_v6( + src in routable_ipv6_unicast_strategy(), + group in any::(), + ) { + let key = MulticastRouteKey::source_specific_v6(src, group); + prop_assert!( + key.validate().is_ok(), + "(S,G) with unicast source {src} should be valid" + ); + } + + /// Property: SSM without source fails validation (IPv4) + #[test] + fn prop_route_key_ssm_requires_source_v4(group in ipv4_ssm_group_strategy()) { + let key = MulticastRouteKey::any_source(group.into()); + prop_assert!( + key.validate().is_err(), + "SSM (*,G) with {group} should require source" + ); + } + + /// Property: SSM without source fails validation (IPv6) + #[test] + fn prop_route_key_ssm_requires_source_v6(group in ipv6_ssm_group_strategy()) { + let key = MulticastRouteKey::any_source(group.into()); + prop_assert!( + key.validate().is_err(), + "SSM (*,G) with {group} should require source" + ); + } + + /// Property: SSM with source passes validation (IPv4) + #[test] + fn prop_route_key_ssm_with_source_valid_v4( + src in ipv4_unicast_strategy(), + group in ipv4_ssm_group_strategy(), + ) { + let key = MulticastRouteKey::source_specific_v4(src, group); + prop_assert!( + key.validate().is_ok(), + "SSM (S,G) with {src},{group} should be valid" + ); + } + + /// Property: SSM with source passes validation (IPv6) + #[test] + fn prop_route_key_ssm_with_source_valid_v6( + src in routable_ipv6_unicast_strategy(), + group in ipv6_ssm_group_strategy(), + ) { + let key = MulticastRouteKey::source_specific_v6(src, group); + prop_assert!( + 
key.validate().is_ok(), + "SSM (S,G) with {src},{group} should be valid" + ); + } + + /// Property: VNI in valid range passes validation + #[test] + fn prop_route_key_valid_vni( + src in ipv4_unicast_strategy(), + group in any::(), + vni in valid_vni_strategy(), + ) { + // Use (S,G) so both ASM and SSM groups work + let key = MulticastRouteKey::new( + Some(IpAddr::V4(src.ip())), + group.into(), + vni, + ) + .expect("valid key construction"); + let result = key.validate(); + prop_assert!( + result.is_ok(), + "VNI {vni:?} should be valid: {result:?}" + ); + } + + /// Property: invalid VNI is rejected at construction by Vni::try_from. + #[test] + fn prop_route_key_invalid_vni( + vni in invalid_vni_strategy(), + ) { + prop_assert!( + Vni::try_from(vni).is_err(), + "VNI {vni} should be rejected by Vni::try_from" + ); + } + + /// Property: VNI in valid range passes validation (IPv6) + #[test] + fn prop_route_key_valid_vni_v6( + src in routable_ipv6_unicast_strategy(), + group in any::(), + vni in valid_vni_strategy(), + ) { + let key = MulticastRouteKey::new( + Some(IpAddr::V6(src.ip())), + group.into(), + vni, + ) + .expect("valid key construction"); + let result = key.validate(); + prop_assert!( + result.is_ok(), + "VNI {vni:?} should be valid for v6: {result:?}" + ); + } + + /// Property: Class E reserved (240/4) addresses rejected as unicast + /// source. Per RFC 1112 Section 4, this range is reserved. 
+ #[test] + fn prop_unicast_addr_v4_rejects_class_e( + a in 240u8..=254, + b in any::(), + c in any::(), + d in any::(), + ) { + let addr = Ipv4Addr::new(a, b, c, d); + prop_assert!( + UnicastAddrV4::new(addr).is_err(), + "Class E address {addr} should be rejected" + ); + } + + + /// Property: Route with admin-local underlay group passes validation + #[test] + fn prop_route_admin_local_underlay_valid( + group in ipv4_asm_group_strategy(), + underlay in admin_local_multicast_strategy(), + ) { + let key = MulticastRouteKey::any_source(group.into()); + let route = MulticastRoute::new( + key, + underlay, + MulticastSourceProtocol::Static, + ); + prop_assert!( + route.validate().is_ok(), + "route with admin-local underlay should be valid" + ); + } + + /// Property: Non-admin-local address is rejected by UnderlayMulticastIpv6 + #[test] + fn prop_non_admin_local_underlay_rejected( + underlay in non_admin_local_multicast_strategy(), + ) { + prop_assert!( + UnderlayMulticastIpv6::new(underlay.ip()).is_err(), + "non-admin-local address {underlay} should be rejected" + ); + } + + + +} diff --git a/rdb/src/test.rs b/rdb/src/test.rs index 8e4b27b3..4dba1252 100644 --- a/rdb/src/test.rs +++ b/rdb/src/test.rs @@ -2,6 +2,8 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. +// Copyright 2026 Oxide Computer Company + //! Test utilities for rdb tests. use crate::{Db, error::Error}; @@ -9,6 +11,9 @@ use slog::Logger; use std::ops::{Deref, DerefMut}; use std::sync::atomic::{AtomicU64, Ordering}; +/// Default iteration count for rdb wait_for! calls (5 seconds at 1s polling). +pub const TEST_WAIT_ITERATIONS: u64 = 5; + /// A test database wrapper that automatically cleans up the database directory /// when dropped, but only if the test succeeded. 
/// diff --git a/rdb/src/types.rs b/rdb/src/types.rs index 637428bf..3a68b562 100644 --- a/rdb/src/types.rs +++ b/rdb/src/types.rs @@ -2,9 +2,17 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. +// Copyright 2026 Oxide Computer Company + use crate::error::Error; use anyhow::Result; use chrono::{DateTime, Utc}; +use omicron_common::address::{ + IPV4_LINK_LOCAL_MULTICAST_SUBNET, IPV4_MULTICAST_RANGE, IPV4_SSM_SUBNET, + IPV6_INTERFACE_LOCAL_MULTICAST_SUBNET, IPV6_LINK_LOCAL_MULTICAST_SUBNET, + IPV6_MULTICAST_RANGE, IPV6_RESERVED_SCOPE_MULTICAST_SUBNET, + IPV6_SSM_SUBNET, +}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use std::cmp::Ordering; @@ -20,6 +28,14 @@ pub use rdb_types::{ AddressFamily, PeerId, Prefix, Prefix4, Prefix6, ProtocolFilter, }; +// Re-export multicast address constants and types from omicron_common so +// consumers don't need a direct omicron_common dependency. +pub use omicron_common::address::{ + IPV6_ADMIN_SCOPED_MULTICAST_PREFIX, UNDERLAY_MULTICAST_SUBNET, + UNDERLAY_MULTICAST_SUBNET_LAST, +}; +pub use omicron_common::api::external::Vni; + // Marker types for compile-time address family discrimination. // // These zero-sized types enable type-level enforcement of IPv4/IPv6 @@ -380,6 +396,47 @@ impl PrefixDbKey for Prefix6 { } } +/// Extension trait to add `contains` method for checking if a prefix contains +/// an IP address. The base [`Prefix`] type is defined in rdb-types, but this +/// method is specific to RDB's RPF (Reverse Path Forwarding) needs for +/// multicast routing. +pub trait PrefixContains { + /// Check if this prefix contains the given IP address. + /// + /// Performs LPM matching to determine if the address falls + /// within this prefix. Returns `Some(prefix_length)` if the address is + /// contained, `None` otherwise. 
+ fn contains(&self, addr: IpAddr) -> Option; +} + +impl PrefixContains for Prefix { + fn contains(&self, addr: IpAddr) -> Option { + match (self, addr) { + (Prefix::V4(p), IpAddr::V4(a)) => { + let prefix_bits = u32::from(p.value); + let addr_bits = u32::from(a); + let mask = p.mask(); + if (prefix_bits & mask) == (addr_bits & mask) { + Some(p.length) + } else { + None + } + } + (Prefix::V6(p), IpAddr::V6(a)) => { + let prefix_bits = u128::from(p.value); + let addr_bits = u128::from(a); + let mask = p.mask(); + if (prefix_bits & mask) == (addr_bits & mask) { + Some(p.length) + } else { + None + } + } + _ => None, // IPv4 prefix with IPv6 address or vice versa + } + } +} + #[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, Serialize, Deserialize)] pub enum Asn { TwoOctet(u16), @@ -408,9 +465,9 @@ impl From for Asn { } impl Asn { - pub fn as_u32(&self) -> u32 { + pub const fn as_u32(&self) -> u32 { match self { - Self::TwoOctet(value) => u32::from(*value), + Self::TwoOctet(value) => *value as u32, Self::FourOctet(value) => *value, } } @@ -733,6 +790,937 @@ impl Display for PrefixChangeNotification { } } +// ============================================================================ +// MRIB (Multicast RIB) Types +// ============================================================================ + +/// Default VNI for fleet-wide multicast routing. +pub const DEFAULT_MULTICAST_VNI: Vni = Vni::DEFAULT_MULTICAST_VNI; + +/// A validated IPv4 unicast address suitable for multicast source fields. +/// +/// This rejects addresses that cannot appear as a forwarded unicast source: +/// multicast, broadcast, loopback, unspecified, link-local, "this +/// network" (0/8), and Class E reserved (240/4). Private ranges +/// (RFC 1918) are allowed since overlay guests use them. 
+#[derive( + Debug, + Copy, + Clone, + Eq, + PartialEq, + PartialOrd, + Ord, + Hash, + Serialize, + Deserialize, + JsonSchema, +)] +#[serde(try_from = "Ipv4Addr", into = "Ipv4Addr")] +#[schemars(transparent)] +pub struct UnicastAddrV4(Ipv4Addr); + +impl UnicastAddrV4 { + /// Create a new validated IPv4 unicast address. + pub fn new(value: Ipv4Addr) -> Result { + if value.is_multicast() { + return Err(Error::Validation(format!( + "{value} is multicast, not unicast" + ))); + } + if value.is_broadcast() { + return Err(Error::Validation(format!( + "{value} is broadcast, not unicast" + ))); + } + if value.is_loopback() { + return Err(Error::Validation(format!( + "{value} is loopback, not a valid source" + ))); + } + // 0/8 "this network" per RFC 791 + if value.is_unspecified() || value.octets()[0] == 0 { + return Err(Error::Validation(format!( + "{value} is in 0/8 (this-network), not a valid source" + ))); + } + // 169.254/16 per RFC 3927 Section 7: not forwarded by routers + if value.is_link_local() { + return Err(Error::Validation(format!( + "{value} is link-local, not routable" + ))); + } + // Class E reserved (240/4) per RFC 1112 Section 4. + // Replace with Ipv4Addr::is_reserved() when stabilized. + if value.octets()[0] >= 240 { + return Err(Error::Validation(format!( + "{value} is in the reserved Class E range (240/4)" + ))); + } + Ok(Self(value)) + } + + #[inline] + pub const fn ip(&self) -> Ipv4Addr { + self.0 + } +} + +impl fmt::Display for UnicastAddrV4 { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.0) + } +} + +impl TryFrom for UnicastAddrV4 { + type Error = Error; + fn try_from(value: Ipv4Addr) -> Result { + Self::new(value) + } +} + +impl From for Ipv4Addr { + fn from(addr: UnicastAddrV4) -> Self { + addr.0 + } +} + +/// A validated IPv6 unicast address suitable for multicast source fields. +/// +/// Rejects multicast, loopback, unspecified, and link-local (fe80::/10). 
+/// ULA (fc00::/7) is allowed since overlay guests may use these ranges. +#[derive( + Debug, + Copy, + Clone, + Eq, + PartialEq, + PartialOrd, + Ord, + Hash, + Serialize, + Deserialize, + JsonSchema, +)] +#[serde(try_from = "Ipv6Addr", into = "Ipv6Addr")] +#[schemars(transparent)] +pub struct UnicastAddrV6(Ipv6Addr); + +impl UnicastAddrV6 { + /// Create a new validated IPv6 unicast address. + pub fn new(value: Ipv6Addr) -> Result { + if value.is_multicast() { + return Err(Error::Validation(format!( + "{value} is multicast, not unicast" + ))); + } + if value.is_loopback() { + return Err(Error::Validation(format!( + "{value} is loopback, not a valid source" + ))); + } + if value.is_unspecified() { + return Err(Error::Validation(format!( + "{value} is unspecified, not a valid source" + ))); + } + // fe80::/10 per RFC 4291 Section 2.5.6: not forwarded + if value.is_unicast_link_local() { + return Err(Error::Validation(format!( + "{value} is link-local, not routable" + ))); + } + Ok(Self(value)) + } + + #[inline] + pub const fn ip(&self) -> Ipv6Addr { + self.0 + } +} + +impl fmt::Display for UnicastAddrV6 { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.0) + } +} + +impl TryFrom for UnicastAddrV6 { + type Error = Error; + fn try_from(value: Ipv6Addr) -> Result { + Self::new(value) + } +} + +impl From for Ipv6Addr { + fn from(addr: UnicastAddrV6) -> Self { + addr.0 + } +} + +/// A validated IPv4 multicast address. +/// +/// This type guarantees that the inner address is a routable multicast address +/// (not link-local). +#[derive( + Debug, + Copy, + Clone, + Eq, + PartialEq, + PartialOrd, + Ord, + Serialize, + Deserialize, + JsonSchema, +)] +#[serde(try_from = "Ipv4Addr", into = "Ipv4Addr")] +#[schemars(transparent)] +pub struct MulticastAddrV4(Ipv4Addr); + +impl MulticastAddrV4 { + /// Create a new validated IPv4 multicast address. 
+ pub fn new(value: Ipv4Addr) -> Result { + // Must be in multicast range (224.0.0.0/4) + if !IPV4_MULTICAST_RANGE.contains(value) { + return Err(Error::Validation(format!( + "IPv4 address {value} is not multicast \ + (must be in {IPV4_MULTICAST_RANGE})" + ))); + } + + // Reject link-local multicast (224.0.0.0/24) + if IPV4_LINK_LOCAL_MULTICAST_SUBNET.contains(value) { + return Err(Error::Validation(format!( + "IPv4 address {value} is link-local multicast \ + ({IPV4_LINK_LOCAL_MULTICAST_SUBNET}) which is not routable" + ))); + } + + Ok(Self(value)) + } + + /// Returns the underlying IPv4 address. + #[inline] + pub const fn ip(&self) -> Ipv4Addr { + self.0 + } +} + +impl fmt::Display for MulticastAddrV4 { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.0) + } +} + +impl TryFrom for MulticastAddrV4 { + type Error = Error; + + fn try_from(value: Ipv4Addr) -> Result { + Self::new(value) + } +} + +impl From for Ipv4Addr { + fn from(addr: MulticastAddrV4) -> Self { + addr.0 + } +} + +/// A validated IPv6 multicast address. +/// +/// This type guarantees that the inner address is a routable multicast address +/// (not interface-local, link-local, or reserved scope). +#[derive( + Debug, + Copy, + Clone, + Eq, + PartialEq, + PartialOrd, + Ord, + Serialize, + Deserialize, + JsonSchema, +)] +#[serde(try_from = "Ipv6Addr", into = "Ipv6Addr")] +#[schemars(transparent)] +pub struct MulticastAddrV6(Ipv6Addr); + +impl MulticastAddrV6 { + /// Create a new validated IPv6 multicast address. 
+ pub fn new(value: Ipv6Addr) -> Result { + // Must be in multicast range (ff00::/8) + if !IPV6_MULTICAST_RANGE.contains(value) { + return Err(Error::Validation(format!( + "IPv6 address {value} is not multicast \ + (must be in {IPV6_MULTICAST_RANGE})" + ))); + } + + // Reject reserved scope (ff00::/16) (reserved, not usable) + if IPV6_RESERVED_SCOPE_MULTICAST_SUBNET.contains(value) { + return Err(Error::Validation(format!( + "IPv6 address {value} is in reserved scope \ + ({IPV6_RESERVED_SCOPE_MULTICAST_SUBNET}) which is not routable" + ))); + } + + // Reject interface-local multicast (ff01::/16) + if IPV6_INTERFACE_LOCAL_MULTICAST_SUBNET.contains(value) { + return Err(Error::Validation(format!( + "IPv6 address {value} is interface-local multicast \ + ({IPV6_INTERFACE_LOCAL_MULTICAST_SUBNET}) which is not routable" + ))); + } + + // Reject link-local multicast (ff02::/16) + if IPV6_LINK_LOCAL_MULTICAST_SUBNET.contains(value) { + return Err(Error::Validation(format!( + "IPv6 address {value} is link-local multicast \ + ({IPV6_LINK_LOCAL_MULTICAST_SUBNET}) which is not routable" + ))); + } + + Ok(Self(value)) + } + + /// Returns the underlying IPv6 address. + #[inline] + pub const fn ip(&self) -> Ipv6Addr { + self.0 + } +} + +impl fmt::Display for MulticastAddrV6 { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.0) + } +} + +impl TryFrom for MulticastAddrV6 { + type Error = Error; + + fn try_from(value: Ipv6Addr) -> Result { + Self::new(value) + } +} + +impl From for Ipv6Addr { + fn from(addr: MulticastAddrV6) -> Self { + addr.0 + } +} + +/// A validated underlay multicast IPv6 address within ff04::/64. +/// +/// The Oxide rack maps overlay multicast groups 1:1 to admin-local scoped +/// IPv6 multicast addresses in `UNDERLAY_MULTICAST_SUBNET` (ff04::/64). +/// This type enforces that invariant at construction time. +/// +// TODO: This duplicates `dpd_types::mcast::UnderlayMulticastIpv6` in dendrite. 
+// Both should be consolidated into `omicron_common` so maghemite, dendrite, +// and omicron share a single definition. +#[derive( + Debug, + Copy, + Clone, + Eq, + PartialEq, + PartialOrd, + Ord, + Hash, + Serialize, + Deserialize, + JsonSchema, +)] +#[serde(try_from = "Ipv6Addr", into = "Ipv6Addr")] +#[schemars(transparent)] +pub struct UnderlayMulticastIpv6(Ipv6Addr); + +impl UnderlayMulticastIpv6 { + /// Create a new validated underlay multicast address. + /// + /// # Errors + /// + /// Returns an error if the address is not within `UNDERLAY_MULTICAST_SUBNET` + /// (ff04::/64). + pub fn new(value: Ipv6Addr) -> Result { + if !UNDERLAY_MULTICAST_SUBNET.contains(value) { + return Err(Error::Validation(format!( + "underlay address {value} is not within \ + {UNDERLAY_MULTICAST_SUBNET}" + ))); + } + Ok(Self(value)) + } + + /// Returns the underlying IPv6 address. + #[inline] + pub const fn ip(&self) -> Ipv6Addr { + self.0 + } +} + +impl fmt::Display for UnderlayMulticastIpv6 { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.0) + } +} + +impl TryFrom for UnderlayMulticastIpv6 { + type Error = Error; + + fn try_from(value: Ipv6Addr) -> Result { + Self::new(value) + } +} + +impl From for Ipv6Addr { + fn from(addr: UnderlayMulticastIpv6) -> Self { + addr.0 + } +} + +impl From for IpAddr { + fn from(addr: UnderlayMulticastIpv6) -> Self { + IpAddr::V6(addr.0) + } +} + +impl FromStr for UnderlayMulticastIpv6 { + type Err = Error; + + fn from_str(s: &str) -> Result { + let addr: Ipv6Addr = s.parse().map_err(|_| { + Error::Validation(format!("invalid IPv6 address: {s}")) + })?; + Self::new(addr) + } +} + +/// A validated multicast group address (IPv4 or IPv6). +/// +/// This type guarantees that the contained address is a routable multicast +/// address. Construction is only possible through validated paths. 
+#[derive( + Debug, + Copy, + Clone, + Eq, + PartialEq, + PartialOrd, + Ord, + Serialize, + Deserialize, + JsonSchema, +)] +pub enum MulticastAddr { + V4(MulticastAddrV4), + V6(MulticastAddrV6), +} + +impl MulticastAddr { + /// Create an IPv4 multicast address from octets. + pub fn new_v4(a: u8, b: u8, c: u8, d: u8) -> Result { + Ok(Self::V4(MulticastAddrV4::new(Ipv4Addr::new(a, b, c, d))?)) + } + + /// Create an IPv6 multicast address from segments. + pub fn new_v6(segments: [u16; 8]) -> Result { + Ok(Self::V6(MulticastAddrV6::new(Ipv6Addr::new( + segments[0], + segments[1], + segments[2], + segments[3], + segments[4], + segments[5], + segments[6], + segments[7], + ))?)) + } + + /// Returns the underlying IP address. + pub const fn ip(&self) -> IpAddr { + match self { + Self::V4(v4) => IpAddr::V4(v4.ip()), + Self::V6(v6) => IpAddr::V6(v6.ip()), + } + } +} + +impl fmt::Display for MulticastAddr { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + match self { + MulticastAddr::V4(addr) => write!(f, "{}", addr), + MulticastAddr::V6(addr) => write!(f, "{}", addr), + } + } +} + +impl From for MulticastAddr { + fn from(addr: MulticastAddrV4) -> Self { + Self::V4(addr) + } +} + +impl From for MulticastAddr { + fn from(addr: MulticastAddrV6) -> Self { + Self::V6(addr) + } +} + +impl TryFrom for MulticastAddr { + type Error = Error; + + fn try_from(value: Ipv4Addr) -> Result { + Ok(Self::V4(MulticastAddrV4::new(value)?)) + } +} + +impl TryFrom for MulticastAddr { + type Error = Error; + + fn try_from(value: Ipv6Addr) -> Result { + Ok(Self::V6(MulticastAddrV6::new(value)?)) + } +} + +impl TryFrom for MulticastAddr { + type Error = Error; + + fn try_from(value: IpAddr) -> Result { + match value { + IpAddr::V4(v4) => Self::try_from(v4), + IpAddr::V6(v6) => Self::try_from(v6), + } + } +} + +/// IPv4 multicast route key with type-enforced address family matching. 
+#[derive( + Debug, + Copy, + Clone, + Eq, + PartialEq, + PartialOrd, + Ord, + Serialize, + Deserialize, + JsonSchema, +)] +pub struct MulticastRouteKeyV4 { + /// Source address (`None` for (*,G) routes). + pub(crate) source: Option, + /// Multicast group address. + pub(crate) group: MulticastAddrV4, + /// VNI (Virtual Network Identifier). + #[serde(default = "default_multicast_vni")] + pub(crate) vni: Vni, +} + +/// IPv6 multicast route key with type-enforced address family matching. +#[derive( + Debug, + Copy, + Clone, + Eq, + PartialEq, + PartialOrd, + Ord, + Serialize, + Deserialize, + JsonSchema, +)] +pub struct MulticastRouteKeyV6 { + /// Source address (`None` for (*,G) routes). + pub(crate) source: Option, + /// Multicast group address. + pub(crate) group: MulticastAddrV6, + /// VNI (Virtual Network Identifier). + #[serde(default = "default_multicast_vni")] + pub(crate) vni: Vni, +} + +/// Multicast route key: (Source, Group) pair for source-specific multicast, +/// or (*, Group) for any-source multicast. +/// +/// Uses type-enforced address family matching: IPv4 sources can only be +/// paired with IPv4 groups, and IPv6 sources with IPv6 groups. +#[derive( + Debug, + Copy, + Clone, + Eq, + PartialEq, + PartialOrd, + Ord, + Serialize, + Deserialize, + JsonSchema, +)] +pub enum MulticastRouteKey { + V4(MulticastRouteKeyV4), + V6(MulticastRouteKeyV6), +} + +const fn default_multicast_vni() -> Vni { + Vni::DEFAULT_MULTICAST_VNI +} + +impl fmt::Display for MulticastRouteKey { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + match self { + Self::V4(key) => match key.source { + Some(src) => write!(f, "({src},{})", key.group), + None => write!(f, "(*,{})", key.group), + }, + Self::V6(key) => match key.source { + Some(src) => write!(f, "({src},{})", key.group), + None => write!(f, "(*,{})", key.group), + }, + } + } +} + +impl MulticastRouteKey { + /// Create a multicast route key, validating address family matching. 
+ /// + /// Use this when the address family is not known at compile time (e.g., + /// from API requests). Returns an error if source and group address + /// families don't match. For compile-time type safety, prefer + /// [`Self::source_specific_v4`]/[`Self::source_specific_v6`] or + /// [`Self::any_source`]. + pub fn new( + source: Option, + group: MulticastAddr, + vni: Vni, + ) -> Result { + match group { + MulticastAddr::V4(g) => { + let src = match source { + None => None, + Some(IpAddr::V4(s)) => Some(UnicastAddrV4::new(s)?), + Some(IpAddr::V6(s)) => { + return Err(Error::Validation(format!( + "source {s} is IPv6 but group {g} is IPv4" + ))); + } + }; + Ok(Self::V4(MulticastRouteKeyV4 { + source: src, + group: g, + vni, + })) + } + MulticastAddr::V6(g) => { + let src = match source { + None => None, + Some(IpAddr::V6(s)) => Some(UnicastAddrV6::new(s)?), + Some(IpAddr::V4(s)) => { + return Err(Error::Validation(format!( + "source {s} is IPv4 but group {g} is IPv6" + ))); + } + }; + Ok(Self::V6(MulticastRouteKeyV6 { + source: src, + group: g, + vni, + })) + } + } + } + + /// Create an any-source multicast route (*,G) with default VNI. + pub fn any_source(group: MulticastAddr) -> Self { + match group { + MulticastAddr::V4(g) => Self::V4(MulticastRouteKeyV4 { + source: None, + group: g, + vni: Vni::DEFAULT_MULTICAST_VNI, + }), + MulticastAddr::V6(g) => Self::V6(MulticastRouteKeyV6 { + source: None, + group: g, + vni: Vni::DEFAULT_MULTICAST_VNI, + }), + } + } + + /// Create a source-specific IPv4 multicast route (S,G) with default VNI. + pub fn source_specific_v4( + source: UnicastAddrV4, + group: MulticastAddrV4, + ) -> Self { + Self::V4(MulticastRouteKeyV4 { + source: Some(source), + group, + vni: Vni::DEFAULT_MULTICAST_VNI, + }) + } + + /// Create a source-specific IPv6 multicast route (S,G) with default VNI. 
+ pub fn source_specific_v6( + source: UnicastAddrV6, + group: MulticastAddrV6, + ) -> Self { + Self::V6(MulticastRouteKeyV6 { + source: Some(source), + group, + vni: Vni::DEFAULT_MULTICAST_VNI, + }) + } + + /// Create an any-source multicast route (*,G) with specified VNI. + pub fn any_source_with_vni(group: MulticastAddr, vni: Vni) -> Self { + match group { + MulticastAddr::V4(g) => Self::V4(MulticastRouteKeyV4 { + source: None, + group: g, + vni, + }), + MulticastAddr::V6(g) => Self::V6(MulticastRouteKeyV6 { + source: None, + group: g, + vni, + }), + } + } + + /// Create a source-specific IPv4 multicast route (S,G) with VNI. + pub fn source_specific_v4_with_vni( + source: UnicastAddrV4, + group: MulticastAddrV4, + vni: Vni, + ) -> Self { + Self::V4(MulticastRouteKeyV4 { + source: Some(source), + group, + vni, + }) + } + + /// Create a source-specific IPv6 multicast route (S,G) with VNI. + pub fn source_specific_v6_with_vni( + source: UnicastAddrV6, + group: MulticastAddrV6, + vni: Vni, + ) -> Self { + Self::V6(MulticastRouteKeyV6 { + source: Some(source), + group, + vni, + }) + } + + /// Get the source address as IpAddr. + pub fn source(&self) -> Option { + match self { + Self::V4(k) => k.source.map(|s| IpAddr::V4(s.ip())), + Self::V6(k) => k.source.map(|s| IpAddr::V6(s.ip())), + } + } + + /// Get the group address. + pub const fn group(&self) -> MulticastAddr { + match self { + Self::V4(k) => MulticastAddr::V4(k.group), + Self::V6(k) => MulticastAddr::V6(k.group), + } + } + + /// Get the VNI. + pub const fn vni(&self) -> Vni { + match self { + Self::V4(k) => k.vni, + Self::V6(k) => k.vni, + } + } + + /// Serialize this key to bytes for use as a sled database key. + pub fn db_key(&self) -> Result, Error> { + let s = serde_json::to_string(self).map_err(|e| { + Error::Parsing(format!( + "failed to serialize multicast route key: {e}" + )) + })?; + Ok(s.as_bytes().into()) + } + + /// Deserialize a key from sled database bytes. 
+ pub fn from_db_key(v: &[u8]) -> Result { + let s = String::from_utf8_lossy(v); + serde_json::from_str(&s).map_err(|e| { + Error::DbKey(format!("failed to parse multicast route key: {e}")) + }) + } + + /// Validate the multicast route key. + /// + /// Checks: + /// - SSM groups require a source address (RFC 4607) + /// - IPv4: 232.0.0.0/8 + /// - IPv6: ff30::/12 (superset covering all ff3x:: scopes) + /// - Source address (if present) must be unicast + /// - (S,G) joins on ASM ranges are permitted, giving source + /// filtering outside the SSM range (IGMPv3/MLDv2 semantics) + /// + /// VNI validity is enforced by the [`Vni`] type at construction. + pub fn validate(&self) -> Result<(), Error> { + // SSM addresses require a source (RFC 4607). This is consistent with + // DPD's validate_ipv4_multicast / validate_ipv6_multicast. + // + // ASM addresses can also have sources, allowing (S,G) joins on + // ASM ranges for source filtering outside the SSM range. + // + // If real-world deployments need (*,G) on SSM addresses, this + // check and the corresponding DPD validation can be relaxed + // together and we can update our policy handling. + let is_ssm = match self { + Self::V4(k) => IPV4_SSM_SUBNET.contains(k.group.ip()), + Self::V6(k) => IPV6_SSM_SUBNET.contains(k.group.ip()), + }; + if is_ssm && self.source().is_none() { + return Err(Error::Validation(format!( + "SSM group {} requires a source address", + self.group() + ))); + } + + Ok(()) + } +} + +/// Multicast route entry containing replication groups and metadata. +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +pub struct MulticastRoute { + /// The multicast route key (S,G) or (*,G). + pub key: MulticastRouteKey, + /// Expected RPF neighbor for the source (for RPF checks). + pub rpf_neighbor: Option, + /// Underlay multicast group address (ff04::/64). + /// + /// Overlay multicast addresses are mapped 1:1 to admin-local scope + /// underlay addresses. 
Switches replicate to this address via the + /// PRE (with tofino_asic). + /// + /// OPTE handles the overlay/underlay translation at sled boundaries, while + /// sled membership is managed by Omicron and programmed to DPD/OPTE + /// directly. + pub underlay_group: UnderlayMulticastIpv6, + /// Route source (static, IGMP, etc.). + pub source: MulticastSourceProtocol, + /// Creation timestamp. + pub created: DateTime, + /// Last updated timestamp. + /// + /// Only updated when route fields change semantically (rpf_neighbor, + /// underlay_group, source). An idempotent upsert with an identical + /// value does not update this timestamp. + pub updated: DateTime, +} + +impl MulticastRoute { + pub fn new( + key: MulticastRouteKey, + underlay_group: UnderlayMulticastIpv6, + source: MulticastSourceProtocol, + ) -> Self { + let now = Utc::now(); + Self { + key, + rpf_neighbor: None, + underlay_group, + source, + created: now, + updated: now, + } + } + + /// Validate the multicast route. + /// + /// Checks: + /// - Key validation (source unicast, AF match, VNI range) + /// - RPF neighbor (if present) must be unicast + /// - RPF neighbor address family must match group address family + pub fn validate(&self) -> Result<(), Error> { + self.key.validate()?; + + // underlay_group is validated by UnderlayMulticastIpv6 at + // construction time (must be within ff04::/64). 
+
+        // Validate RPF neighbor if present
+        if let Some(rpf) = &self.rpf_neighbor {
+            match rpf {
+                IpAddr::V4(addr) => {
+                    if addr.is_multicast() {
+                        return Err(Error::Validation(format!(
+                            "RPF neighbor {addr} must be unicast, not multicast"
+                        )));
+                    }
+                    if addr.is_broadcast() {
+                        return Err(Error::Validation(format!(
+                            "RPF neighbor {addr} must be unicast, not broadcast"
+                        )));
+                    }
+                    // Address family must match group
+                    if !matches!(self.key.group(), MulticastAddr::V4(_)) {
+                        return Err(Error::Validation(format!(
+                            "RPF neighbor {addr} is IPv4 but group {} is IPv6",
+                            self.key.group()
+                        )));
+                    }
+                }
+                IpAddr::V6(addr) => {
+                    if addr.is_multicast() {
+                        return Err(Error::Validation(format!(
+                            "RPF neighbor {addr} must be unicast, not multicast"
+                        )));
+                    }
+                    // AF must match group
+                    if !matches!(self.key.group(), MulticastAddr::V6(_)) {
+                        return Err(Error::Validation(format!(
+                            "RPF neighbor {addr} is IPv6 but group {} is IPv4",
+                            self.key.group()
+                        )));
+                    }
+                }
+            }
+        }
+
+        Ok(())
+    }
+}
+
+/// Source of a multicast route entry.
+#[derive(
+    Debug, Copy, Clone, Serialize, Deserialize, JsonSchema, Eq, PartialEq,
+)]
+pub enum MulticastSourceProtocol {
+    /// Static route configured via API.
+    Static,
+    /// Learned via IGMP snooping (future).
+    Igmp,
+    /// Learned via MLD snooping (future).
+    Mld,
+}
+
+/// Notification for MRIB changes, sent to watchers.
+#[derive(Clone, Default, Debug)]
+pub struct MribChangeNotification {
+    pub changed: BTreeSet<MulticastRouteKey>,
+}
+
+impl From<MulticastRouteKey> for MribChangeNotification {
+    fn from(value: MulticastRouteKey) -> Self {
+        Self {
+            changed: BTreeSet::from([value]),
+        }
+    }
+}
+
 #[cfg(test)]
 pub mod test_helpers {
     use super::Path;
@@ -775,6 +1763,18 @@ mod test {
         cmp::Ordering, collections::BTreeSet, net::IpAddr, str::FromStr,
     };
 
+    /// ASM IPv4 group suitable for (*,G) tests.
+    const TEST_GROUP_V4: Ipv4Addr = Ipv4Addr::new(239, 1, 1, 1);
+
+    /// ASM IPv6 group suitable for (*,G) tests.
+    const TEST_GROUP_V6: Ipv6Addr = Ipv6Addr::new(0xff0e, 0, 0, 0, 0, 0, 0, 1);
+
+    /// Test underlay address within ff04::/64.
+    fn test_underlay() -> UnderlayMulticastIpv6 {
+        UnderlayMulticastIpv6::new(Ipv6Addr::new(0xff04, 0, 0, 0, 0, 0, 0, 1))
+            .expect("valid test underlay address")
+    }
+
     fn bgp_path(
         nexthop: IpAddr,
         peer: PeerId,
@@ -985,6 +1985,188 @@ mod test {
         assert_eq!(set.iter().next().unwrap().bgp.as_ref().unwrap().med, None,);
     }
 
+    #[test]
+    fn broadcast_source_rejected() {
+        assert!(
+            UnicastAddrV4::new(Ipv4Addr::BROADCAST).is_err(),
+            "broadcast should be rejected as unicast source"
+        );
+    }
+
+    #[test]
+    fn loopback_source_rejected_v4() {
+        assert!(
+            UnicastAddrV4::new(Ipv4Addr::LOCALHOST).is_err(),
+            "loopback should be rejected as unicast source"
+        );
+    }
+
+    #[test]
+    fn loopback_source_rejected_v6() {
+        assert!(
+            UnicastAddrV6::new(Ipv6Addr::LOCALHOST).is_err(),
+            "loopback should be rejected as unicast source"
+        );
+    }
+
+    #[test]
+    fn unspecified_source_rejected_v4() {
+        assert!(
+            UnicastAddrV4::new(Ipv4Addr::UNSPECIFIED).is_err(),
+            "unspecified should be rejected as unicast source"
+        );
+    }
+
+    #[test]
+    fn unspecified_source_rejected_v6() {
+        assert!(
+            UnicastAddrV6::new(Ipv6Addr::UNSPECIFIED).is_err(),
+            "unspecified should be rejected as unicast source"
+        );
+    }
+
+    #[test]
+    fn route_key_af_mismatch_v4_source_v6_group() {
+        let src = UnicastAddrV4::new(Ipv4Addr::new(10, 0, 0, 1)).unwrap();
+        let group =
+            MulticastAddrV6::new(Ipv6Addr::new(0xff3e, 0, 0, 0, 0, 0, 0, 1))
+                .unwrap();
+        let result = MulticastRouteKey::new(
+            Some(IpAddr::V4(src.ip())),
+            group.into(),
+            DEFAULT_MULTICAST_VNI,
+        );
+        assert!(
+            result.is_err(),
+            "v4 source with v6 group should be rejected"
+        );
+    }
+
+    #[test]
+    fn route_key_af_mismatch_v6_source_v4_group() {
+        let src = Ipv6Addr::new(0x2001, 0xdb8, 0, 0, 0, 0, 0, 1);
+        let group = MulticastAddrV4::new(TEST_GROUP_V4).unwrap();
+        let result = MulticastRouteKey::new(
+            Some(IpAddr::V6(src)),
+            group.into(),
+            DEFAULT_MULTICAST_VNI,
+        );
+        assert!(
+            result.is_err(),
+            "v6 source with v4 group should be rejected"
+        );
+    }
+
+    #[test]
+    fn multicast_source_rejected_v4() {
+        assert!(
+            UnicastAddrV4::new(Ipv4Addr::new(224, 0, 0, 1)).is_err(),
+            "multicast address should be rejected as source"
+        );
+    }
+
+    #[test]
+    fn multicast_source_rejected_v6() {
+        assert!(
+            UnicastAddrV6::new(Ipv6Addr::new(0xff02, 0, 0, 0, 0, 0, 0, 1))
+                .is_err(),
+            "multicast address should be rejected as source"
+        );
+    }
+
+    #[test]
+    fn unicast_rpf_valid_v4() {
+        let group = MulticastAddrV4::new(TEST_GROUP_V4).unwrap();
+        let key = MulticastRouteKey::any_source(group.into());
+        let mut route = MulticastRoute::new(
+            key,
+            test_underlay(),
+            MulticastSourceProtocol::Static,
+        );
+        route.rpf_neighbor = Some(IpAddr::V4(Ipv4Addr::new(10, 0, 0, 1)));
+        assert!(route.validate().is_ok(), "unicast v4 RPF should be valid");
+    }
+
+    #[test]
+    fn unicast_rpf_valid_v6() {
+        let group = MulticastAddrV6::new(TEST_GROUP_V6).unwrap();
+        let key = MulticastRouteKey::any_source(group.into());
+        let mut route = MulticastRoute::new(
+            key,
+            test_underlay(),
+            MulticastSourceProtocol::Static,
+        );
+        route.rpf_neighbor =
+            Some(IpAddr::V6(Ipv6Addr::new(0x2001, 0xdb8, 0, 0, 0, 0, 0, 1)));
+        assert!(route.validate().is_ok(), "unicast v6 RPF should be valid");
+    }
+
+    #[test]
+    fn multicast_rpf_invalid_v4() {
+        let group = MulticastAddrV4::new(TEST_GROUP_V4).unwrap();
+        let key = MulticastRouteKey::any_source(group.into());
+        let mut route = MulticastRoute::new(
+            key,
+            test_underlay(),
+            MulticastSourceProtocol::Static,
+        );
+        route.rpf_neighbor = Some(IpAddr::V4(Ipv4Addr::new(224, 0, 0, 1)));
+        assert!(
+            route.validate().is_err(),
+            "multicast RPF should be rejected"
+        );
+    }
+
+    #[test]
+    fn multicast_rpf_invalid_v6() {
+        let group = MulticastAddrV6::new(TEST_GROUP_V6).unwrap();
+        let key = MulticastRouteKey::any_source(group.into());
+        let mut route = MulticastRoute::new(
+            key,
+            test_underlay(),
+            MulticastSourceProtocol::Static,
+        );
+        route.rpf_neighbor =
+            Some(IpAddr::V6(Ipv6Addr::new(0xff02, 0, 0, 0, 0, 0, 0, 1)));
+        assert!(
+            route.validate().is_err(),
+            "multicast RPF should be rejected"
+        );
+    }
+
+    #[test]
+    fn rpf_af_mismatch_v4_rpf_v6_group() {
+        let group = MulticastAddrV6::new(TEST_GROUP_V6).unwrap();
+        let key = MulticastRouteKey::any_source(group.into());
+        let mut route = MulticastRoute::new(
+            key,
+            test_underlay(),
+            MulticastSourceProtocol::Static,
+        );
+        route.rpf_neighbor = Some(IpAddr::V4(Ipv4Addr::new(10, 0, 0, 1)));
+        assert!(
+            route.validate().is_err(),
+            "v4 RPF with v6 group should be rejected"
+        );
+    }
+
+    #[test]
+    fn rpf_af_mismatch_v6_rpf_v4_group() {
+        let group = MulticastAddrV4::new(TEST_GROUP_V4).unwrap();
+        let key = MulticastRouteKey::any_source(group.into());
+        let mut route = MulticastRoute::new(
+            key,
+            test_underlay(),
+            MulticastSourceProtocol::Static,
+        );
+        route.rpf_neighbor =
+            Some(IpAddr::V6(Ipv6Addr::new(0x2001, 0xdb8, 0, 0, 0, 0, 0, 1)));
+        assert!(
+            route.validate().is_err(),
+            "v6 RPF with v4 group should be rejected"
+        );
+    }
+
     /// remove() targets the correct path by identity, not by
     /// attribute values.
     #[test]