From 17e1c3b17c1a04c42d3bf612605db446bc90ed45 Mon Sep 17 00:00:00 2001 From: Trey Aspelund Date: Fri, 27 Mar 2026 17:23:29 -0600 Subject: [PATCH 1/4] ddm: cleanup peer tracking and add state durations Db.peers was only used as a cache for reporting peers to ddm-api clients calling GET /peers, and the contents of the cache were a bit misleading. For example, there is no such thing as a peer state in DDM. DDM FSMs have states, but they are instantiated per interface not per peer. Not only does the state reported by GET /peers not actually map to a peer, but it doesn't even directly map to the FSM of the peer's interface. Instead, that state was populated by manual db updates scattered throughout the FSM implementation. This removes Db.peers and introduces a new shared InterfaceState struct that consolidates peer/FSM info for a given interface. GET /peers now reads from each InterfaceState and passes along any peers it finds along with interface state. Also fixes a pre-existing bug in oxstats where interface names were always empty due to reading from a stale SmContext clone. --- ddm-admin-client/src/lib.rs | 5 +- ddm-api/src/lib.rs | 14 ++- ddm-types/versions/src/latest.rs | 5 +- ddm-types/versions/src/lib.rs | 2 + ddm-types/versions/src/peer_durations/db.rs | 48 +++++++++ ddm-types/versions/src/peer_durations/mod.rs | 10 ++ ddm/src/admin.rs | 33 +++++- ddm/src/db.rs | 23 +--- ddm/src/discovery.rs | 42 +++++--- ddm/src/oxstats.rs | 29 ++--- ddm/src/sm.rs | 71 +++++++++++- ddmadm/src/main.rs | 24 ++++- ddmd/src/main.rs | 3 +- .../ddm-admin-1.0.0-b6eac7.json.gitstub | 1 + ...6eac7.json => ddm-admin-2.0.0-45d40c.json} | 101 ++++++++++++++++-- openapi/ddm-admin/ddm-admin-latest.json | 2 +- 16 files changed, 339 insertions(+), 74 deletions(-) create mode 100644 ddm-types/versions/src/peer_durations/db.rs create mode 100644 ddm-types/versions/src/peer_durations/mod.rs create mode 100644 openapi/ddm-admin/ddm-admin-1.0.0-b6eac7.json.gitstub rename openapi/ddm-admin/{ddm-admin-1.0.0-b6eac7.json => ddm-admin-2.0.0-45d40c.json} (85%) diff --git a/ddm-admin-client/src/lib.rs b/ddm-admin-client/src/lib.rs index ea45cea2..9659d16a 100644 --- a/ddm-admin-client/src/lib.rs +++ b/ddm-admin-client/src/lib.rs @@ -17,7 +17,10 @@ progenitor::generate_api!( }, post_hook = (|log: &slog::Logger, result: &Result<_, _>| { slog::trace!(log, "client response"; "result" => ?result); - }) + }), + replace = { + Duration = std::time::Duration, + } ); impl std::cmp::PartialEq for types::TunnelOrigin { diff --git a/ddm-api/src/lib.rs b/ddm-api/src/lib.rs index 546797dc..7156f2d8 100644 --- a/ddm-api/src/lib.rs +++ b/ddm-api/src/lib.rs @@ -3,6 +3,7 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. use ddm_types_versions::latest; +use ddm_types_versions::v1; use dropshot::HttpError; use dropshot::HttpResponseOk; use dropshot::HttpResponseUpdatedNoContent; @@ -26,6 +27,7 @@ api_versions!([ // | example for the next person. // v // (next_int, IDENT), + (2, PEER_DURATIONS), (1, INITIAL), ]); @@ -45,11 +47,21 @@ api_versions!([ pub trait DdmAdminApi { type Context; - #[endpoint { method = GET, path = "/peers" }] + #[endpoint { method = GET, path = "/peers", versions = VERSION_PEER_DURATIONS.. }] async fn get_peers( ctx: RequestContext, ) -> Result>, HttpError>; + #[endpoint { method = GET, path = "/peers", versions = ..VERSION_PEER_DURATIONS }] + async fn get_peers_v1( + ctx: RequestContext, + ) -> Result>, HttpError> { + let resp = Self::get_peers(ctx).await?; + let converted: HashMap = + resp.0.into_iter().map(|(k, v)| (k, v.into())).collect(); + Ok(HttpResponseOk(converted)) + } + #[endpoint { method = DELETE, path = "/peers/{addr}" }] async fn expire_peer( ctx: RequestContext, diff --git a/ddm-types/versions/src/latest.rs b/ddm-types/versions/src/latest.rs index 08057601..2d506f92 100644 --- a/ddm-types/versions/src/latest.rs +++ b/ddm-types/versions/src/latest.rs @@ -11,10 +11,11 @@ pub mod admin { } pub mod db { - pub use crate::v1::db::PeerInfo; - pub use crate::v1::db::PeerStatus; pub use crate::v1::db::RouterKind; pub use crate::v1::db::TunnelRoute; + + pub use crate::v2::db::PeerInfo; + pub use crate::v2::db::PeerStatus; } pub mod exchange { diff --git a/ddm-types/versions/src/lib.rs b/ddm-types/versions/src/lib.rs index 9f8dcdc7..7d88b927 100644 --- a/ddm-types/versions/src/lib.rs +++ b/ddm-types/versions/src/lib.rs @@ -32,3 +32,5 @@ pub mod latest; #[path = "initial/mod.rs"] pub mod v1; +#[path = "peer_durations/mod.rs"] +pub mod v2; diff --git a/ddm-types/versions/src/peer_durations/db.rs b/ddm-types/versions/src/peer_durations/db.rs new file mode 100644 index 00000000..08046019 --- /dev/null +++ b/ddm-types/versions/src/peer_durations/db.rs @@ -0,0 +1,48 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use std::net::Ipv6Addr; +use std::time::Duration; + +use crate::v1::db::RouterKind; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; + +/// Status of a DDM peer, including how long the peer has been in its +/// current state. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)] +#[serde(tag = "type", content = "duration")] +pub enum PeerStatus { + Init(Duration), + Solicit(Duration), + Exchange(Duration), + Expired(Duration), +} + +/// Information about a DDM peer. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)] +pub struct PeerInfo { + pub status: PeerStatus, + pub addr: Ipv6Addr, + pub host: String, + pub kind: RouterKind, +} + +// Response backwards-compat: convert v2 PeerInfo to v1 PeerInfo. +impl From for crate::v1::db::PeerInfo { + fn from(value: PeerInfo) -> Self { + Self { + status: match value.status { + PeerStatus::Init(_) | PeerStatus::Solicit(_) => { + crate::v1::db::PeerStatus::NoContact + } + PeerStatus::Exchange(_) => crate::v1::db::PeerStatus::Active, + PeerStatus::Expired(_) => crate::v1::db::PeerStatus::Expired, + }, + addr: value.addr, + host: value.host, + kind: value.kind, + } + } +} diff --git a/ddm-types/versions/src/peer_durations/mod.rs b/ddm-types/versions/src/peer_durations/mod.rs new file mode 100644 index 00000000..7a9776a9 --- /dev/null +++ b/ddm-types/versions/src/peer_durations/mod.rs @@ -0,0 +1,10 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Version `PEER_DURATIONS` of the DDM Admin API. +//! +//! Tracks how long each DDM peer has been in its current state and exposes +//! that through the `/peers` endpoint with duration information. + +pub mod db; diff --git a/ddm/src/admin.rs b/ddm/src/admin.rs index 6d49a368..e53ded1a 100644 --- a/ddm/src/admin.rs +++ b/ddm/src/admin.rs @@ -7,7 +7,7 @@ use crate::sm::{AdminEvent, Event, PrefixSet, SmContext}; use ddm_api::DdmAdminApi; use ddm_api::ddm_admin_api_mod; use ddm_types::admin::{EnableStatsRequest, ExpirePathParams, PrefixMap}; -use ddm_types::db::{PeerInfo, TunnelRoute}; +use ddm_types::db::{PeerInfo, RouterKind, TunnelRoute}; use ddm_types::exchange::PathVector; use dropshot::ApiDescription; use dropshot::ApiDescriptionBuildErrors; @@ -20,12 +20,11 @@ use dropshot::HttpResponseUpdatedNoContent; use dropshot::Path; use dropshot::RequestContext; use dropshot::TypedBody; -use mg_common::lock; -use mg_common::net::TunnelOrigin; +use mg_common::{lock, net::TunnelOrigin}; use oxnet::Ipv6Net; use slog::{Logger, error, info}; use std::collections::{HashMap, HashSet}; -use std::net::{IpAddr, SocketAddr, SocketAddrV4, SocketAddrV6}; +use std::net::{IpAddr, Ipv6Addr, SocketAddr, SocketAddrV4, SocketAddrV6}; use std::sync::Arc; use std::sync::Mutex; use std::sync::atomic::{AtomicU64, Ordering}; @@ -113,7 +112,31 @@ impl DdmAdminApi for DdmAdminApiImpl { ctx: RequestContext, ) -> Result>, HttpError> { let ctx = lock!(ctx.context()); - Ok(HttpResponseOk(ctx.db.peers())) + let mut result = HashMap::new(); + for sm in &ctx.peers { + let if_index = *lock!(sm.iface.if_index); + if if_index == 0 { + continue; + } + let status = sm.iface.peer_status(); + let peer = lock!(sm.iface.peer_identity).clone(); + let addr = + lock!(sm.stats.peer_address).unwrap_or(Ipv6Addr::UNSPECIFIED); + let (host, kind) = match peer { + Some(p) => (p.hostname, p.kind), + None => (String::new(), RouterKind::Server), + }; + result.insert( + if_index, + PeerInfo { + status, + addr, + host, + kind, + }, + ); + } + Ok(HttpResponseOk(result)) } async fn expire_peer( diff --git a/ddm/src/db.rs b/ddm/src/db.rs index 13338cc4..424957e0 100644 --- a/ddm/src/db.rs +++ b/ddm/src/db.rs @@ -2,9 +2,8 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -use ddm_types::db::{PeerInfo, TunnelRoute}; -use mg_common::lock; -use mg_common::net::TunnelOrigin; +use ddm_types::db::TunnelRoute; +use mg_common::{lock, net::TunnelOrigin}; use oxnet::{IpNet, Ipv6Net}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; @@ -45,7 +44,6 @@ pub struct Db { #[derive(Default, Clone)] pub struct DbData { - pub peers: HashMap, pub imported: HashSet, pub imported_tunnel: HashSet, } @@ -65,10 +63,6 @@ impl Db { lock!(self.data).clone() } - pub fn peers(&self) -> HashMap { - lock!(self.data).peers.clone() - } - pub fn imported(&self) -> HashSet { lock!(self.data).imported.clone() } @@ -221,15 +215,6 @@ impl Db { Ok(()) } - /// Set peer info at the given index. Returns true if peer information was - /// changed. - pub fn set_peer(&self, index: u32, info: PeerInfo) -> bool { - match lock!(self.data).peers.insert(index, info.clone()) { - Some(previous) => previous == info, - None => true, - } - } - pub fn remove_nexthop_routes( &self, nexthop: Ipv6Addr, @@ -259,10 +244,6 @@ impl Db { (removed, tnl_removed) } - pub fn remove_peer(&self, index: u32) { - lock!(self.data).peers.remove(&index); - } - pub fn routes_by_vector( &self, dst: Ipv6Net, diff --git a/ddm/src/discovery.rs b/ddm/src/discovery.rs index fc4a84e8..ca03f2e8 100644 --- a/ddm/src/discovery.rs +++ b/ddm/src/discovery.rs @@ -85,11 +85,12 @@ //! and 1 for a transit routers. The fourth byte is a hostname length followed //! directly by a hostname of up to 255 bytes in length. -use crate::db::Db; -use crate::sm::{Config, Event, NeighborEvent, SessionStats}; +use crate::sm::{ + Config, Event, InterfaceState, NeighborEvent, PeerIdentity, SessionStats, +}; use crate::util::u8_slice_assume_init_ref; use crate::{dbg, err, inf, trc, wrn}; -use ddm_types::db::{PeerInfo, PeerStatus, RouterKind}; +use ddm_types::db::RouterKind; use mg_common::lock; use serde::{Deserialize, Serialize}; use slog::Logger; @@ -173,7 +174,8 @@ struct HandlerContext { nbr: Arc>>, log: Logger, event: Sender, - db: Db, + iface: Arc, + stats: Arc, } struct Neighbor { @@ -187,7 +189,7 @@ pub(crate) fn handler( hostname: String, config: Config, event: Sender, - db: Db, + iface: Arc, stats: Arc, log: Logger, ) -> Result<(), DiscoveryError> { @@ -227,7 +229,8 @@ pub(crate) fn handler( hostname, event, config, - db, + iface, + stats: stats.clone(), }; let stop = Arc::new(AtomicBool::new(false)); @@ -519,17 +522,24 @@ fn handle_advertisement( } }; drop(guard); - let updated = ctx.db.set_peer( - ctx.config.if_index, - PeerInfo { - status: PeerStatus::Active, - addr: *sender, - host: hostname, - kind, - }, - ); + let updated = { + let mut info = lock!(ctx.iface.peer_identity); + let peer_addr = *lock!(ctx.stats.peer_address); + let changed = match &*info { + Some(existing) => { + existing.hostname != hostname + || existing.kind != kind + || peer_addr != Some(*sender) + } + None => true, + }; + if changed { + *info = Some(PeerIdentity { hostname, kind }); + } + changed + }; if updated { - lock!(stats.peer_address).replace(*sender); + lock!(ctx.stats.peer_address).replace(*sender); emit_nbr_update(ctx, sender, version); } } diff --git a/ddm/src/oxstats.rs b/ddm/src/oxstats.rs index dc82ea67..e134dfc6 100644 --- a/ddm/src/oxstats.rs +++ b/ddm/src/oxstats.rs @@ -4,7 +4,10 @@ use crate::{admin::RouterStats, sm::SmContext}; use chrono::{DateTime, Utc}; -use mg_common::nexus::{local_underlay_address, run_oximeter}; +use mg_common::{ + lock, + nexus::{local_underlay_address, run_oximeter}, +}; use omicron_common::api::internal::nexus::{ProducerEndpoint, ProducerKind}; use oximeter::{ MetricsError, Producer, Sample, @@ -154,7 +157,7 @@ impl Producer for Stats { self.hostname.clone().into(), self.rack_id, self.sled_id, - peer.config.if_name.clone().into(), + lock!(peer.iface.if_name).clone().into(), SolicitationsSent, peer.stats.solicitations_sent )); @@ -163,7 +166,7 @@ impl Producer for Stats { self.hostname.clone().into(), self.rack_id, self.sled_id, - peer.config.if_name.clone().into(), + lock!(peer.iface.if_name).clone().into(), SolicitationsReceived, peer.stats.solicitations_received )); @@ -172,7 +175,7 @@ impl Producer for Stats { self.hostname.clone().into(), self.rack_id, self.sled_id, - peer.config.if_name.clone().into(), + lock!(peer.iface.if_name).clone().into(), AdvertisementsSent, peer.stats.advertisements_sent )); @@ -181,7 +184,7 @@ impl Producer for Stats { self.hostname.clone().into(), self.rack_id, self.sled_id, - peer.config.if_name.clone().into(), + lock!(peer.iface.if_name).clone().into(), AdvertisementsReceived, peer.stats.advertisements_received )); @@ -190,7 +193,7 @@ impl Producer for Stats { self.hostname.clone().into(), self.rack_id, self.sled_id, - peer.config.if_name.clone().into(), + lock!(peer.iface.if_name).clone().into(), PeerExpirations, peer.stats.peer_expirations )); @@ -199,7 +202,7 @@ impl Producer for Stats { self.hostname.clone().into(), self.rack_id, self.sled_id, - peer.config.if_name.clone().into(), + lock!(peer.iface.if_name).clone().into(), PeerAddressChanges, peer.stats.peer_address_changes )); @@ -208,7 +211,7 @@ impl Producer for Stats { self.hostname.clone().into(), self.rack_id, self.sled_id, - peer.config.if_name.clone().into(), + lock!(peer.iface.if_name).clone().into(), PeerSessionsEstablished, peer.stats.peer_established )); @@ -217,7 +220,7 @@ impl Producer for Stats { self.hostname.clone().into(), self.rack_id, self.sled_id, - peer.config.if_name.clone().into(), + lock!(peer.iface.if_name).clone().into(), UpdatesSent, peer.stats.updates_sent )); @@ -226,7 +229,7 @@ impl Producer for Stats { self.hostname.clone().into(), self.rack_id, self.sled_id, - peer.config.if_name.clone().into(), + lock!(peer.iface.if_name).clone().into(), UpdatesReceived, peer.stats.updates_received )); @@ -235,7 +238,7 @@ impl Producer for Stats { self.hostname.clone().into(), self.rack_id, self.sled_id, - peer.config.if_name.clone().into(), + lock!(peer.iface.if_name).clone().into(), UpdateSendFail, peer.stats.update_send_fail )); @@ -243,7 +246,7 @@ impl Producer for Stats { self.hostname.clone().into(), self.rack_id, self.sled_id, - peer.config.if_name.clone().into(), + lock!(peer.iface.if_name).clone().into(), ImportedUnderlayPrefixes, peer.stats.imported_underlay_prefixes )); @@ -251,7 +254,7 @@ impl Producer for Stats { self.hostname.clone().into(), self.rack_id, self.sled_id, - peer.config.if_name.clone().into(), + lock!(peer.iface.if_name).clone().into(), ImportedTunnelEndpoints, peer.stats.imported_tunnel_endpoints )); diff --git a/ddm/src/sm.rs b/ddm/src/sm.rs index 24215795..cfba05ea 100644 --- a/ddm/src/sm.rs +++ b/ddm/src/sm.rs @@ -6,10 +6,10 @@ use crate::db::Db; use crate::discovery::Version; use crate::exchange::{TunnelUpdate, UnderlayUpdate, Update}; use crate::{dbg, discovery, err, exchange, inf, wrn}; -use ddm_types::db::RouterKind; +use ddm_types::db::{PeerStatus, RouterKind}; use ddm_types::exchange::PathVector; use libnet::get_ipaddr_info; -use mg_common::net::TunnelOrigin; +use mg_common::{lock, net::TunnelOrigin}; use oxnet::Ipv6Net; use slog::Logger; use std::collections::HashSet; @@ -19,7 +19,7 @@ use std::sync::mpsc::{Receiver, Sender}; use std::sync::{Arc, Mutex}; use std::thread::sleep; use std::thread::spawn; -use std::time::Duration; +use std::time::{Duration, Instant}; use thiserror::Error; #[derive(Debug)] @@ -154,6 +154,56 @@ pub struct DpdConfig { pub port: u16, } +#[derive(Clone, Debug)] +pub enum FsmState { + Init, + Solicit, + Exchange, +} + +impl FsmState { + pub fn to_peer_status(&self, elapsed: Duration) -> PeerStatus { + match self { + FsmState::Init => PeerStatus::Init(elapsed), + FsmState::Solicit => PeerStatus::Solicit(elapsed), + FsmState::Exchange => PeerStatus::Exchange(elapsed), + } + } +} + +#[derive(Clone, Debug)] +pub struct PeerIdentity { + pub hostname: String, + pub kind: RouterKind, +} + +pub struct InterfaceState { + pub if_index: Mutex, + pub if_name: Mutex, + pub fsm_state: Mutex, + pub last_fsm_state_change: Mutex, + pub peer_identity: Mutex>, +} + +impl InterfaceState { + pub fn peer_status(&self) -> PeerStatus { + let elapsed = lock!(self.last_fsm_state_change).elapsed(); + lock!(self.fsm_state).to_peer_status(elapsed) + } +} + +impl Default for InterfaceState { + fn default() -> Self { + Self { + if_index: Mutex::new(0), + if_name: Mutex::new(String::new()), + fsm_state: Mutex::new(FsmState::Init), + last_fsm_state_change: Mutex::new(Instant::now()), + peer_identity: Mutex::new(None), + } + } +} + #[derive(Default)] pub struct SessionStats { // Discovery @@ -182,6 +232,7 @@ pub struct SmContext { pub event_channels: Vec>, pub rt: Arc, pub hostname: String, + pub iface: Arc, pub stats: Arc, pub log: Logger, } @@ -231,6 +282,9 @@ impl State for Init { &mut self, event: Receiver, ) -> (Box, Receiver) { + *lock!(self.ctx.iface.fsm_state) = FsmState::Init; + *lock!(self.ctx.iface.last_fsm_state_change) = Instant::now(); + *lock!(self.ctx.iface.peer_identity) = None; loop { let info = match get_ipaddr_info(&self.ctx.config.aobj_name) { Ok(info) => info, @@ -262,6 +316,8 @@ impl State for Init { self.ctx.config.if_name.clone_from(&info.ifname); self.ctx.config.if_index = info.index as u32; self.ctx.config.addr = addr; + *lock!(self.ctx.iface.if_index) = info.index as u32; + *lock!(self.ctx.iface.if_name) = info.ifname.clone(); inf!( self.log, self.ctx.config.if_name, @@ -277,7 +333,7 @@ impl State for Init { self.ctx.hostname.clone(), self.ctx.config.clone(), self.ctx.tx.clone(), - self.ctx.db.clone(), + self.ctx.iface.clone(), self.ctx.stats.clone(), self.ctx.log.clone(), ) @@ -306,6 +362,8 @@ impl State for Solicit { &mut self, event: Receiver, ) -> (Box, Receiver) { + *lock!(self.ctx.iface.fsm_state) = FsmState::Solicit; + *lock!(self.ctx.iface.last_fsm_state_change) = Instant::now(); loop { let e = match event.recv() { Ok(e) => e, @@ -454,7 +512,8 @@ impl Exchange { pull_stop: &AtomicBool, ) { exchange_thread.abort(); - self.ctx.db.remove_peer(self.ctx.config.if_index); + *lock!(self.ctx.iface.peer_identity) = None; + *lock!(self.ctx.stats.peer_address) = None; let (to_remove, to_remove_tnl) = self.ctx.db.remove_nexthop_routes(self.peer); let mut routes: Vec = Vec::new(); @@ -532,6 +591,8 @@ impl State for Exchange { &mut self, event: Receiver, ) -> (Box, Receiver) { + *lock!(self.ctx.iface.fsm_state) = FsmState::Exchange; + *lock!(self.ctx.iface.last_fsm_state_change) = Instant::now(); let exchange_thread = loop { match exchange::handler( self.ctx.clone(), diff --git a/ddmadm/src/main.rs b/ddmadm/src/main.rs index 800315d8..2d6ea6e2 100644 --- a/ddmadm/src/main.rs +++ b/ddmadm/src/main.rs @@ -117,9 +117,15 @@ async fn run() -> Result<()> { "Status".dimmed(), )?; for (index, info) in &msg.into_inner() { + let (state, duration) = match &info.status { + types::PeerStatus::Init(d) => ("Init", d), + types::PeerStatus::Solicit(d) => ("Solicit", d), + types::PeerStatus::Exchange(d) => ("Exchange", d), + types::PeerStatus::Expired(d) => ("Expired", d), + }; writeln!( &mut tw, - "{}\t{}\t{}\t{}\t{:?}", + "{}\t{}\t{}\t{}\t{} {}", index, info.host, info.addr, @@ -128,7 +134,8 @@ async fn run() -> Result<()> { 1 => "Transit", _ => "?", }, - info.status, + state, + format_duration(duration), )?; } tw.flush()?; @@ -250,6 +257,19 @@ async fn run() -> Result<()> { Ok(()) } +fn format_duration(d: &std::time::Duration) -> String { + let secs = d.as_secs(); + let mins = secs / 60; + let hours = mins / 60; + if hours > 0 { + format!("{}h {}m {}s", hours, mins % 60, secs % 60) + } else if mins > 0 { + format!("{}m {}s", mins, secs % 60) + } else { + format!("{}s", secs) + } +} + fn init_logger() -> Logger { let decorator = slog_term::TermDecorator::new().build(); let drain = slog_term::FullFormat::new(decorator).build().fuse(); diff --git a/ddmd/src/main.rs b/ddmd/src/main.rs index d5db3f3c..a7bcefef 100644 --- a/ddmd/src/main.rs +++ b/ddmd/src/main.rs @@ -5,7 +5,7 @@ use clap::Parser; use ddm::admin::{HandlerContext, RouterStats}; use ddm::db::Db; -use ddm::sm::{DpdConfig, SmContext, StateMachine}; +use ddm::sm::{DpdConfig, InterfaceState, SmContext, StateMachine}; use ddm::sys::Route; use ddm_types::db::RouterKind; use signal::handle_signals; @@ -164,6 +164,7 @@ async fn run() { log: log.clone(), hostname: hostname.clone(), rt: rt.clone(), + iface: Arc::new(InterfaceState::default()), stats: Arc::new(ddm::sm::SessionStats::default()), }; let sm = StateMachine { ctx, rx: Some(rx) }; diff --git a/openapi/ddm-admin/ddm-admin-1.0.0-b6eac7.json.gitstub b/openapi/ddm-admin/ddm-admin-1.0.0-b6eac7.json.gitstub new file mode 100644 index 00000000..0d935c8b --- /dev/null +++ b/openapi/ddm-admin/ddm-admin-1.0.0-b6eac7.json.gitstub @@ -0,0 +1 @@ +76204d2907209bd8b963fb2da976ea688282d990:openapi/ddm-admin/ddm-admin-1.0.0-b6eac7.json diff --git a/openapi/ddm-admin/ddm-admin-1.0.0-b6eac7.json b/openapi/ddm-admin/ddm-admin-2.0.0-45d40c.json similarity index 85% rename from openapi/ddm-admin/ddm-admin-1.0.0-b6eac7.json rename to openapi/ddm-admin/ddm-admin-2.0.0-45d40c.json index fe80efd3..e891ca0e 100644 --- a/openapi/ddm-admin/ddm-admin-1.0.0-b6eac7.json +++ b/openapi/ddm-admin/ddm-admin-2.0.0-45d40c.json @@ -6,7 +6,7 @@ "url": "https://oxide.computer", "email": "api@oxide.computer" }, - "version": "1.0.0" + "version": "2.0.0" }, "paths": { "/disable-stats": { @@ -359,6 +359,25 @@ }, "components": { "schemas": { + "Duration": { + "type": "object", + "properties": { + "nanos": { + "type": "integer", + "format": "uint32", + "minimum": 0 + }, + "secs": { + "type": "integer", + "format": "uint64", + "minimum": 0 + } + }, + "required": [ + "nanos", + "secs" + ] + }, "EnableStatsRequest": { "type": "object", "properties": { @@ -463,6 +482,7 @@ ] }, "PeerInfo": { + "description": "Information about a DDM peer.", "type": "object", "properties": { "addr": { @@ -487,11 +507,80 @@ ] }, "PeerStatus": { - "type": "string", - "enum": [ - "NoContact", - "Active", - "Expired" + "description": "Status of a DDM peer, including how long the peer has been in its current state.", + "oneOf": [ + { + "type": "object", + "properties": { + "duration": { + "$ref": "#/components/schemas/Duration" + }, + "type": { + "type": "string", + "enum": [ + "Init" + ] + } + }, + "required": [ + "duration", + "type" + ] + }, + { + "type": "object", + "properties": { + "duration": { + "$ref": "#/components/schemas/Duration" + }, + "type": { + "type": "string", + "enum": [ + "Solicit" + ] + } + }, + "required": [ + "duration", + "type" + ] + }, + { + "type": "object", + "properties": { + "duration": { + "$ref": "#/components/schemas/Duration" + }, + "type": { + "type": "string", + "enum": [ + "Exchange" + ] + } + }, + "required": [ + "duration", + "type" + ] + }, + { + "type": "object", + "properties": { + "duration": { + "$ref": "#/components/schemas/Duration" + }, + "type": { + "type": "string", + "enum": [ + "Expired" + ] + } + }, + "required": [ + "duration", + "type" + ] + } ] }, "RouterKind": { diff --git a/openapi/ddm-admin/ddm-admin-latest.json b/openapi/ddm-admin/ddm-admin-latest.json index 45446659..4eb6e8db 120000 --- a/openapi/ddm-admin/ddm-admin-latest.json +++ b/openapi/ddm-admin/ddm-admin-latest.json @@ -1 +1 @@ -ddm-admin-1.0.0-b6eac7.json \ No newline at end of file +ddm-admin-2.0.0-45d40c.json \ No newline at end of file From 1711a61e369f7662e36794c9f5191784ec250ecb Mon Sep 17 00:00:00 2001 From: Trey Aspelund Date: Sat, 28 Mar 2026 15:23:52 -0600 Subject: [PATCH 2/4] ddmadm: sort output of get-peers --- ddmadm/src/main.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ddmadm/src/main.rs b/ddmadm/src/main.rs index 2d6ea6e2..d9cc66a2 100644 --- a/ddmadm/src/main.rs +++ b/ddmadm/src/main.rs @@ -116,7 +116,9 @@ async fn run() -> Result<()> { "Kind".dimmed(), "Status".dimmed(), )?; - for (index, info) in &msg.into_inner() { + let mut peers: Vec<_> = msg.into_inner().into_iter().collect(); + peers.sort_by_key(|(index, _)| index.clone()); + for (index, info) in &peers { let (state, duration) = match &info.status { types::PeerStatus::Init(d) => ("Init", d), types::PeerStatus::Solicit(d) => ("Solicit", d), From d4c5a31b26c6399ad69d155d027e56bd41060107 Mon Sep 17 00:00:00 2001 From: Trey Aspelund Date: Mon, 30 Mar 2026 09:02:52 -0600 Subject: [PATCH 3/4] ddm: remove dupe peer_address, add fsm helpers After adding PeerIdentity, the peer_address field of SessionStats is now redundant (and also isn't a stat) so this removes it. The peer address is tracked in PeerIdentity, so we can just rely on that field behind a single mutex instead of locking/juggling both stats and identity. Also adds a couple helper methods for transitioning FSM state and updating the peer state. --- ddm/src/admin.rs | 21 +++++++-------------- ddm/src/discovery.rs | 27 ++++++++------------------- ddm/src/sm.rs | 27 ++++++++++++++++----------- 3 files changed, 31 insertions(+), 44 deletions(-) diff --git a/ddm/src/admin.rs b/ddm/src/admin.rs index e53ded1a..8392eeeb 100644 --- a/ddm/src/admin.rs +++ b/ddm/src/admin.rs @@ -7,7 +7,7 @@ use crate::sm::{AdminEvent, Event, PrefixSet, SmContext}; use ddm_api::DdmAdminApi; use ddm_api::ddm_admin_api_mod; use ddm_types::admin::{EnableStatsRequest, ExpirePathParams, PrefixMap}; -use ddm_types::db::{PeerInfo, RouterKind, TunnelRoute}; +use ddm_types::db::{PeerInfo, TunnelRoute}; use ddm_types::exchange::PathVector; use dropshot::ApiDescription; use dropshot::ApiDescriptionBuildErrors; @@ -24,7 +24,7 @@ use mg_common::{lock, net::TunnelOrigin}; use oxnet::Ipv6Net; use slog::{Logger, error, info}; use std::collections::{HashMap, HashSet}; -use std::net::{IpAddr, Ipv6Addr, SocketAddr, SocketAddrV4, SocketAddrV6}; +use std::net::{IpAddr, SocketAddr, SocketAddrV4, SocketAddrV6}; use std::sync::Arc; use std::sync::Mutex; use std::sync::atomic::{AtomicU64, Ordering}; @@ -115,24 +115,17 @@ impl DdmAdminApi for DdmAdminApiImpl { let mut result = HashMap::new(); for sm in &ctx.peers { let if_index = *lock!(sm.iface.if_index); - if if_index == 0 { + let Some(peer) = lock!(sm.iface.peer_identity).clone() else { continue; - } - let status = sm.iface.peer_status(); - let peer = lock!(sm.iface.peer_identity).clone(); - let addr = - lock!(sm.stats.peer_address).unwrap_or(Ipv6Addr::UNSPECIFIED); - let (host, kind) = match peer { - Some(p) => (p.hostname, p.kind), - None => (String::new(), RouterKind::Server), }; + let status = sm.iface.peer_status(); result.insert( if_index, PeerInfo { status, - addr, - host, - kind, + addr: peer.addr, + host: peer.hostname, + kind: peer.kind, }, ); } diff --git a/ddm/src/discovery.rs b/ddm/src/discovery.rs index ca03f2e8..155fb825 100644 --- a/ddm/src/discovery.rs +++ b/ddm/src/discovery.rs @@ -175,7 +175,6 @@ struct HandlerContext { log: Logger, event: Sender, iface: Arc, - stats: Arc, } struct Neighbor { @@ -230,7 +229,6 @@ pub(crate) fn handler( event, config, iface, - stats: stats.clone(), }; let stop = Arc::new(AtomicBool::new(false)); @@ -522,24 +520,15 @@ fn handle_advertisement( } }; drop(guard); - let updated = { - let mut info = lock!(ctx.iface.peer_identity); - let peer_addr = *lock!(ctx.stats.peer_address); - let changed = match &*info { - Some(existing) => { - existing.hostname != hostname - || existing.kind != kind - || peer_addr != Some(*sender) - } - None => true, - }; - if changed { - *info = Some(PeerIdentity { hostname, kind }); - } - changed + let new_peer = PeerIdentity { + addr: *sender, + hostname, + kind, }; - if updated { - lock!(ctx.stats.peer_address).replace(*sender); + let mut info = lock!(ctx.iface.peer_identity); + if info.as_ref() != Some(&new_peer) { + *info = Some(new_peer); + drop(info); emit_nbr_update(ctx, sender, version); } } diff --git a/ddm/src/sm.rs b/ddm/src/sm.rs index cfba05ea..a1f9ffe3 100644 --- a/ddm/src/sm.rs +++ b/ddm/src/sm.rs @@ -171,8 +171,9 @@ impl FsmState { } } -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq)] pub struct PeerIdentity { + pub addr: Ipv6Addr, pub hostname: String, pub kind: RouterKind, } @@ -186,6 +187,15 @@ pub struct InterfaceState { } impl InterfaceState { + pub fn transition(&self, state: FsmState) { + *lock!(self.fsm_state) = state; + *lock!(self.last_fsm_state_change) = Instant::now(); + } + + pub fn clear_peer(&self) { + *lock!(self.peer_identity) = None; + } + pub fn peer_status(&self) -> PeerStatus { let elapsed = lock!(self.last_fsm_state_change).elapsed(); lock!(self.fsm_state).to_peer_status(elapsed) @@ -214,7 +224,6 @@ pub struct SessionStats { pub peer_expirations: AtomicU64, pub peer_address_changes: AtomicU64, pub peer_established: AtomicU64, - pub peer_address: Mutex>, // Exchange pub updates_sent: AtomicU64, @@ -282,9 +291,8 @@ impl State for Init { &mut self, event: Receiver, ) -> (Box, Receiver) { - *lock!(self.ctx.iface.fsm_state) = FsmState::Init; - *lock!(self.ctx.iface.last_fsm_state_change) = Instant::now(); - *lock!(self.ctx.iface.peer_identity) = None; + self.ctx.iface.transition(FsmState::Init); + self.ctx.iface.clear_peer(); loop { let info = match get_ipaddr_info(&self.ctx.config.aobj_name) { Ok(info) => info, @@ -362,8 +370,7 @@ impl State for Solicit { &mut self, event: Receiver, ) -> (Box, Receiver) { - *lock!(self.ctx.iface.fsm_state) = FsmState::Solicit; - *lock!(self.ctx.iface.last_fsm_state_change) = Instant::now(); + self.ctx.iface.transition(FsmState::Solicit); loop { let e = match event.recv() { Ok(e) => e, @@ -512,8 +519,7 @@ impl Exchange { pull_stop: &AtomicBool, ) { exchange_thread.abort(); - *lock!(self.ctx.iface.peer_identity) = None; - *lock!(self.ctx.stats.peer_address) = None; + self.ctx.iface.clear_peer(); let (to_remove, to_remove_tnl) = self.ctx.db.remove_nexthop_routes(self.peer); let mut routes: Vec = Vec::new(); @@ -591,8 +597,7 @@ impl State for Exchange { &mut self, event: Receiver, ) -> (Box, Receiver) { - *lock!(self.ctx.iface.fsm_state) = FsmState::Exchange; - *lock!(self.ctx.iface.last_fsm_state_change) = Instant::now(); + self.ctx.iface.transition(FsmState::Exchange); let exchange_thread = loop { match exchange::handler( self.ctx.clone(), From 67c874547bad93def96bbdabf6db54053156a3e7 Mon Sep 17 00:00:00 2001 From: Trey Aspelund Date: Mon, 30 Mar 2026 09:19:53 -0600 Subject: [PATCH 4/4] ddmadm: show fsm state duration in separate column --- ddmadm/src/main.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ddmadm/src/main.rs b/ddmadm/src/main.rs index d9cc66a2..cc9a5934 100644 --- a/ddmadm/src/main.rs +++ b/ddmadm/src/main.rs @@ -109,12 +109,13 @@ async fn run() -> Result<()> { let mut tw = TabWriter::new(stdout()); writeln!( &mut tw, - "{}\t{}\t{}\t{}\t{}", + "{}\t{}\t{}\t{}\t{}\t{}", "Interface".dimmed(), "Host".dimmed(), "Address".dimmed(), "Kind".dimmed(), "Status".dimmed(), + "Duration".dimmed(), )?; let mut peers: Vec<_> = msg.into_inner().into_iter().collect(); peers.sort_by_key(|(index, _)| index.clone()); @@ -127,7 +128,7 @@ async fn run() -> Result<()> { }; writeln!( &mut tw, - "{}\t{}\t{}\t{}\t{} {}", + "{}\t{}\t{}\t{}\t{}\t{}", index, info.host, info.addr,