diff --git a/raphtory-benchmark/benches/algobench.rs b/raphtory-benchmark/benches/algobench.rs index 3465253c76..4971d945dc 100644 --- a/raphtory-benchmark/benches/algobench.rs +++ b/raphtory-benchmark/benches/algobench.rs @@ -11,8 +11,12 @@ use raphtory::{ global_temporal_three_node_motifs::global_temporal_three_node_motif, local_triangle_count::local_triangle_count, }, + covering::{ + fast_distributed_dominating_set::fast_distributed_dominating_set, + dominating_set::lazy_greedy_dominating_set + } }, - graphgen::random_attachment::random_attachment, + graphgen::{preferential_attachment::ba_preferential_attachment, random_attachment::random_attachment}, prelude::*, }; use raphtory_benchmark::common::bench; @@ -130,6 +134,45 @@ pub fn temporal_motifs(c: &mut Criterion) { group.finish(); } +pub fn dominating_set(c: &mut Criterion) { + let mut group = c.benchmark_group("dominating_set_scaling"); + group.sample_size(100); + group.sampling_mode(SamplingMode::Flat); + + let sizes = [1_000, 10_000, 100_000, 300_000]; + let seed: [u8; 32] = [1; 32]; + + for &size in &sizes { + let g = Graph::new(); + ba_preferential_attachment(&g, size, 2, Some(seed)); + + + group.bench_with_input( + BenchmarkId::new("fast_distributed", size), + &g, + |b, graph| { + b.iter(|| { + let result = fast_distributed_dominating_set(graph); + black_box(result); + }) + } + ); + + group.bench_with_input( + BenchmarkId::new("lazy_greedy", size), + &g, + |b, graph| { + b.iter(|| { + let result = lazy_greedy_dominating_set(graph); + black_box(result); + }) + } + ); + } + + group.finish(); +} + criterion_group!( benches, local_triangle_count_analysis, @@ -138,5 +181,6 @@ criterion_group!( graphgen_large_pagerank, graphgen_large_concomp, temporal_motifs, + dominating_set ); criterion_main!(benches); diff --git a/raphtory/src/algorithms/covering/dominating_set.rs b/raphtory/src/algorithms/covering/dominating_set.rs new file mode 100644 index 0000000000..78a904fa57 --- /dev/null +++ b/raphtory/src/algorithms/covering/dominating_set.rs @@ -0,0 +1,338 @@ +use crate::{db::api::view::StaticGraphViewOps}; +use crate::db::api::view::node::NodeViewOps; +use raphtory_api::core::{ + entities::{ + VID, + }, +}; +use std::{ + collections::HashSet, +}; +use crate::db::api::view::graph::GraphViewOps; + +#[derive(Default, Clone)] +struct LinkedListNode { + next: Option, + prev: Option, + uncovered_count: usize, + vid: VID +} + +struct DominatingSetQueue { + linked_nodes: Vec, + uncovered_count_map: Vec>, + max_uncovered_count: usize, +} + +impl DominatingSetQueue { + pub fn from_graph(g: &G)-> Self { + let n_nodes = g.count_nodes(); + let mut linked_nodes = vec![LinkedListNode::default(); n_nodes]; + let mut uncovered_count_map = vec![None; n_nodes + 1]; + for node in g.nodes() { + let vid = node.node; + let index = vid.index(); + let uncovered_count = node.degree() + 1; + let current_linked_node = &mut linked_nodes[index]; + current_linked_node.uncovered_count = uncovered_count; + current_linked_node.vid = vid; + if let Some(existing_index) = uncovered_count_map[uncovered_count] { + current_linked_node.next = Some(existing_index); + linked_nodes[existing_index].prev = Some(index); + } + uncovered_count_map[uncovered_count] = Some(index); + } + Self { + linked_nodes, + uncovered_count_map, + max_uncovered_count: n_nodes, + } + } + + pub fn maximum(&mut self) -> Option { + while self.max_uncovered_count > 0 { + if let Some(index) = self.uncovered_count_map[self.max_uncovered_count] { + if let Some(next_index) = self.linked_nodes[index].next { + let next_linked_node = &mut self.linked_nodes[next_index]; + next_linked_node.prev = None; + self.uncovered_count_map[self.max_uncovered_count] = Some(next_index); + } else { + self.uncovered_count_map[self.max_uncovered_count] = None; + } + self.linked_nodes[index].next = None; + return Some(index); + } + self.max_uncovered_count -= 1; + } + None + } + // uncovered count should be less than the max uncovered count + pub fn insert(&mut self, index: usize, uncovered_count: usize) { + self.linked_nodes[index].uncovered_count = uncovered_count; + if let Some(existing_index) = self.uncovered_count_map[uncovered_count] { + self.linked_nodes[index].next = Some(existing_index); + self.linked_nodes[existing_index].prev = Some(index); + } + self.uncovered_count_map[uncovered_count] = Some(index); + } + + pub fn node_details(&self, index: usize) -> (VID, usize) { + let linked_node = &self.linked_nodes[index]; + (linked_node.vid, linked_node.uncovered_count) + } +} + + +pub fn lazy_greedy_dominating_set(g: &G) -> HashSet { + let n_nodes = g.count_nodes(); + let mut dominating_set: HashSet = HashSet::new(); + let mut covered_count = 0; + let mut covered_nodes: Vec = vec![false; n_nodes]; + let mut queue = DominatingSetQueue::from_graph(g); + while covered_count < n_nodes { + let index = queue.maximum().unwrap(); + let (vid, stale_uncovered_count) = queue.node_details(index); + let node = g.node(vid).unwrap(); + let mut actual_uncovered_count = 0; + if !covered_nodes[vid.index()] { + actual_uncovered_count += 1; + } + for neighbor in node.neighbours() { + if !covered_nodes[neighbor.node.index()] { + actual_uncovered_count += 1; + } + } + if actual_uncovered_count == stale_uncovered_count { + dominating_set.insert(vid); + if !covered_nodes[vid.index()] { + covered_nodes[vid.index()] = true; + covered_count += 1; + } + for neighbor in node.neighbours() { + if !covered_nodes[neighbor.node.index()] { + covered_nodes[neighbor.node.index()] = true; + covered_count += 1; + } + } + } else { + if actual_uncovered_count > 0 { + queue.insert(index, actual_uncovered_count); + } + } + } + dominating_set +} + +pub fn is_dominating_set(g: &G, dominating_set: &HashSet) -> bool { + let n_nodes = g.count_nodes(); + let mut covered_nodes: Vec = vec![false; n_nodes]; + let mut covered_count = 0; + for &vid in dominating_set { + if !covered_nodes[vid.index()] { + covered_nodes[vid.index()] = true; + covered_count += 1; + } + let node = g.node(vid).unwrap(); + for neighbor in node.neighbours() { + if !covered_nodes[neighbor.node.index()] { + covered_nodes[neighbor.node.index()] = true; + covered_count += 1; + } + } + } + covered_count == n_nodes +} + + + +#[cfg(test)] +mod tests { + use super::*; + use crate::prelude::*; + + #[test] + fn test_empty_graph() { + let graph = Graph::new(); + let ds = lazy_greedy_dominating_set(&graph); + + assert!(ds.is_empty(), "Empty graph should have an empty dominating set"); + } + + #[test] + fn test_single_node_graph() { + let graph = Graph::new(); + graph.add_node(0, 1, NO_PROPS, None).unwrap(); + + let ds = lazy_greedy_dominating_set(&graph); + + assert!(is_dominating_set(&graph, &ds), "Result should be a valid dominating set"); + assert_eq!(ds.len(), 1, "Single node should dominate itself"); + assert!(ds.contains(&VID(0)), "The single node should be in the dominating set"); + } + + #[test] + fn test_two_connected_nodes() { + let graph = Graph::new(); + graph.add_edge(0, 1, 2, NO_PROPS, None).unwrap(); + + let ds = lazy_greedy_dominating_set(&graph); + + assert!(is_dominating_set(&graph, &ds), "Result should be a valid dominating set"); + assert_eq!(ds.len(), 1, "One node should be sufficient to dominate both nodes in an edge"); + } + + #[test] + fn test_star_graph() { + let graph = Graph::new(); + // Star with center 0 and leaves 1-5 + for i in 1..=5 { + graph.add_edge(0, 0, i, NO_PROPS, None).unwrap(); + } + + let ds = lazy_greedy_dominating_set(&graph); + + assert!(is_dominating_set(&graph, &ds), "Result should be a valid dominating set"); + assert_eq!(ds.len(), 1, "Star graph center should be the only node in the dominating set"); + assert!(ds.contains(&VID(0)), "Center node should be in the dominating set"); + } + + #[test] + fn test_path_graph() { + let graph = Graph::new(); + // Path: 1-2-3-4-5 + for i in 1..5 { + graph.add_edge(0, i, i + 1, NO_PROPS, None).unwrap(); + } + + let ds = lazy_greedy_dominating_set(&graph); + + assert!(is_dominating_set(&graph, &ds), "Result should be a valid dominating set"); + // For a path of 5 nodes, we need at most 2 nodes + assert!(ds.len() <= 2, "Path of 5 nodes should need at most 2 nodes in dominating set"); + } + + #[test] + fn test_triangle_graph() { + let graph = Graph::new(); + // Triangle: 1-2-3-1 + graph.add_edge(0, 1, 2, NO_PROPS, None).unwrap(); + graph.add_edge(0, 2, 3, NO_PROPS, None).unwrap(); + graph.add_edge(0, 3, 1, NO_PROPS, None).unwrap(); + + let ds = lazy_greedy_dominating_set(&graph); + + assert!(is_dominating_set(&graph, &ds), "Result should be a valid dominating set"); + assert_eq!(ds.len(), 1, "Triangle should need only 1 node in dominating set"); + } + + #[test] + fn test_complete_graph() { + let graph = Graph::new(); + // Complete graph K4 + for i in 1..=4 { + for j in (i+1)..=4 { + graph.add_edge(0, i, j, NO_PROPS, None).unwrap(); + } + } + + let ds = lazy_greedy_dominating_set(&graph); + + assert!(is_dominating_set(&graph, &ds), "Result should be a valid dominating set"); + assert_eq!(ds.len(), 1, "Complete graph should need only 1 node in dominating set"); + } + + #[test] + fn test_disconnected_graph() { + let graph = Graph::new(); + // Two separate edges: 1-2 and 3-4 + graph.add_edge(0, 1, 2, NO_PROPS, None).unwrap(); + graph.add_edge(0, 3, 4, NO_PROPS, None).unwrap(); + + let ds = lazy_greedy_dominating_set(&graph); + + assert!(is_dominating_set(&graph, &ds), "Result should be a valid dominating set"); + assert_eq!(ds.len(), 2, "Two disconnected components need at least 2 nodes"); + } + + #[test] + fn test_cycle_graph() { + let graph = Graph::new(); + // Cycle: 1-2-3-4-5-1 + for i in 1..=5 { + let next = if i == 5 { 1 } else { i + 1 }; + graph.add_edge(0, i, next, NO_PROPS, None).unwrap(); + } + + let ds = lazy_greedy_dominating_set(&graph); + + assert!(is_dominating_set(&graph, &ds), "Result should be a valid dominating set"); + // For a cycle of 5 nodes, we need at most 2 nodes + assert!(ds.len() <= 2, "Cycle of 5 nodes should need at most 2 nodes in dominating set"); + } + + #[test] + fn test_bipartite_graph() { + let graph = Graph::new(); + // Complete bipartite graph K_{2,3} + for i in 1..=2 { + for j in 3..=5 { + graph.add_edge(0, i, j, NO_PROPS, None).unwrap(); + } + } + + let ds = lazy_greedy_dominating_set(&graph); + + assert!(is_dominating_set(&graph, &ds), "Result should be a valid dominating set"); + // Can be dominated by either all nodes from one partition (2 or 3 nodes) + assert!(ds.len() <= 3); + } + + #[test] + fn test_isolated_nodes() { + let graph = Graph::new(); + // Add isolated nodes without edges + graph.add_node(0, 1, NO_PROPS, None).unwrap(); + graph.add_node(0, 2, NO_PROPS, None).unwrap(); + graph.add_node(0, 3, NO_PROPS, None).unwrap(); + + let ds = lazy_greedy_dominating_set(&graph); + + assert!(is_dominating_set(&graph, &ds), "Result should be a valid dominating set"); + assert_eq!(ds.len(), 3, "All isolated nodes must be in the dominating set"); + } + + #[test] + fn test_mixed_graph() { + let graph = Graph::new(); + // Mix of connected and isolated nodes + graph.add_edge(0, 1, 2, NO_PROPS, None).unwrap(); + graph.add_edge(0, 2, 3, NO_PROPS, None).unwrap(); + graph.add_node(0, 10, NO_PROPS, None).unwrap(); // Isolated node + + let ds = lazy_greedy_dominating_set(&graph); + + assert!(is_dominating_set(&graph, &ds), "Result should be a valid dominating set"); + assert!(ds.contains(&VID(3)), "Isolated node must be in the dominating set"); + // Should have at least 2 nodes: one for the connected part, one for the isolated node + assert!(ds.len() >= 2, "Should have at least 2 nodes in dominating set"); + } + + #[test] + fn test_larger_graph() { + let graph = Graph::new(); + // Create a more complex graph structure + // Central hub connected to multiple smaller clusters + for i in 1..=3 { + graph.add_edge(0, 0, i, NO_PROPS, None).unwrap(); + for j in 1..=2 { + let node_id = i * 10 + j; + graph.add_edge(0, i, node_id, NO_PROPS, None).unwrap(); + } + } + + let ds = lazy_greedy_dominating_set(&graph); + + assert!(is_dominating_set(&graph, &ds), "Result should be a valid dominating set"); + println!("Larger graph dominating set size: {}", ds.len()); + } +} diff --git a/raphtory/src/algorithms/covering/fast_distributed_dominating_set.rs b/raphtory/src/algorithms/covering/fast_distributed_dominating_set.rs new file mode 100644 index 0000000000..d56886d602 --- /dev/null +++ b/raphtory/src/algorithms/covering/fast_distributed_dominating_set.rs @@ -0,0 +1,397 @@ +use crate::db::api::view::StaticGraphViewOps; +use crate::db::api::view::node::NodeViewOps; +use raphtory_api::core::{ + entities::{ + VID, + }, +}; +use std::{ + collections::HashSet, +}; +use crate::db::api::view::graph::GraphViewOps; +use rayon::prelude::*; + +#[derive(Default, Clone)] +struct NodeCoveringState { + vid: VID, + is_covered: bool, + is_active: bool, + is_candidate: bool, + has_no_coverage: bool, + support: usize, + candidates: usize, + weight_rounded: usize, + weight: usize, + add_to_dominating_set: bool +} + + +pub fn fast_distributed_dominating_set(g: &G) -> HashSet { + let mut dominating_set = HashSet::new(); + let mut covered_count = 0; + let n_nodes = g.count_nodes(); + let mut adj_list: Vec> = vec![vec![]; n_nodes]; + let mut current_node_configs = vec![NodeCoveringState::default(); n_nodes]; + let mut next_node_configs = vec![NodeCoveringState::default(); n_nodes]; + for node in g.nodes() { + let vid = node.node; + current_node_configs[vid.index()].vid = vid; + next_node_configs[vid.index()].vid = vid; + adj_list[vid.index()] = node.neighbours().iter().map(|n| n.node.index()).collect(); + } + while covered_count < n_nodes { + next_node_configs.par_iter_mut().for_each(|next_node_config| { + let node_index = next_node_config.vid.index(); + let current_node_config = ¤t_node_configs[node_index]; + next_node_config.is_covered = current_node_config.is_covered; + if current_node_config.has_no_coverage { + return; + } + let mut node_weight = 0 as u64; + if !current_node_config.is_covered { + node_weight += 1; + } + for neighbor_index in &adj_list[node_index] { + let neighbor_config = ¤t_node_configs[*neighbor_index]; + if !neighbor_config.is_covered { + node_weight += 1; + } + } + if node_weight == 0 { + next_node_config.has_no_coverage = true; + next_node_config.weight = 0; + next_node_config.weight_rounded = 0; + } else { + let node_weight_rounded = (2 as u64).pow(node_weight.ilog2()) as usize; + next_node_config.weight = node_weight as usize; + next_node_config.weight_rounded = node_weight_rounded; + } + }); + std::mem::swap(&mut current_node_configs, &mut next_node_configs); + next_node_configs.par_iter_mut().for_each(|next_node_config| { + let node_index = next_node_config.vid.index(); + let current_node_config = ¤t_node_configs[node_index]; + next_node_config.has_no_coverage = current_node_config.has_no_coverage; + next_node_config.weight = current_node_config.weight; + next_node_config.weight_rounded = current_node_config.weight_rounded; + if current_node_config.has_no_coverage { + next_node_config.is_active = false; + return; + } + let mut max_weight_rounded = current_node_config.weight_rounded; + for neighbor_index in &adj_list[node_index] { + let neighbor_config = ¤t_node_configs[*neighbor_index]; + if neighbor_config.weight_rounded > max_weight_rounded { + max_weight_rounded = neighbor_config.weight_rounded; + } + for second_neighbor_index in &adj_list[*neighbor_index] { + let second_neighbor_config = ¤t_node_configs[*second_neighbor_index]; + if second_neighbor_config.weight_rounded > max_weight_rounded { + max_weight_rounded = second_neighbor_config.weight_rounded; + } + } + } + if current_node_config.weight_rounded == max_weight_rounded { + next_node_config.is_active = true; + } else { + next_node_config.is_active = false; + } + }); + std::mem::swap(&mut current_node_configs, &mut next_node_configs); + next_node_configs.par_iter_mut().for_each(|next_node_config| { + let node_index = next_node_config.vid.index(); + let current_node_config = ¤t_node_configs[node_index]; + next_node_config.is_active = current_node_config.is_active; + if current_node_config.has_no_coverage { + next_node_config.support = 0; + return; + } + let mut support = 0; + if current_node_config.is_active { + support += 1; + } + for neighbor_index in &adj_list[node_index] { + let neighbor_config = ¤t_node_configs[*neighbor_index]; + if neighbor_config.is_active { + support += 1; + } + } + next_node_config.support = support; + }); + std::mem::swap(&mut current_node_configs, &mut next_node_configs); + next_node_configs.par_iter_mut().for_each(|next_node_config| { + let node_index = next_node_config.vid.index(); + let current_node_config = ¤t_node_configs[node_index]; + next_node_config.support = current_node_config.support; + if !current_node_config.is_active{ + next_node_config.is_candidate = false; + return; + } + let mut max_support = 0; + if !current_node_config.is_covered { + max_support = current_node_config.support; + } + for neighbor_index in &adj_list[node_index] { + let neighbor_config = ¤t_node_configs[*neighbor_index]; + if !neighbor_config.is_covered && neighbor_config.support > max_support { + max_support = neighbor_config.support; + } + } + let p = 1.0/(max_support as f64); + let r: f64 = rand::random(); + if r < p { + next_node_config.is_candidate = true; + } else { + next_node_config.is_candidate = false; + } + }); + std::mem::swap(&mut current_node_configs, &mut next_node_configs); + next_node_configs.par_iter_mut().for_each(|next_node_config| { + let node_index = next_node_config.vid.index(); + let current_node_config = ¤t_node_configs[node_index]; + next_node_config.is_candidate = current_node_config.is_candidate; + if current_node_config.has_no_coverage { + next_node_config.candidates = 0; + return; + } + let mut candidates = 0; + if current_node_config.is_candidate { + candidates += 1; + } + for neighbor_index in &adj_list[node_index] { + let neighbor_config = ¤t_node_configs[*neighbor_index]; + if neighbor_config.is_candidate { + candidates += 1; + } + } + next_node_config.candidates = candidates; + }); + std::mem::swap(&mut current_node_configs, &mut next_node_configs); + next_node_configs.par_iter_mut().for_each(|next_node_config| { + let node_index = next_node_config.vid.index(); + let current_node_config = ¤t_node_configs[node_index]; + next_node_config.candidates = current_node_config.candidates; + if !current_node_config.is_candidate { + return; + } + let mut sum_candidates = 0; + if !current_node_config.is_covered { + sum_candidates += current_node_config.candidates; + } + for neighbor_index in &adj_list[node_index] { + let neighbor_config = ¤t_node_configs[*neighbor_index]; + if !neighbor_config.is_covered { + sum_candidates += neighbor_config.candidates; + } + } + if sum_candidates <= 3 * current_node_config.weight_rounded { + next_node_config.add_to_dominating_set = true; + } + }); + std::mem::swap(&mut current_node_configs, &mut next_node_configs); + for i in 0..n_nodes { + let add_to_dominating_set = current_node_configs[i].add_to_dominating_set; + if add_to_dominating_set { + { + let node_config = &mut current_node_configs[i]; + dominating_set.insert(node_config.vid); + node_config.add_to_dominating_set = false; + if !node_config.is_covered { + node_config.is_covered = true; + covered_count += 1; + } + } + for neighbor_index in &adj_list[i] { + let neighbor_config = &mut current_node_configs[*neighbor_index]; + if !neighbor_config.is_covered { + neighbor_config.is_covered = true; + covered_count += 1; + } + } + } + } + } + dominating_set +} + + +#[cfg(test)] +mod tests { + use super::*; + use crate::prelude::*; + use super::super::dominating_set::is_dominating_set; + + #[test] + fn test_single_node_graph() { + let graph = Graph::new(); + graph.add_node(0, 1, NO_PROPS, None).unwrap(); + + let ds = fast_distributed_dominating_set(&graph); + + assert!(is_dominating_set(&graph, &ds)); + assert_eq!(ds.len(), 1, "Single node should dominate itself"); + } + + #[test] + fn test_two_connected_nodes() { + let graph = Graph::new(); + graph.add_edge(0, 1, 2, NO_PROPS, None).unwrap(); + + let ds = fast_distributed_dominating_set(&graph); + + assert!(is_dominating_set(&graph, &ds)); + assert_eq!(ds.len(), 1, "One node should dominate an edge"); + } + + #[test] + fn test_star_graph() { + let graph = Graph::new(); + // Star with center 0 and leaves 1-5 + for i in 1..=5 { + graph.add_edge(0, 0, i, NO_PROPS, None).unwrap(); + } + + let ds = fast_distributed_dominating_set(&graph); + + assert!(is_dominating_set(&graph, &ds)); + assert_eq!(ds.len(), 1, "Star graph center should be the dominating set"); + } + + #[test] + fn test_path_graph() { + let graph = Graph::new(); + // Path: 1-2-3-4-5 + for i in 1..5 { + graph.add_edge(0, i, i + 1, NO_PROPS, None).unwrap(); + } + + let ds = fast_distributed_dominating_set(&graph); + + assert!(is_dominating_set(&graph, &ds)); + println!("Path graph dominating set: {:?}", ds); + } + + #[test] + fn test_triangle_graph() { + let graph = Graph::new(); + // Triangle: 1-2-3-1 + graph.add_edge(0, 1, 2, NO_PROPS, None).unwrap(); + graph.add_edge(0, 2, 3, NO_PROPS, None).unwrap(); + graph.add_edge(0, 3, 1, NO_PROPS, None).unwrap(); + + let ds = fast_distributed_dominating_set(&graph); + + assert!(is_dominating_set(&graph, &ds)); + assert!(ds.len() <= 2, "At most 2 nodes needed to dominate a triangle"); + } + + #[test] + fn test_complete_graph_k4() { + let graph = Graph::new(); + // Complete graph K4 + for i in 1..=4 { + for j in (i + 1)..=4 { + graph.add_edge(0, i, j, NO_PROPS, None).unwrap(); + } + } + + let ds = fast_distributed_dominating_set(&graph); + + assert!(is_dominating_set(&graph, &ds)); + assert!(!ds.is_empty()); + } + + #[test] + fn test_cycle_graph() { + let graph = Graph::new(); + // Cycle: 1-2-3-4-5-1 + for i in 1..=5 { + graph.add_edge(0, i, if i == 5 { 1 } else { i + 1 }, NO_PROPS, None).unwrap(); + } + + let ds = fast_distributed_dominating_set(&graph); + + assert!(is_dominating_set(&graph, &ds)); + println!("Cycle graph dominating set: {:?}", ds); + } + + #[test] + fn test_disconnected_components() { + let graph = Graph::new(); + // Component 1: 1-2-3 + graph.add_edge(0, 1, 2, NO_PROPS, None).unwrap(); + graph.add_edge(0, 2, 3, NO_PROPS, None).unwrap(); + // Component 2: 4-5 + graph.add_edge(0, 4, 5, NO_PROPS, None).unwrap(); + + let ds = fast_distributed_dominating_set(&graph); + + assert!(is_dominating_set(&graph, &ds)); + assert!(ds.len() >= 2, "Should have at least one node per component"); + } + + #[test] + fn test_grid_graph() { + let graph = Graph::new(); + // 3x3 grid + // 1-2-3 + // | | | + // 4-5-6 + // | | | + // 7-8-9 + let edges = vec![ + (1, 2), (2, 3), (1, 4), (2, 5), (3, 6), + (4, 5), (5, 6), (4, 7), (5, 8), (6, 9), + (7, 8), (8, 9), + ]; + + for (src, dst) in edges { + graph.add_edge(0, src, dst, NO_PROPS, None).unwrap(); + } + + let ds = fast_distributed_dominating_set(&graph); + + assert!(is_dominating_set(&graph, &ds)); + println!("Grid dominating set size: {}", ds.len()); + } + + #[test] + fn test_with_isolated_nodes() { + let graph = Graph::new(); + // Connected: 1-2-3 + graph.add_edge(0, 1, 2, NO_PROPS, None).unwrap(); + graph.add_edge(0, 2, 3, NO_PROPS, None).unwrap(); + // Isolated nodes + graph.add_node(0, 4, NO_PROPS, None).unwrap(); + graph.add_node(0, 5, NO_PROPS, None).unwrap(); + + let ds = fast_distributed_dominating_set(&graph); + + assert!(is_dominating_set(&graph, &ds)); + assert!(ds.len() >= 3, "Each isolated node must be in the dominating set"); + } + + #[test] + fn test_larger_random_structure() { + let graph = Graph::new(); + // Create a more complex structure + let edges = vec![ + (1, 2), (1, 3), (1, 4), // Node 1 hub + (2, 5), (3, 6), (4, 7), // Branches + (5, 8), (6, 8), (7, 8), // Converge to 8 + (8, 9), (8, 10), // From 8 + (9, 10), // Triangle + ]; + + for (src, dst) in edges { + graph.add_edge(0, src, dst, NO_PROPS, None).unwrap(); + } + + let ds = fast_distributed_dominating_set(&graph); + + assert!(is_dominating_set(&graph, &ds)); + println!("Complex structure dominating set size: {}", ds.len()); + } +} + + diff --git a/raphtory/src/algorithms/covering/mod.rs b/raphtory/src/algorithms/covering/mod.rs new file mode 100644 index 0000000000..38fd5df508 --- /dev/null +++ b/raphtory/src/algorithms/covering/mod.rs @@ -0,0 +1,2 @@ +pub mod dominating_set; +pub mod fast_distributed_dominating_set; \ No newline at end of file diff --git a/raphtory/src/algorithms/diameter/diameter.rs b/raphtory/src/algorithms/diameter/diameter.rs new file mode 100644 index 0000000000..d16fb8fd7e --- /dev/null +++ b/raphtory/src/algorithms/diameter/diameter.rs @@ -0,0 +1,58 @@ +use crate::db::api::state::ops::node; +use crate::db::graph::edge::EdgeView; +use crate::db::graph::node::NodeView; +use crate::{core::entities::nodes::node_ref::AsNodeRef, db::api::view::StaticGraphViewOps}; +use crate::{ + core::entities::nodes::node_ref::NodeRef, + db::{ + api::state::{ops::filter::NO_FILTER, Index, NodeState}, + graph::nodes::Nodes, + }, + errors::GraphError, + prelude::*, +}; +use indexmap::IndexSet; +use raphtory_api::core::{ + entities::{ + properties::prop::{PropType, PropUnwrap}, + VID, + }, + Direction, +}; +use rayon::iter::ParallelIterator; +use std::collections::HashSet; +use std::hash::Hash; +use std::{ + collections::{HashMap}, +}; +use rayon::prelude::*; +use super::super::pathing::{reweighting::get_johnson_reweighting_function, dijkstra::dijkstra_single_source_shortest_paths_algorithm, to_prop}; + +pub fn diameter_approximation( + g: &G, + weight: Option<&str>, + direction: Direction, + s: usize +) -> Result { + let n_nodes = g.nodes().len(); + let weight_fn = get_johnson_reweighting_function(g, weight, direction)?; + let cost_val = to_prop(g, weight, 0.0)?; + let max_val = to_prop(g, weight, f64::MAX)?; + let nodes_of_s_tree_with_max_depth = g.nodes().par_iter().map(|node| { + k_ordered_paths_and_max_depth(g, direction, node.node, s, cost_val, max_val, weight_fn) + }).max_by(|a, b| a.1.partial_cmp(&b.1).unwrap()).map(|(k_ordered, _)| k_ordered).unwrap(); + let max_depth = nodes_of_s_tree_with_max_depth.par_iter().map(|node_vid| { + k_ordered_paths_and_max_depth(g, direction, *node_vid, n_nodes - 1, cost_val, max_val, weight_fn) + }).max_by(|a, b| a.1.partial_cmp(&b.1).unwrap()).map(|(_, dist)| dist).unwrap(); + + +} + +fn k_ordered_paths_and_max_depth(g: &G, direction: Direction, node_vid: VID, s: usize, cost_val: Prop, max_val: Prop, weight_fn: impl Fn(&EdgeView) -> Option) -> (Vec, f64) { + let node = g.node(&node_vid).unwrap(); + let (distances, _, k_ordered) = dijkstra_single_source_shortest_paths_algorithm::, HashMap, HashSet>(g, node, direction, s, cost_val, max_val, weight_fn).unwrap(); + let max_vid = k_ordered[k_ordered.len() - 1]; + let max_distance_val = distances.get(&max_vid).unwrap(); + (k_ordered, max_distance_val.as_f64().unwrap()) +} + diff --git a/raphtory/src/algorithms/diameter/mod.rs b/raphtory/src/algorithms/diameter/mod.rs new file mode 100644 index 0000000000..6d41622dba --- /dev/null +++ b/raphtory/src/algorithms/diameter/mod.rs @@ -0,0 +1 @@ +pub mod diameter; \ No newline at end of file diff --git a/raphtory/src/algorithms/mod.rs b/raphtory/src/algorithms/mod.rs index 9cfdb23900..1612c0f2c3 100644 --- a/raphtory/src/algorithms/mod.rs +++ b/raphtory/src/algorithms/mod.rs @@ -32,6 +32,8 @@ pub mod community_detection; pub mod bipartite; pub mod components; pub mod cores; +pub mod covering; +pub mod diameter; pub mod dynamics; pub mod embeddings; pub mod layout; diff --git a/raphtory/src/algorithms/pathing/bellman_ford.rs b/raphtory/src/algorithms/pathing/bellman_ford.rs new file mode 100644 index 0000000000..8fd3d7298d --- /dev/null +++ b/raphtory/src/algorithms/pathing/bellman_ford.rs @@ -0,0 +1,432 @@ +use crate::db::graph::edge::EdgeView; +use crate::db::graph::node::NodeView; +/// Bellman-Ford algorithm +use crate::{core::entities::nodes::node_ref::AsNodeRef, db::api::view::StaticGraphViewOps}; +use crate::{ + core::entities::nodes::node_ref::NodeRef, + db::{ + api::state::{ops::filter::NO_FILTER, Index, NodeState}, + graph::nodes::Nodes, + }, + errors::GraphError, + prelude::*, +}; +use indexmap::IndexSet; +use raphtory_api::core::{ + entities::{ + properties::prop::{PropType, PropUnwrap}, + VID, + }, + Direction, +}; +use std::hash::Hash; +use std::{ + collections::{HashMap}, +}; +use super::{to_prop, get_prop_val}; + + +/// Finds the shortest paths from a single source to multiple targets in a graph. +/// +/// # Arguments +/// +/// * `graph`: The graph to search in. +/// * `source`: The source node. +/// * `targets`: A vector of target nodes. +/// * `weight`: Option, The name of the weight property for the edges. If not set then defaults all edges to weight=1. +/// * `direction`: The direction of the edges of the shortest path. Defaults to both directions (undirected graph). +/// +/// # Returns +/// +/// Returns a `HashMap` where the key is the target node and the value is a tuple containing +/// the total dist and a vector of nodes representing the shortest path. +/// + +pub fn bellman_ford_single_source_shortest_paths( + g: &G, + source: T, + targets: Vec, + weight: Option<&str>, + direction: Direction, +) -> Result), G>, GraphError> { + // Turn below into a generic function, then add a closure to ensure the prop is correctly unwrapped + // after the calc is done + let dist_val = to_prop(g, weight, 0.0)?; + let max_val = to_prop(g, weight, f64::MAX)?; + let weight_fn = |edge: &EdgeView| -> Option { + let edge_val = match weight{ + None => Some(Prop::U8(1)), + Some(weight) => match edge.properties().get(weight) { + Some(prop) => Some(prop), + _ => None + } + }; + edge_val + }; + let (distances, predecessor) = bellman_ford_single_source_shortest_paths_algorithm(g, Some(source), direction, dist_val, max_val, weight_fn)?; + let mut shortest_paths: HashMap)> = HashMap::new(); + for target in targets.into_iter() { + let target_ref = target.as_node_ref(); + let target_node = match g.node(target_ref) { + Some(tgt) => tgt, + None => { + let gid = match target_ref { + NodeRef::Internal(vid) => g.node_id(vid), + NodeRef::External(gid) => gid.to_owned(), + }; + return Err(GraphError::NodeMissingError(gid)); + } + }; + let mut path = IndexSet::default(); + path.insert(target_node.node); + let mut current_node_id = target_node.node; + while let Some(prev_node) = predecessor.get(current_node_id.index()) { + if *prev_node == current_node_id { + break; + } + path.insert(*prev_node); + current_node_id = *prev_node; + } + path.reverse(); + shortest_paths.insert( + target_node.node, + (distances[target_node.node.index()].as_f64().unwrap(), path), + ); + } + let (index, values): (IndexSet<_, ahash::RandomState>, Vec<_>) = shortest_paths + .into_iter() + .map(|(id, (dist, path))| { + let nodes = + Nodes::new_filtered(g.clone(), g.clone(), NO_FILTER, Some(Index::new(path))); + (id, (dist, nodes)) + }) + .unzip(); + + Ok(NodeState::new( + g.clone(), + values.into(), + Some(Index::new(index)), + )) +} + +pub(crate) fn bellman_ford_single_source_shortest_paths_algorithm) -> Option>( + g: &G, + source: Option, + direction: Direction, + dist_val: Prop, + max_val: Prop, + weight_fn: F +) -> Result<(Vec, Vec), GraphError> { + let max_bound = get_prop_val(max_val.dtype(), f64::MAX)?; + let mut dummy_node = false; + // creates a dummy node if source node is none + let source_node_vid = if let Some(source) = source { + let source_ref = source.as_node_ref(); + let source_node = match g.node(source_ref) { + Some(src) => src, + None => { + let gid = match source_ref { + NodeRef::Internal(vid) => g.node_id(vid), + NodeRef::External(gid) => gid.to_owned(), + }; + return Err(GraphError::NodeMissingError(gid)); + } + }; + source_node.node + } else { + dummy_node = true; + VID(usize::MAX) + }; + let n_nodes = g.count_nodes(); + let mut dist: Vec = vec![max_val.clone(); n_nodes]; + let mut predecessor: Vec = vec![VID(usize::MAX); n_nodes]; + + let n_nodes = g.count_nodes(); + + for node in g.nodes() { + let node_idx = node.node.index(); + if dummy_node { + predecessor[node_idx] = source_node_vid; + } else { + predecessor[node_idx] = node.node; + if node.node == source_node_vid { + dist[source_node_vid.index()] = dist_val.clone(); + } else { + dist[node_idx] = max_val.clone(); + } + } + } + + for _ in 1..n_nodes { + let mut changed = false; + for node in g.nodes() { + if node.node == source_node_vid { + continue; + } + let node_idx = node.node.index(); + let mut min_dist = dist[node_idx].clone(); + let mut min_node = predecessor[node_idx]; + let edges = match direction { + Direction::IN => node.out_edges(), + Direction::OUT => node.in_edges(), + Direction::BOTH => node.edges(), + }; + for edge in edges { + let edge_val = if let Some(w) = weight_fn(&edge) { + w + } else { + continue; + }; + let neighbor_vid = edge.nbr().node; + let neighbor_dist = dist[neighbor_vid.index()].clone(); + if neighbor_dist == max_bound { + continue; + } + let new_dist = neighbor_dist.clone().add(edge_val).unwrap(); + if new_dist < min_dist { + min_dist = new_dist; + min_node = neighbor_vid; + changed = true; + } + } + dist[node_idx] = min_dist; + predecessor[node_idx] = min_node; + } + if !changed { + break; + } + } + + for node in g.nodes() { + let edges = match direction { + Direction::IN => node.out_edges(), + Direction::OUT => node.in_edges(), + Direction::BOTH => node.edges(), + }; + let node_dist = &dist[node.node.index()]; + for edge in edges { + let edge_val = if let Some(w) = weight_fn(&edge) { + w + } else { + continue; + }; + let neighbor_vid = edge.nbr().node; + let neighbor_dist = &dist[neighbor_vid.index()]; + if *neighbor_dist == max_bound { + continue; + } + let new_dist = neighbor_dist.clone().add(edge_val).unwrap(); + if new_dist < *node_dist { + return Err(GraphError::InvalidProperty { reason: "Negative cycle detected".to_string() }); + } + } + } + Ok((dist, predecessor)) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::db::{api::mutation::AdditionOps, graph::graph::Graph}; + use raphtory_api::core::Direction; + + fn load_graph(edges: Vec<(i64, &str, &str, Vec<(&str, f32)>)>) -> Graph { + let graph = Graph::new(); + for (t, src, dst, props) in edges { + graph.add_edge(t, src, dst, props, None).unwrap(); + } + graph + } + + fn graph_with_negative_weights() -> Graph { + load_graph(vec![ + (0, "A", "B", vec![("weight", 4.0f32)]), + (1, "A", "C", vec![("weight", 2.0f32)]), + (2, "B", "C", vec![("weight", -3.0f32)]), + (3, "C", "D", vec![("weight", 2.0f32)]), + (4, "B", "D", vec![("weight", 5.0f32)]), + ]) + } + + #[test] + fn test_bellman_ford_with_virtual_source() { + // Test with source = None and max_val = 0 (same as dist_val) + // This simulates adding a virtual source node with zero-weight edges to all nodes + // Used in Johnson's algorithm to compute potential function h(v) + let graph = graph_with_negative_weights(); + + let dist_val = Prop::F32(0.0); + let max_val = Prop::F32(0.0); + + let weight_fn = |edge: &EdgeView| -> Option { + edge.properties().get("weight") + }; + + let result = bellman_ford_single_source_shortest_paths_algorithm( + &graph, + None::, + Direction::OUT, + dist_val, + max_val, + weight_fn, + ); + + assert!(result.is_ok(), "Bellman-Ford with virtual source should succeed"); + + let (distances, predecessors) = result.unwrap(); + + // All nodes should have distances computed from virtual source + assert_eq!(distances.len(), graph.count_nodes()); + assert_eq!(predecessors.len(), graph.count_nodes()); + + // All distances should be non-infinite (reachable from virtual source) + for (i, dist) in distances.iter().enumerate() { + let dist_f64 = dist.as_f64().unwrap(); + assert!( + dist_f64.is_finite(), + "Node index {} should have finite distance, got: {}", + i, + dist_f64 + ); + } + + // Check that reweighted edges would be non-negative + // w'(u,v) = w(u,v) + h(u) - h(v) where h = distances from virtual source + for edge in graph.edges() { + let u_idx = edge.src().node.index(); + let v_idx = edge.dst().node.index(); + + let h_u = distances[u_idx].as_f64().unwrap(); + let h_v = distances[v_idx].as_f64().unwrap(); + let w_uv = edge.properties().get("weight").unwrap().as_f64().unwrap(); + + let reweighted = w_uv + h_u - h_v; + + assert!( + reweighted >= -1e-10, + "Reweighted edge {} -> {} should be non-negative: {} + {} - {} = {}", + edge.src().name(), + edge.dst().name(), + w_uv, + h_u, + h_v, + reweighted + ); + } + } + + #[test] + fn test_bellman_ford_virtual_source_with_positive_weights() { + let graph = load_graph(vec![ + (0, "A", "B", vec![("weight", 1.0f32)]), + (1, "B", "C", vec![("weight", 2.0f32)]), + (2, "A", "C", vec![("weight", 4.0f32)]), + ]); + + let dist_val = Prop::F32(0.0); + let max_val = Prop::F32(0.0); + + let weight_fn = |edge: &EdgeView| -> Option { + edge.properties().get("weight") + }; + + let result = bellman_ford_single_source_shortest_paths_algorithm( + &graph, + None::, + Direction::OUT, + dist_val, + max_val, + weight_fn, + ); + + assert!(result.is_ok()); + + let (distances, _) = result.unwrap(); + + // With all positive weights and virtual source at 0, + // all distances should be <= 0 (since we're finding minimum distances) + for dist in distances.iter() { + let dist_f64 = dist.as_f64().unwrap(); + assert!(dist_f64 <= 1e-10, "Distance should be <= 0, got: {}", dist_f64); + } + } + + #[test] + fn test_bellman_ford_virtual_source_detects_negative_cycle() { + // Create a graph with a negative cycle + let graph = load_graph(vec![ + (0, "A", "B", vec![("weight", 1.0f32)]), + (1, "B", "C", vec![("weight", -2.0f32)]), + (2, "C", "A", vec![("weight", -1.0f32)]), + ]); + + let dist_val = Prop::F32(0.0); + let max_val = Prop::F32(0.0); + + let weight_fn = |edge: &EdgeView| -> Option { + edge.properties().get("weight") + }; + + let result = bellman_ford_single_source_shortest_paths_algorithm( + &graph, + None::, + Direction::OUT, + dist_val, + max_val, + weight_fn, + ); + + assert!(result.is_err(), "Should detect negative cycle"); + + if let Err(GraphError::InvalidProperty { reason }) = result { + assert!(reason.contains("Negative cycle")); + } else { + panic!("Expected InvalidProperty error with negative cycle message"); + } + } + + #[test] + fn test_bellman_ford_virtual_source_with_integer_weights() { + let edges = vec![ + (0, 1, 2, vec![("weight", 4i64)]), + (1, 1, 3, vec![("weight", 2i64)]), + (2, 2, 3, vec![("weight", -3i64)]), + (3, 3, 4, vec![("weight", 2i64)]), + ]; + + let graph = Graph::new(); + for (t, src, dst, props) in edges { + graph.add_edge(t, src, dst, props, None).unwrap(); + } + + let dist_val = Prop::I64(0); + let max_val = Prop::I64(0); + + let weight_fn = |edge: &EdgeView| -> Option { + edge.properties().get("weight") + }; + + let result = bellman_ford_single_source_shortest_paths_algorithm( + &graph, + None::, + Direction::OUT, + dist_val, + max_val, + weight_fn, + ); + + assert!(result.is_ok()); + + let (distances, _) = result.unwrap(); + + // Check all distances are finite + for dist in distances.iter() { + let dist_f64 = dist.as_f64().unwrap(); + assert!( + dist_f64.abs() < i64::MAX as f64 / 2.0, + "Distance should be finite, got: {}", + dist_f64 + ); + } + } +} diff --git a/raphtory/src/algorithms/pathing/dijkstra.rs b/raphtory/src/algorithms/pathing/dijkstra.rs index 7a063ca44d..2a038357b9 100644 --- a/raphtory/src/algorithms/pathing/dijkstra.rs +++ b/raphtory/src/algorithms/pathing/dijkstra.rs @@ -1,3 +1,4 @@ +use crate::db::graph::edge::EdgeView; /// Dijkstra's algorithm use crate::{core::entities::nodes::node_ref::AsNodeRef, db::api::view::StaticGraphViewOps}; use crate::{ @@ -17,10 +18,80 @@ use raphtory_api::core::{ }, Direction, }; +use std::usize; use std::{ cmp::Ordering, collections::{BinaryHeap, HashMap, HashSet}, }; +use super::to_prop; + +const NO_VID: VID = VID(usize::MAX); + +pub trait GraphMap { + fn new(n_nodes: usize, s: usize, default: T) -> Self; + + fn get_item(&self, vid: VID) -> T; + + fn set_item(&mut self, vid: VID, value: T); +} + +impl GraphMap for Vec { + fn new(n_nodes: usize, capacity: usize, default: T) -> Self { + vec![default; n_nodes] + } + + fn get_item(&self, vid: VID) -> T { + self[vid.index()].clone() + } + + fn set_item(&mut self, vid: VID, value: T) { + self[vid.index()] = value; + } +} + +impl GraphMap for HashMap { + fn new(n_nodes: usize, capacity: usize, default: T) -> Self { + HashMap::with_capacity(capacity) + } + + fn get_item(&self, vid: VID) -> T { + self.get(&vid).cloned().unwrap() + } + + fn set_item(&mut self, vid: VID, value: T) { + self.insert(vid, value); + } +} + +pub trait GraphSet { + fn new(n_nodes: usize, capacity: usize) -> Self; + fn mark_visited(&mut self, vid: VID); + fn is_visited(&self, vid: VID) -> bool; +} + +impl GraphSet for Vec { + fn new(n_nodes: usize, capacity: usize) -> Self { + vec![false; n_nodes] + } + fn mark_visited(&mut self, vid: VID) { + self[vid.index()] = true; + } + fn is_visited(&self, vid: VID) -> bool { + self[vid.index()] + } +} + +impl GraphSet for HashSet { + fn new(n_nodes: usize, capacity: usize) -> Self { + HashSet::with_capacity(capacity) + } + fn mark_visited(&mut self, vid: VID) { + self.insert(vid); + } + fn is_visited(&self, vid: VID) -> bool { + self.contains(&vid) + } +} /// A state in the Dijkstra algorithm with a cost and a node name. #[derive(PartialEq)] @@ -57,7 +128,7 @@ impl PartialOrd for State { /// /// Returns a `HashMap` where the key is the target node and the value is a tuple containing /// the total cost and a vector of nodes representing the shortest path. -/// + pub fn dijkstra_single_source_shortest_paths( g: &G, source: T, @@ -65,6 +136,71 @@ pub fn dijkstra_single_source_shortest_paths, direction: Direction, ) -> Result), G>, GraphError> { + let cost_val = to_prop(g, weight, 0.0)?; + let max_val = to_prop(g, weight, f64::MAX)?; + let weight_fn = |edge: &EdgeView| -> Option { + let edge_val = match weight{ + None => Some(Prop::U8(1)), + Some(weight) => match edge.properties().get(weight) { + Some(prop) => Some(prop), + _ => None + } + }; + edge_val + }; + let n_nodes = g.count_nodes(); + let (distances, predecessor, _) = dijkstra_single_source_shortest_paths_algorithm::, Vec, Vec>(g, source, direction, n_nodes - 1, cost_val, max_val, weight_fn)?; + let mut paths: HashMap)> = HashMap::new(); + for target in targets.into_iter() { + let target_ref = target.as_node_ref(); + let target_node = match g.node(target_ref) { + Some(tgt) => tgt, + None => { + let gid = match target_ref { + NodeRef::Internal(vid) => g.node_id(vid), + NodeRef::External(gid) => gid.to_owned(), + }; + return Err(GraphError::NodeMissingError(gid)); + } + }; + let mut path = IndexSet::default(); + let node_vid = target_node.node; + path.insert(node_vid); + let mut current_node_id = node_vid; + while let Some(prev_node) = predecessor.get(current_node_id.index()) { + if *prev_node == current_node_id { + break; + } + path.insert(*prev_node); + current_node_id = *prev_node; + } + path.reverse(); + paths.insert(node_vid, (distances[node_vid.index()].as_f64().unwrap(), path)); + } + let (index, values): (IndexSet<_, ahash::RandomState>, Vec<_>) = paths + .into_iter() + .map(|(id, (cost, path))| { + let nodes = + Nodes::new_filtered(g.clone(), g.clone(), NO_FILTER, Some(Index::new(path))); + (id, (cost, nodes)) + }) + .unzip(); + Ok(NodeState::new( + g.clone(), + values.into(), + Some(Index::new(index)), + )) +} + +pub(crate) fn dijkstra_single_source_shortest_paths_algorithm) -> Option,D: GraphMap, P: GraphMap, V: GraphSet>( + g: &G, + source: T, + direction: Direction, + k: usize, + cost_val: Prop, + max_val: Prop, + weight_fn: F +) -> Result<(D, P, Vec), GraphError> { let source_ref = source.as_node_ref(); let source_node = match g.node(source_ref) { Some(src) => src, @@ -76,87 +212,38 @@ pub fn dijkstra_single_source_shortest_paths Prop::F32(0f32), - PropType::F64 => Prop::F64(0f64), - PropType::U8 => Prop::U8(0u8), - PropType::U16 => Prop::U16(0u16), - PropType::U32 => Prop::U32(0u32), - PropType::U64 => Prop::U64(0u64), - PropType::I32 => Prop::I32(0i32), - PropType::I64 => Prop::I64(0i64), - p_type => { - return Err(GraphError::InvalidProperty { - reason: format!("Weight type: {:?}, not supported", p_type), - }) - } - }; - let max_val = match weight_type { - PropType::F32 => Prop::F32(f32::MAX), - PropType::F64 => Prop::F64(f64::MAX), - PropType::U8 => Prop::U8(u8::MAX), - PropType::U16 => Prop::U16(u16::MAX), - PropType::U32 => Prop::U32(u32::MAX), - PropType::U64 => Prop::U64(u64::MAX), - PropType::I32 => Prop::I32(i32::MAX), - PropType::I64 => Prop::I64(i64::MAX), - p_type => { - return Err(GraphError::InvalidProperty { - reason: format!("Weight type: {:?}, not supported", p_type), - }) - } - }; let mut heap = BinaryHeap::new(); + heap.push(State { cost: cost_val.clone(), node: source_node.node, }); - - let mut dist: HashMap = HashMap::new(); - let mut predecessor: HashMap = HashMap::new(); - let mut visited: HashSet = HashSet::new(); - let mut paths: HashMap)> = HashMap::new(); - - dist.insert(source_node.node, cost_val.clone()); + let mut dist = D::new(n_nodes, k + 1, max_val.clone()); + dist.set_item(source_node.node, cost_val); + let mut predecessor = P::new(n_nodes, k + 1, NO_VID); + predecessor.set_item(source_node.node, source_node.node); + let mut visited = V::new(n_nodes, k + 1); + let mut visited_count = 0; + let mut k_ordered = vec![NO_VID; k + 1]; while let Some(State { cost, node: node_vid, }) = heap.pop() { - if target_nodes[node_vid.index()] && !paths.contains_key(&node_vid) { - let mut path = IndexSet::default(); - path.insert(node_vid); - let mut current_node_id = node_vid; - while let Some(prev_node) = predecessor.get(¤t_node_id) { - path.insert(*prev_node); - current_node_id = *prev_node; - } - path.reverse(); - paths.insert(node_vid, (cost.as_f64().unwrap(), path)); + // accounts for source node + if visited_count == k + 1 { + break; } - if !visited.insert(node_vid) { + if visited.is_visited(node_vid) { continue; + } else { + k_ordered[visited_count] = node_vid; + visited.mark_visited(node_vid); + visited_count += 1; } - let edges = match direction { Direction::OUT => g.node(node_vid).unwrap().out_edges(), Direction::IN => g.node(node_vid).unwrap().in_edges(), @@ -167,36 +254,21 @@ pub fn dijkstra_single_source_shortest_paths Prop::U8(1), - Some(weight) => match edge.properties().get(weight) { - Some(prop) => prop, - _ => continue, - }, + let edge_val = if let Some(w) = weight_fn(&edge) { + w + } else { + continue; }; - let next_cost = cost.clone().add(edge_val).unwrap(); - if next_cost < *dist.entry(next_node_vid).or_insert(max_val.clone()) { + if next_cost < dist.get_item(next_node_vid) { heap.push(State { cost: next_cost.clone(), node: next_node_vid, }); - dist.insert(next_node_vid, next_cost); - predecessor.insert(next_node_vid, node_vid); + dist.set_item(next_node_vid, next_cost); + predecessor.set_item(next_node_vid, node_vid); } } } - let (index, values): (IndexSet<_, ahash::RandomState>, Vec<_>) = paths - .into_iter() - .map(|(id, (cost, path))| { - let nodes = - Nodes::new_filtered(g.clone(), g.clone(), NO_FILTER, Some(Index::new(path))); - (id, (cost, nodes)) - }) - .unzip(); - Ok(NodeState::new( - g.clone(), - values.into(), - Some(Index::new(index)), - )) + Ok((dist, predecessor, k_ordered)) } diff --git a/raphtory/src/algorithms/pathing/mod.rs b/raphtory/src/algorithms/pathing/mod.rs index 95063769bc..036a329341 100644 --- a/raphtory/src/algorithms/pathing/mod.rs +++ b/raphtory/src/algorithms/pathing/mod.rs @@ -1,3 +1,41 @@ +use raphtory_api::core::entities::properties::prop::{Prop, PropType}; +use crate::{db::api::view::StaticGraphViewOps, errors::GraphError}; + +pub(crate) fn to_prop(g: &G, weight: Option<&str>, val: f64) -> Result { + let mut weight_type = PropType::U8; + if let Some(weight) = weight { + if let Some((_, dtype)) = g.edge_meta().get_prop_id_and_type(weight, false) { + weight_type = dtype; + } else { + return Err(GraphError::PropertyMissingError(weight.to_string())); + } + } + let prop_val = get_prop_val(weight_type, val)?; + Ok(prop_val) +} + +pub(crate) fn get_prop_val(prop_type: PropType, val: f64) -> Result { + let prop_type = match prop_type { + PropType::F32 => Prop::F32(val as f32), + PropType::F64 => Prop::F64(val as f64), + PropType::U8 => Prop::U8(val as u8), + PropType::U16 => Prop::U16(val as u16), + PropType::U32 => Prop::U32(val as u32), + PropType::U64 => Prop::U64(val as u64), + PropType::I32 => Prop::I32(val as i32), + PropType::I64 => Prop::I64(val as i64), + p_type => { + return Err(GraphError::InvalidProperty { + reason: format!("Weight type: {:?}, not supported", p_type), + }) + } + }; + Ok(prop_type) +} + +pub mod bellman_ford; pub mod dijkstra; +pub mod reweighting; pub mod single_source_shortest_path; pub mod temporal_reachability; + diff --git a/raphtory/src/algorithms/pathing/reweighting.rs b/raphtory/src/algorithms/pathing/reweighting.rs new file mode 100644 index 0000000000..23fc121ea8 --- /dev/null +++ b/raphtory/src/algorithms/pathing/reweighting.rs @@ -0,0 +1,237 @@ +use crate::db::graph::edge::EdgeView; +use crate::db::graph::node::NodeView; +use crate::{core::entities::nodes::node_ref::AsNodeRef, db::api::view::StaticGraphViewOps}; +use crate::{ + core::entities::nodes::node_ref::NodeRef, + db::{ + api::state::{ops::filter::NO_FILTER, Index, NodeState}, + graph::nodes::Nodes, + }, + errors::GraphError, + prelude::*, +}; +use indexmap::IndexSet; +use raphtory_api::core::{ + entities::{ + properties::prop::{PropType, PropUnwrap}, + VID, + }, + Direction, +}; +use std::{ + collections::{HashMap}, +}; +use super::super::pathing::{bellman_ford::bellman_ford_single_source_shortest_paths_algorithm, dijkstra::dijkstra_single_source_shortest_paths_algorithm, get_prop_val, to_prop}; + +pub fn get_johnson_reweighting_function<'a, G: StaticGraphViewOps>( + g: &G, + weight: Option<&'a str>, + direction: Direction, +) -> Result) -> Option + 'a, GraphError> { + let dist_val = to_prop(g, weight, 0.0)?; + let weight_fn = move |edge: &EdgeView| -> Option { + let edge_val = match weight{ + None => Some(Prop::U8(1)), + Some(weight) => match edge.properties().get(weight) { + Some(prop) => Some(prop), + _ => None + } + }; + edge_val + }; + let (distances, _) = bellman_ford_single_source_shortest_paths_algorithm(g, None::, direction, dist_val.clone(), dist_val, weight_fn)?; + let reweighting_function = move |edge: &EdgeView| -> Option { + let u = edge.src().node; + let v = edge.dst().node; + let weight_val = weight_fn(edge)?; + let dist_u = distances[u.index()].clone(); + let dist_v = distances[v.index()].clone(); + let new_weight_val_f64 = dist_u.as_f64().unwrap() + weight_val.as_f64().unwrap() - dist_v.as_f64().unwrap(); + // new weight should always be non-negative here + let new_weight_val = get_prop_val(weight_val.dtype(), new_weight_val_f64).unwrap(); + Some(new_weight_val) + }; + Ok(reweighting_function) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::db::{api::mutation::AdditionOps, graph::graph::Graph}; + + fn load_graph(edges: Vec<(i64, &str, &str, Vec<(&str, f32)>)>) -> Graph { + let graph = Graph::new(); + for (t, src, dst, props) in edges { + graph.add_edge(t, src, dst, props, None).unwrap(); + } + graph + } + + fn graph_with_negative_weights() -> Graph { + load_graph(vec![ + (0, "A", "B", vec![("weight", 4.0f32)]), + (1, "A", "C", vec![("weight", 2.0f32)]), + (2, "B", "C", vec![("weight", -3.0f32)]), + (3, "C", "D", vec![("weight", 2.0f32)]), + (4, "B", "D", vec![("weight", 5.0f32)]), + ]) + } + + fn graph_with_positive_weights() -> Graph { + load_graph(vec![ + (0, "A", "B", vec![("weight", 4.0f32)]), + (1, "A", "C", vec![("weight", 2.0f32)]), + (2, "B", "C", vec![("weight", 3.0f32)]), + (3, "C", "D", vec![("weight", 2.0f32)]), + (4, "B", "D", vec![("weight", 5.0f32)]), + ]) + } + + #[test] + fn test_reweighting_negative_weights() { + let graph = graph_with_negative_weights(); + let reweight_fn = get_johnson_reweighting_function(&graph, Some("weight"), Direction::OUT) + .expect("Reweighting should succeed"); + + // Check that all reweighted edges have non-negative weights + for edge in graph.edges() { + let new_weight = reweight_fn(&edge); + assert!(new_weight.is_some()); + let weight_val = new_weight.unwrap().as_f64().unwrap(); + assert!( + weight_val >= -1e-10, + "Reweighted edge {:?} -> {:?} has negative weight: {}", + edge.src().name(), + edge.dst().name(), + weight_val + ); + } + } + + #[test] + fn test_reweighting_positive_weights() { + let graph = graph_with_positive_weights(); + let reweight_fn = get_johnson_reweighting_function(&graph, Some("weight"), Direction::OUT) + .expect("Reweighting should succeed"); + + // Check that all reweighted edges have non-negative weights + for edge in graph.edges() { + let new_weight = reweight_fn(&edge); + assert!(new_weight.is_some()); + let weight_val = new_weight.unwrap().as_f64().unwrap(); + assert!( + weight_val >= -1e-10, + "Reweighted edge {:?} -> {:?} has negative weight: {}", + edge.src().name(), + edge.dst().name(), + weight_val + ); + } + } + + #[test] + fn test_reweighting_no_weight_property() { + let graph = graph_with_negative_weights(); + // Test with None weight (should use uniform weights of 1) + let reweight_fn = get_johnson_reweighting_function(&graph, None, Direction::OUT) + .expect("Reweighting should succeed"); + + // All edges should have non-negative weights + for edge in graph.edges() { + let new_weight = reweight_fn(&edge); + assert!(new_weight.is_some()); + let weight_val = new_weight.unwrap().as_f64().unwrap(); + assert!(weight_val >= -1e-10); + } + } + + #[test] + fn test_reweighting_preserves_shortest_paths() { + let graph = graph_with_negative_weights(); + let reweight_fn = get_johnson_reweighting_function(&graph, Some("weight"), Direction::OUT) + .expect("Reweighting should succeed"); + + // Edge A->B (weight 4) should be reweighted + let edge_ab = graph.edge("A", "B").unwrap(); + let reweight_ab = reweight_fn(&edge_ab).unwrap().as_f64().unwrap(); + + // Edge B->C (weight -3) should be reweighted to non-negative + let edge_bc = graph.edge("B", "C").unwrap(); + let reweight_bc = reweight_fn(&edge_bc).unwrap().as_f64().unwrap(); + + // Both should be non-negative + assert!(reweight_ab >= -1e-10); + assert!(reweight_bc >= -1e-10); + } + + #[test] + fn test_reweighting_with_different_directions() { + let graph = graph_with_negative_weights(); + + // Test with Direction::OUT + let reweight_fn_out = get_johnson_reweighting_function(&graph, Some("weight"), Direction::OUT); + assert!(reweight_fn_out.is_ok()); + + // Test with Direction::IN + let reweight_fn_in = get_johnson_reweighting_function(&graph, Some("weight"), Direction::IN); + assert!(reweight_fn_in.is_ok()); + } + + #[test] + fn test_reweighting_with_integer_weights() { + let edges = vec![ + (0, 1, 2, vec![("weight", 4i64)]), + (1, 1, 3, vec![("weight", 2i64)]), + (2, 2, 3, vec![("weight", -3i64)]), + (3, 3, 4, vec![("weight", 2i64)]), + (4, 2, 4, vec![("weight", 5i64)]), + ]; + + let graph = Graph::new(); + for (t, src, dst, props) in edges { + graph.add_edge(t, src, dst, props, None).unwrap(); + } + + let reweight_fn = get_johnson_reweighting_function(&graph, Some("weight"), Direction::OUT) + .expect("Reweighting should succeed"); + + // Check that all reweighted edges have non-negative weights + for edge in graph.edges() { + let new_weight = reweight_fn(&edge); + assert!(new_weight.is_some()); + let weight_val = new_weight.unwrap().as_f64().unwrap(); + assert!( + weight_val >= -1e-10, + "Reweighted edge has negative weight: {}", + weight_val + ); + } + } + + #[test] + fn test_reweighting_specific_values() { + // Create a simple graph where we can verify exact reweighted values + let graph = load_graph(vec![ + (0, "A", "B", vec![("weight", 1.0f32)]), + (1, "B", "C", vec![("weight", -2.0f32)]), + (2, "A", "C", vec![("weight", 0.0f32)]), + ]); + + let reweight_fn = get_johnson_reweighting_function(&graph, Some("weight"), Direction::OUT) + .expect("Reweighting should succeed"); + + // All edges should be reweighted to non-negative values + for edge in graph.edges() { + let new_weight = reweight_fn(&edge); + assert!(new_weight.is_some()); + let weight_val = new_weight.unwrap().as_f64().unwrap(); + assert!( + weight_val >= -1e-10, + "Edge {:?} -> {:?} has negative weight: {}", + edge.src().name(), + edge.dst().name(), + weight_val + ); + } + } +} \ No newline at end of file diff --git a/raphtory/tests/algo_tests/mod.rs b/raphtory/tests/algo_tests/mod.rs index 3887662709..26ae22df08 100644 --- a/raphtory/tests/algo_tests/mod.rs +++ b/raphtory/tests/algo_tests/mod.rs @@ -6,3 +6,4 @@ mod embeddings; mod metrics; mod motifs; mod pathing; + diff --git a/raphtory/tests/algo_tests/pathing.rs b/raphtory/tests/algo_tests/pathing.rs index c11872df6a..f8e645173d 100644 --- a/raphtory/tests/algo_tests/pathing.rs +++ b/raphtory/tests/algo_tests/pathing.rs @@ -301,6 +301,335 @@ mod dijkstra_tests { } } +#[cfg(test)] +mod bellman_ford_tests { + use raphtory::{ + algorithms::pathing::bellman_ford::bellman_ford_single_source_shortest_paths, + db::{api::mutation::AdditionOps, graph::graph::Graph}, + prelude::*, + test_storage, + }; + use raphtory_api::core::Direction; + + fn load_graph(edges: Vec<(i64, &str, &str, Vec<(&str, f32)>)>) -> Graph { + let graph = Graph::new(); + + for (t, src, dst, props) in edges { + graph.add_edge(t, src, dst, props, None).unwrap(); + } + graph + } + + fn basic_graph() -> Graph { + load_graph(vec![ + (0, "A", "B", vec![("weight", 4.0f32)]), + (1, "A", "C", vec![("weight", 4.0f32)]), + (2, "B", "C", vec![("weight", 2.0f32)]), + (3, "C", "D", vec![("weight", 3.0f32)]), + (4, "C", "E", vec![("weight", -2.0f32)]), + (5, "C", "F", vec![("weight", 6.0f32)]), + (6, "D", "F", vec![("weight", 2.0f32)]), + (7, "E", "F", vec![("weight", 3.0f32)]), + ]) + } + + #[test] + fn test_bellman_ford_multiple_targets() { + let graph = basic_graph(); + + test_storage!(&graph, |graph| { + let targets: Vec<&str> = vec!["D", "F"]; + let results = bellman_ford_single_source_shortest_paths( + graph, + "A", + targets, + Some("weight"), + Direction::OUT, + ); + + let results = results.unwrap(); + + assert_eq!(results.get_by_node("D").unwrap().0, 7.0f64); + assert_eq!( + results.get_by_node("D").unwrap().1.name(), + vec!["A", "C", "D"] + ); + + assert_eq!(results.get_by_node("F").unwrap().0, 5.0f64); + assert_eq!( + results.get_by_node("F").unwrap().1.name(), + vec!["A", "C", "E", "F"] + ); + + let targets: Vec<&str> = vec!["D", "E", "F"]; + let results = bellman_ford_single_source_shortest_paths( + graph, + "B", + targets, + Some("weight"), + Direction::OUT, + ); + let results = results.unwrap(); + assert_eq!(results.get_by_node("D").unwrap().0, 5.0f64); + assert_eq!(results.get_by_node("E").unwrap().0, 0.0f64); + assert_eq!(results.get_by_node("F").unwrap().0, 3.0f64); + assert_eq!( + results.get_by_node("D").unwrap().1.name(), + vec!["B", "C", "D"] + ); + assert_eq!( + results.get_by_node("E").unwrap().1.name(), + vec!["B", "C", "E"] + ); + assert_eq!( + results.get_by_node("F").unwrap().1.name(), + vec!["B", "C", "E", "F"] + ); + }); + } + + #[test] + fn test_bellman_ford_no_weight() { + let graph = basic_graph(); + + test_storage!(&graph, |graph| { + let targets: Vec<&str> = vec!["C", "E", "F"]; + let results = + bellman_ford_single_source_shortest_paths(graph, "A", targets, None, Direction::OUT) + .unwrap(); + assert_eq!(results.get_by_node("C").unwrap().1.name(), vec!["A", "C"]); + assert_eq!( + results.get_by_node("E").unwrap().1.name(), + vec!["A", "C", "E"] + ); + assert_eq!( + results.get_by_node("F").unwrap().1.name(), + vec!["A", "C", "F"] + ); + }); + } + + #[test] + fn test_bellman_ford_multiple_targets_node_ids() { + let edges = vec![ + (0, 1, 2, vec![("weight", 4i64)]), + (1, 1, 3, vec![("weight", 4i64)]), + (2, 2, 3, vec![("weight", 2i64)]), + (3, 3, 4, vec![("weight", 3i64)]), + (4, 3, 5, vec![("weight", -2i64)]), + (5, 3, 6, vec![("weight", 6i64)]), + (6, 4, 6, vec![("weight", 2i64)]), + (7, 5, 6, vec![("weight", 3i64)]), + ]; + + let graph = Graph::new(); + for (t, src, dst, props) in edges { + graph.add_edge(t, src, dst, props, None).unwrap(); + } + + test_storage!(&graph, |graph| { + let targets = vec![4, 6]; + let results = bellman_ford_single_source_shortest_paths( + graph, + 1, + targets, + Some("weight"), + Direction::OUT, + ); + let results = results.unwrap(); + assert_eq!(results.get_by_node("4").unwrap().0, 7f64); + assert_eq!( + results.get_by_node("4").unwrap().1.name(), + vec!["1", "3", "4"] + ); + + assert_eq!(results.get_by_node("6").unwrap().0, 5f64); + assert_eq!( + results.get_by_node("6").unwrap().1.name(), + vec!["1", "3", "5", "6"] + ); + + let targets = vec![4, 5, 6]; + let results = bellman_ford_single_source_shortest_paths( + graph, + 2, + targets, + Some("weight"), + Direction::OUT, + ); + let results = results.unwrap(); + assert_eq!(results.get_by_node("4").unwrap().0, 5f64); + assert_eq!(results.get_by_node("5").unwrap().0, 0f64); + assert_eq!(results.get_by_node("6").unwrap().0, 3f64); + assert_eq!( + results.get_by_node("4").unwrap().1.name(), + vec!["2", "3", "4"] + ); + assert_eq!( + results.get_by_node("5").unwrap().1.name(), + vec!["2", "3", "5"] + ); + assert_eq!( + results.get_by_node("6").unwrap().1.name(), + vec!["2", "3", "5", "6"] + ); + }); + } + + #[test] + fn test_bellman_ford_multiple_targets_i64() { + let edges = vec![ + (0, "A", "B", vec![("weight", 4i64)]), + (1, "A", "C", vec![("weight", 4i64)]), + (2, "B", "C", vec![("weight", 2i64)]), + (3, "C", "D", vec![("weight", 3i64)]), + (4, "C", "E", vec![("weight", -2i64)]), + (5, "C", "F", vec![("weight", 6i64)]), + (6, "D", "F", vec![("weight", 2i64)]), + (7, "E", "F", vec![("weight", 3i64)]), + ]; + + let graph = Graph::new(); + + for (t, src, dst, props) in edges { + graph.add_edge(t, src, dst, props, None).unwrap(); + } + + test_storage!(&graph, |graph| { + let targets: Vec<&str> = vec!["D", "F"]; + let results = bellman_ford_single_source_shortest_paths( + graph, + "A", + targets, + Some("weight"), + Direction::OUT, + ); + let results = results.unwrap(); + assert_eq!(results.get_by_node("D").unwrap().0, 7f64); + assert_eq!( + results.get_by_node("D").unwrap().1.name(), + vec!["A", "C", "D"] + ); + + assert_eq!(results.get_by_node("F").unwrap().0, 5f64); + assert_eq!( + results.get_by_node("F").unwrap().1.name(), + vec!["A", "C", "E", "F"] + ); + + let targets: Vec<&str> = vec!["D", "E", "F"]; + let results = bellman_ford_single_source_shortest_paths( + graph, + "B", + targets, + Some("weight"), + Direction::OUT, + ); + let results = results.unwrap(); + assert_eq!(results.get_by_node("D").unwrap().0, 5f64); + assert_eq!(results.get_by_node("E").unwrap().0, 0f64); + assert_eq!(results.get_by_node("F").unwrap().0, 3f64); + assert_eq!( + results.get_by_node("D").unwrap().1.name(), + vec!["B", "C", "D"] + ); + assert_eq!( + results.get_by_node("E").unwrap().1.name(), + vec!["B", "C", "E"] + ); + assert_eq!( + results.get_by_node("F").unwrap().1.name(), + vec!["B", "C", "E", "F"] + ); + }); + } + + #[test] + fn test_bellman_ford_undirected() { + let edges = vec![ + (0, "C", "A", vec![("weight", 4u64)]), + (1, "A", "B", vec![("weight", 4u64)]), + (3, "C", "D", vec![("weight", 3u64)]), + ]; + + let graph = Graph::new(); + + for (t, src, dst, props) in edges { + graph.add_edge(t, src, dst, props, None).unwrap(); + } + + test_storage!(&graph, |graph| { + let targets: Vec<&str> = vec!["D"]; + let results = bellman_ford_single_source_shortest_paths( + graph, + "A", + targets, + Some("weight"), + Direction::BOTH, + ); + + let results = results.unwrap(); + assert_eq!(results.get_by_node("D").unwrap().0, 7f64); + assert_eq!( + results.get_by_node("D").unwrap().1.name(), + vec!["A", "C", "D"] + ); + }); + } + + #[test] + fn test_bellman_ford_no_weight_undirected() { + let edges = vec![ + (0, "C", "A", vec![("weight", 4u64)]), + (1, "A", "B", vec![("weight", 4u64)]), + (3, "C", "D", vec![("weight", 3u64)]), + ]; + + let graph = Graph::new(); + + for (t, src, dst, props) in edges { + graph.add_edge(t, src, dst, props, None).unwrap(); + } + + test_storage!(&graph, |graph| { + let targets: Vec<&str> = vec!["D"]; + let results = + bellman_ford_single_source_shortest_paths(graph, "A", targets, None, Direction::BOTH) + .unwrap(); + assert_eq!( + results.get_by_node("D").unwrap().1.name(), + vec!["A", "C", "D"] + ); + }); + } + + #[test] + fn test_bellman_ford_negative_cycle() { + let edges = vec![ + (0, "A", "B", vec![("weight", 1i64)]), + (1, "B", "C", vec![("weight", -5i64)]), + (2, "C", "A", vec![("weight", 2i64)]), + ]; + + let graph = Graph::new(); + for (t, src, dst, props) in edges { + graph.add_edge(t, src, dst, props, None).unwrap(); + } + + test_storage!(&graph, |graph| { + let targets: Vec<&str> = vec!["C"]; + let result = bellman_ford_single_source_shortest_paths( + graph, + "A", + targets, + Some("weight"), + Direction::OUT, + ); + assert!(result.is_err()); + }); + } +} + #[cfg(test)] mod sssp_tests { use raphtory::{