From a8e67fb10e2e8cb379c756dcb6871d343a88b281 Mon Sep 17 00:00:00 2001 From: zazabap Date: Wed, 4 Mar 2026 20:05:01 +0000 Subject: [PATCH 1/7] Add plan for #109: LCS to MaximumIndependentSet Co-Authored-By: Claude Opus 4.6 --- ...2026-03-04-lcs-to-maximumindependentset.md | 930 ++++++++++++++++++ 1 file changed, 930 insertions(+) create mode 100644 docs/plans/2026-03-04-lcs-to-maximumindependentset.md diff --git a/docs/plans/2026-03-04-lcs-to-maximumindependentset.md b/docs/plans/2026-03-04-lcs-to-maximumindependentset.md new file mode 100644 index 00000000..f26c02a1 --- /dev/null +++ b/docs/plans/2026-03-04-lcs-to-maximumindependentset.md @@ -0,0 +1,930 @@ +# LongestCommonSubsequence to MaximumIndependentSet Implementation Plan + +> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task. + +**Goal:** Implement the LCS model (issue #108) and the LCS → MaxIS reduction rule (issue #109). + +**Architecture:** The LCS problem takes k strings over an alphabet and finds the longest common subsequence. The reduction constructs a "match graph" where nodes are k-tuples of positions with matching characters, and conflict edges connect incompatible tuples. MaxIS on this graph equals the LCS length. The model goes in `src/models/misc/` since strings are a unique input structure. The reduction produces `MaximumIndependentSet` (unit-weight). 
+ +**Tech Stack:** Rust, serde, inventory (schema registration), `#[reduction]` proc macro + +**References:** +- Apostolico & Guerra, 1987 (https://doi.org/10.1137/0216009) +- Baxter et al., 2004 (https://doi.org/10.1007/978-3-540-27801-6_12) +- Issue #108 (model), Issue #109 (rule) + +--- + +### Task 1: Implement the LCS model + +**Files:** +- Create: `src/models/misc/longest_common_subsequence.rs` +- Modify: `src/models/misc/mod.rs` +- Modify: `src/models/mod.rs` +- Modify: `src/lib.rs` (prelude) +- Create: `src/unit_tests/models/misc/longest_common_subsequence.rs` + +**Step 1: Create the model file** + +Create `src/models/misc/longest_common_subsequence.rs`: + +```rust +//! Longest Common Subsequence problem implementation. +//! +//! Given k strings over a finite alphabet, find a longest string that is +//! a subsequence of every input string. + +use crate::registry::{FieldInfo, ProblemSchemaEntry}; +use crate::traits::{OptimizationProblem, Problem}; +use crate::types::{Direction, SolutionSize}; +use serde::{Deserialize, Serialize}; + +inventory::submit! { + ProblemSchemaEntry { + name: "LongestCommonSubsequence", + module_path: module_path!(), + description: "Find longest string that is a subsequence of every input string", + fields: &[ + FieldInfo { name: "strings", type_name: "Vec>", description: "Input strings s_1, ..., s_k" }, + ], + } +} + +/// The Longest Common Subsequence (LCS) problem. +/// +/// Given `k` strings `s_1, ..., s_k` over a finite alphabet, find a longest +/// string `w` that is a subsequence of every `s_i`. +/// +/// # Representation +/// +/// Variables represent positions in the shortest string. Each variable selects +/// whether that character position contributes to the subsequence (binary: include/exclude). +/// +/// More precisely, for a shortest string of length `m`, we have `m` binary variables. +/// `x_j = 1` means the j-th character of the shortest string is included in the +/// candidate subsequence. 
The evaluate function checks whether the resulting +/// subsequence of the shortest string is also a subsequence of all other strings. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct LongestCommonSubsequence { + strings: Vec<Vec<u8>>, +} + +impl LongestCommonSubsequence { + /// Create a new LCS instance. + /// + /// # Panics + /// Panics if fewer than 2 strings are provided. + pub fn new(strings: Vec<Vec<u8>>) -> Self { + assert!(strings.len() >= 2, "need at least 2 strings"); + Self { strings } + } + + /// Returns the input strings. + pub fn strings(&self) -> &[Vec<u8>] { + &self.strings + } + + /// Returns the number of strings k. + pub fn num_strings(&self) -> usize { + self.strings.len() + } + + /// Returns the total length of all strings. + pub fn total_length(&self) -> usize { + self.strings.iter().map(|s| s.len()).sum() + } + + /// Returns the index of the shortest string. + fn shortest_index(&self) -> usize { + self.strings + .iter() + .enumerate() + .min_by_key(|(_, s)| s.len()) + .map(|(i, _)| i) + .unwrap_or(0) + } + + /// Returns the length of the shortest string (number of binary variables). + fn shortest_len(&self) -> usize { + self.strings.iter().map(|s| s.len()).min().unwrap_or(0) + } +} + +/// Check if `subseq` is a subsequence of `s`. 
+fn is_subsequence(subseq: &[u8], s: &[u8]) -> bool { + let mut it = s.iter(); + subseq.iter().all(|c| it.any(|sc| sc == c)) +} + +impl Problem for LongestCommonSubsequence { + const NAME: &'static str = "LongestCommonSubsequence"; + type Metric = SolutionSize; + + fn variant() -> Vec<(&'static str, &'static str)> { + crate::variant_params![] + } + + fn dims(&self) -> Vec { + vec![2; self.shortest_len()] + } + + fn evaluate(&self, config: &[usize]) -> SolutionSize { + let m = self.shortest_len(); + if config.len() != m { + return SolutionSize::Invalid; + } + if config.iter().any(|&v| v >= 2) { + return SolutionSize::Invalid; + } + + // Build candidate subsequence from shortest string + let si = self.shortest_index(); + let shortest = &self.strings[si]; + let subseq: Vec = config + .iter() + .enumerate() + .filter(|(_, &x)| x == 1) + .map(|(j, _)| shortest[j]) + .collect(); + + // Check if subseq is a subsequence of all other strings + for (i, s) in self.strings.iter().enumerate() { + if i == si { + continue; // Already a subsequence of shortest by construction + } + if !is_subsequence(&subseq, s) { + return SolutionSize::Invalid; + } + } + + SolutionSize::Valid(subseq.len()) + } +} + +impl OptimizationProblem for LongestCommonSubsequence { + type Value = usize; + + fn direction(&self) -> Direction { + Direction::Maximize + } +} + +crate::declare_variants! { + LongestCommonSubsequence => "2^total_length", +} + +#[cfg(test)] +#[path = "../../unit_tests/models/misc/longest_common_subsequence.rs"] +mod tests; +``` + +**Step 2: Register the model in mod.rs files** + +In `src/models/misc/mod.rs`, add: +```rust +mod longest_common_subsequence; +pub use longest_common_subsequence::LongestCommonSubsequence; +``` + +In `src/models/mod.rs`, add `LongestCommonSubsequence` to the re-export line for misc. + +In `src/lib.rs`, add `LongestCommonSubsequence` to the prelude `pub use crate::models::misc::` line. 
+ +**Step 3: Write unit tests** + +Create `src/unit_tests/models/misc/longest_common_subsequence.rs`: + +```rust +use super::*; +use crate::solvers::{BruteForce, Solver}; +use crate::traits::{OptimizationProblem, Problem}; +use crate::types::Direction; + +#[test] +fn test_lcs_basic() { + let problem = LongestCommonSubsequence::new(vec![ + b"ABAC".to_vec(), + b"BACA".to_vec(), + ]); + assert_eq!(problem.num_strings(), 2); + assert_eq!(problem.total_length(), 8); + assert_eq!(problem.direction(), Direction::Maximize); + assert_eq!(::NAME, "LongestCommonSubsequence"); + assert_eq!(::variant(), vec![]); +} + +#[test] +fn test_lcs_dims() { + let problem = LongestCommonSubsequence::new(vec![ + b"ABAC".to_vec(), + b"BACA".to_vec(), + ]); + // Shortest string has length 4, so 4 binary variables + assert_eq!(problem.dims(), vec![2; 4]); +} + +#[test] +fn test_lcs_evaluate_valid_subsequence() { + // ABAC and BACA: "BAC" is a common subsequence of length 3 + let problem = LongestCommonSubsequence::new(vec![ + b"ABAC".to_vec(), + b"BACA".to_vec(), + ]); + // Selecting positions 1,2,3 from ABAC gives "BAC" + assert!(problem.evaluate(&[0, 1, 1, 1]).is_valid()); +} + +#[test] +fn test_lcs_evaluate_invalid_subsequence() { + let problem = LongestCommonSubsequence::new(vec![ + b"ABAC".to_vec(), + b"BACA".to_vec(), + ]); + // Selecting all 4 chars from ABAC gives "ABAC", not a subsequence of "BACA" + assert_eq!(problem.evaluate(&[1, 1, 1, 1]), SolutionSize::Invalid); +} + +#[test] +fn test_lcs_evaluate_empty() { + let problem = LongestCommonSubsequence::new(vec![ + b"ABC".to_vec(), + b"DEF".to_vec(), + ]); + // Empty subsequence is always valid + assert_eq!(problem.evaluate(&[0, 0, 0]), SolutionSize::Valid(0)); +} + +#[test] +fn test_lcs_brute_force() { + let problem = LongestCommonSubsequence::new(vec![ + b"ABAC".to_vec(), + b"BACA".to_vec(), + ]); + let solver = BruteForce::new(); + let solution = solver.find_best(&problem).expect("should find solution"); + let metric = 
problem.evaluate(&solution); + assert_eq!(metric, SolutionSize::Valid(3)); // LCS = "BAC" (the only common subsequence of length 3) +} + +#[test] +fn test_lcs_three_strings() { + let problem = LongestCommonSubsequence::new(vec![ + b"ABCDAB".to_vec(), + b"BDCABA".to_vec(), + b"BCADBA".to_vec(), + ]); + let solver = BruteForce::new(); + let solution = solver.find_best(&problem).expect("should find solution"); + let metric = problem.evaluate(&solution); + assert_eq!(metric, SolutionSize::Valid(4)); // LCS = "BCAB" +} + +#[test] +fn test_lcs_evaluate_wrong_config_length() { + let problem = LongestCommonSubsequence::new(vec![ + b"AB".to_vec(), + b"BA".to_vec(), + ]); + assert_eq!(problem.evaluate(&[1]), SolutionSize::Invalid); + assert_eq!(problem.evaluate(&[1, 0, 0]), SolutionSize::Invalid); +} + +#[test] +fn test_lcs_evaluate_invalid_variable_value() { + let problem = LongestCommonSubsequence::new(vec![ + b"AB".to_vec(), + b"BA".to_vec(), + ]); + assert_eq!(problem.evaluate(&[2, 0]), SolutionSize::Invalid); +} + +#[test] +fn test_lcs_serialization() { + let problem = LongestCommonSubsequence::new(vec![ + b"ABC".to_vec(), + b"BCA".to_vec(), + ]); + let json = serde_json::to_value(&problem).unwrap(); + let restored: LongestCommonSubsequence = serde_json::from_value(json).unwrap(); + assert_eq!(restored.strings(), problem.strings()); +} + +#[test] +fn test_lcs_identical_strings() { + let problem = LongestCommonSubsequence::new(vec![ + b"ABC".to_vec(), + b"ABC".to_vec(), + ]); + let solver = BruteForce::new(); + let solution = solver.find_best(&problem).expect("should find solution"); + assert_eq!(problem.evaluate(&solution), SolutionSize::Valid(3)); +} + +#[test] +fn test_lcs_no_common_chars() { + let problem = LongestCommonSubsequence::new(vec![ + b"ABC".to_vec(), + b"DEF".to_vec(), + ]); + let solver = BruteForce::new(); + let solution = solver.find_best(&problem).expect("should find solution"); + assert_eq!(problem.evaluate(&solution), SolutionSize::Valid(0)); +} + +#[test] 
+#[should_panic(expected = "need at least 2 strings")] +fn test_lcs_too_few_strings() { + LongestCommonSubsequence::new(vec![b"ABC".to_vec()]); +} +``` + +**Step 4: Register in unit test mod.rs** + +Check `src/unit_tests/models/misc/mod.rs` and add: +```rust +mod longest_common_subsequence; +``` + +**Step 5: Build and test** + +Run: `cargo test test_lcs -- --nocapture` +Expected: All tests pass. + +**Step 6: Register in CLI dispatch** + +In `problemreductions-cli/src/dispatch.rs`: +- Add `use problemreductions::models::misc::LongestCommonSubsequence;` (or adjust import) +- Add `"LongestCommonSubsequence" => deser_opt::(data),` in the deserialize match +- Add `"LongestCommonSubsequence" => try_ser::(any),` in the serialize match + +**Step 7: Commit** + +```bash +git add src/models/misc/longest_common_subsequence.rs src/models/misc/mod.rs \ + src/models/mod.rs src/lib.rs \ + src/unit_tests/models/misc/longest_common_subsequence.rs \ + src/unit_tests/models/misc/mod.rs \ + problemreductions-cli/src/dispatch.rs +git commit -m "feat: add LongestCommonSubsequence model (closes #108)" +``` + +--- + +### Task 2: Implement the LCS → MaxIS reduction rule + +**Files:** +- Create: `src/rules/longestcommonsubsequence_maximumindependentset.rs` +- Modify: `src/rules/mod.rs` +- Create: `src/unit_tests/rules/longestcommonsubsequence_maximumindependentset.rs` + +**Step 1: Create the reduction rule file** + +Create `src/rules/longestcommonsubsequence_maximumindependentset.rs`: + +```rust +//! Reduction from LongestCommonSubsequence to MaximumIndependentSet. +//! +//! Constructs a "match graph" where each vertex is a k-tuple of positions +//! (one per string) that all share the same character. Two vertices are +//! connected by a conflict edge if they cannot coexist in a valid common +//! subsequence — i.e., the position orderings are inconsistent (crossing) +//! or share a position in some string. +//! +//! A maximum independent set in this graph corresponds to a longest common +//! 
subsequence. + +use crate::models::graph::MaximumIndependentSet; +use crate::models::misc::LongestCommonSubsequence; +use crate::reduction; +use crate::rules::traits::{ReduceTo, ReductionResult}; +use crate::topology::SimpleGraph; +use crate::types::One; + +/// Result of reducing LCS to MaximumIndependentSet. +#[derive(Debug, Clone)] +pub struct ReductionLCSToIS { + target: MaximumIndependentSet, + /// Position tuples for each vertex: nodes[i] = (p_1, p_2, ..., p_k) + nodes: Vec>, + /// Number of variables in the source LCS problem (= shortest string length). + num_source_variables: usize, + /// Index of the shortest string in the source problem. + shortest_index: usize, +} + +impl ReductionResult for ReductionLCSToIS { + type Source = LongestCommonSubsequence; + type Target = MaximumIndependentSet; + + fn target_problem(&self) -> &Self::Target { + &self.target + } + + /// Extract an LCS solution from an IS solution. + /// + /// Each selected vertex represents a matched position tuple. We reconstruct + /// which positions in the shortest string were used, setting x_j = 1 for + /// each position j of the shortest string that appears in a selected node. + fn extract_solution(&self, target_solution: &[usize]) -> Vec { + let mut config = vec![0usize; self.num_source_variables]; + for (vertex_idx, &selected) in target_solution.iter().enumerate() { + if selected == 1 { + let pos_in_shortest = self.nodes[vertex_idx][self.shortest_index]; + config[pos_in_shortest] = 1; + } + } + config + } +} + +/// Check if two position tuples conflict (cannot both be in a common subsequence). +/// +/// Conflict occurs when the relative order is inconsistent across strings: +/// either some positions go forward while others go backward, or any +/// position is shared (equal). +fn tuples_conflict(a: &[usize], b: &[usize]) -> bool { + // Check if all(a_i < b_i) or all(a_i > b_i). If neither, it's a conflict. 
+ let mut all_less = true; + let mut all_greater = true; + for (ai, bi) in a.iter().zip(b.iter()) { + if ai >= bi { + all_less = false; + } + if ai <= bi { + all_greater = false; + } + } + // Conflict if neither all-less nor all-greater + !all_less && !all_greater +} + +#[reduction( + overhead = { + num_vertices = "total_length^num_strings", + num_edges = "total_length^(2 * num_strings)", + } +)] +impl ReduceTo> for LongestCommonSubsequence { + type Result = ReductionLCSToIS; + + fn reduce_to(&self) -> Self::Result { + let k = self.num_strings(); + let strings = self.strings(); + + // Find the shortest string index + let shortest_index = strings + .iter() + .enumerate() + .min_by_key(|(_, s)| s.len()) + .map(|(i, _)| i) + .unwrap_or(0); + let num_source_variables = strings[shortest_index].len(); + + // Step 1: Build match nodes — k-tuples of positions with matching characters + let mut nodes: Vec> = Vec::new(); + + // Collect character positions for each string + // char_positions[i][c] = list of positions in string i with character c + let mut char_positions: Vec>> = + vec![std::collections::HashMap::new(); k]; + for (i, s) in strings.iter().enumerate() { + for (j, &c) in s.iter().enumerate() { + char_positions[i].entry(c).or_default().push(j); + } + } + + // Find all characters that appear in all strings + let common_chars: Vec = { + let first_chars: std::collections::HashSet = + char_positions[0].keys().copied().collect(); + first_chars + .into_iter() + .filter(|c| char_positions.iter().all(|cp| cp.contains_key(c))) + .collect() + }; + + // Generate all k-tuples for each common character + for c in &common_chars { + let position_lists: Vec<&Vec> = + char_positions.iter().map(|cp| &cp[c]).collect(); + // Generate Cartesian product of position lists + let mut tuples: Vec> = vec![vec![]]; + for positions in &position_lists { + let mut new_tuples = Vec::new(); + for tuple in &tuples { + for &pos in *positions { + let mut new_tuple = tuple.clone(); + 
new_tuple.push(pos); + new_tuples.push(new_tuple); + } + } + tuples = new_tuples; + } + nodes.extend(tuples); + } + + // Step 2: Build conflict edges + let n = nodes.len(); + let mut edges: Vec<(usize, usize)> = Vec::new(); + for i in 0..n { + for j in (i + 1)..n { + if tuples_conflict(&nodes[i], &nodes[j]) { + edges.push((i, j)); + } + } + } + + let target = MaximumIndependentSet::new( + SimpleGraph::new(n, edges), + vec![One; n], + ); + + ReductionLCSToIS { + target, + nodes, + num_source_variables, + shortest_index, + } + } +} + +#[cfg(test)] +#[path = "../unit_tests/rules/longestcommonsubsequence_maximumindependentset.rs"] +mod tests; +``` + +**Step 2: Register in rules/mod.rs** + +Add to `src/rules/mod.rs` (alphabetically): +```rust +mod longestcommonsubsequence_maximumindependentset; +``` + +**Step 3: Write unit tests** + +Create `src/unit_tests/rules/longestcommonsubsequence_maximumindependentset.rs`: + +```rust +use super::*; +use crate::models::graph::MaximumIndependentSet; +use crate::models::misc::LongestCommonSubsequence; +use crate::rules::traits::ReduceTo; +use crate::solvers::{BruteForce, Solver}; +use crate::topology::{Graph, SimpleGraph}; +use crate::traits::Problem; +use crate::types::One; +use std::collections::HashSet; + +#[test] +fn test_lcs_to_maximumindependentset_closed_loop() { + // ABAC and BACA: LCS = 3 (e.g., "BAC") + let source = LongestCommonSubsequence::new(vec![ + b"ABAC".to_vec(), + b"BACA".to_vec(), + ]); + + let reduction = ReduceTo::>::reduce_to(&source); + let target = reduction.target_problem(); + + let solver = BruteForce::new(); + + // Solve source directly + let best_source = solver.find_all_best(&source); + let source_set: HashSet> = best_source.into_iter().collect(); + + // Solve target and extract + let best_target = solver.find_all_best(target); + let extracted: HashSet> = best_target + .iter() + .map(|t| reduction.extract_solution(t)) + .collect(); + + assert!(!extracted.is_empty()); + 
assert!(extracted.is_subset(&source_set)); + + // Verify optimal value matches + for sol in &extracted { + let metric = source.evaluate(sol); + assert_eq!(metric, SolutionSize::Valid(3)); + } +} + +#[test] +fn test_lcs_to_is_graph_structure() { + // ABAC and BACA + let source = LongestCommonSubsequence::new(vec![ + b"ABAC".to_vec(), + b"BACA".to_vec(), + ]); + + let reduction = ReduceTo::>::reduce_to(&source); + let target = reduction.target_problem(); + + // From the issue: 6 match nodes, 9 conflict edges + assert_eq!(target.graph().num_vertices(), 6); + assert_eq!(target.graph().num_edges(), 9); +} + +#[test] +fn test_lcs_to_is_three_strings() { + let source = LongestCommonSubsequence::new(vec![ + b"ABCDAB".to_vec(), + b"BDCABA".to_vec(), + b"BCADBA".to_vec(), + ]); + + let reduction = ReduceTo::>::reduce_to(&source); + let target = reduction.target_problem(); + + let solver = BruteForce::new(); + let best_target = solver.find_all_best(target); + + // IS size should equal LCS length = 4 + for sol in &best_target { + let is_size: usize = sol.iter().sum(); + assert_eq!(is_size, 4); + } + + // Extract and verify + let best_source = solver.find_all_best(&source); + let source_set: HashSet> = best_source.into_iter().collect(); + let extracted: HashSet> = best_target + .iter() + .map(|t| reduction.extract_solution(t)) + .collect(); + assert!(extracted.is_subset(&source_set)); +} + +#[test] +fn test_lcs_to_is_no_common_chars() { + let source = LongestCommonSubsequence::new(vec![ + b"ABC".to_vec(), + b"DEF".to_vec(), + ]); + + let reduction = ReduceTo::>::reduce_to(&source); + let target = reduction.target_problem(); + + // No matching characters → 0 vertices + assert_eq!(target.graph().num_vertices(), 0); + assert_eq!(target.graph().num_edges(), 0); +} + +#[test] +fn test_lcs_to_is_identical_strings() { + let source = LongestCommonSubsequence::new(vec![ + b"ABC".to_vec(), + b"ABC".to_vec(), + ]); + + let reduction = ReduceTo::>::reduce_to(&source); + let target = 
reduction.target_problem(); + + let solver = BruteForce::new(); + let best = solver.find_all_best(target); + + // LCS of identical strings = full string length = 3 + for sol in &best { + let is_size: usize = sol.iter().sum(); + assert_eq!(is_size, 3); + } +} + +#[test] +fn test_tuples_conflict_function() { + // Same position → conflict + assert!(tuples_conflict(&[0, 1], &[0, 2])); + // Crossing → conflict + assert!(tuples_conflict(&[0, 1], &[1, 0])); + // Consistent forward → no conflict + assert!(!tuples_conflict(&[0, 0], &[1, 1])); + // Consistent backward → no conflict + assert!(!tuples_conflict(&[1, 1], &[0, 0])); +} +``` + +**Step 4: Register unit test module** + +Check `src/unit_tests/rules/mod.rs` and add: +```rust +mod longestcommonsubsequence_maximumindependentset; +``` + +**Step 5: Build and test** + +Run: `cargo test test_lcs_to -- --nocapture` +Expected: All tests pass. + +**Step 6: Commit** + +```bash +git add src/rules/longestcommonsubsequence_maximumindependentset.rs \ + src/rules/mod.rs \ + src/unit_tests/rules/longestcommonsubsequence_maximumindependentset.rs \ + src/unit_tests/rules/mod.rs +git commit -m "feat: add LCS to MaximumIndependentSet reduction rule (closes #109)" +``` + +--- + +### Task 3: Write example program + +**Files:** +- Create: `examples/reduction_longestcommonsubsequence_to_maximumindependentset.rs` +- Modify: `tests/suites/examples.rs` + +**Step 1: Create the example file** + +Create `examples/reduction_longestcommonsubsequence_to_maximumindependentset.rs`: + +```rust +// # LongestCommonSubsequence to MaximumIndependentSet Reduction +// +// ## Reduction Overview +// A match graph is constructed where each vertex is a pair of positions +// (p_1, p_2) from the two input strings sharing the same character. +// Conflict edges connect pairs that cannot coexist in a common subsequence +// (crossing or shared positions). MaxIS on this graph equals the LCS length. 
+// +// ## This Example +// - 2 strings: s1 = "ABAC", s2 = "BACA" +// - Match graph: 6 vertices, 9 conflict edges +// - LCS = "BAC" (length 3), corresponding to IS {v2, v3, v5} +// +// ## Output +// Exports `docs/paper/examples/lcs_to_maximumindependentset.json` and +// `lcs_to_maximumindependentset.result.json`. + +use problemreductions::export::*; +use problemreductions::prelude::*; + +pub fn run() { + let source = LongestCommonSubsequence::new(vec![ + b"ABAC".to_vec(), + b"BACA".to_vec(), + ]); + + let reduction = + ReduceTo::>::reduce_to(&source); + let target = reduction.target_problem(); + + println!("\n=== Problem Transformation ==="); + println!( + "Source: LCS with {} strings, total length {}", + source.num_strings(), + source.total_length() + ); + println!( + "Target: MaxIS with {} vertices, {} edges", + target.graph().num_vertices(), + target.graph().num_edges() + ); + + let solver = BruteForce::new(); + let target_solutions = solver.find_all_best(target); + println!("\n=== Solution ==="); + println!("Target solutions found: {}", target_solutions.len()); + + let mut solutions = Vec::new(); + for target_sol in &target_solutions { + let source_sol = reduction.extract_solution(target_sol); + let eval = source.evaluate(&source_sol); + assert!(eval.is_valid()); + solutions.push(SolutionPair { + source_config: source_sol.clone(), + target_config: target_sol.clone(), + }); + } + + let source_sol = reduction.extract_solution(&target_solutions[0]); + println!("Source solution: {:?}", source_sol); + println!("Source value: {:?}", source.evaluate(&source_sol)); + println!("\nReduction verified successfully"); + + // Export JSON + let source_variant = variant_to_map(LongestCommonSubsequence::variant()); + let target_variant = variant_to_map(MaximumIndependentSet::::variant()); + let overhead = lookup_overhead( + "LongestCommonSubsequence", + &source_variant, + "MaximumIndependentSet", + &target_variant, + ) + .expect("LCS -> MaxIS overhead not found"); + + let data 
= ReductionData { + source: ProblemSide { + problem: LongestCommonSubsequence::NAME.to_string(), + variant: source_variant, + instance: serde_json::json!({ + "num_strings": source.num_strings(), + "total_length": source.total_length(), + "strings": source.strings(), + }), + }, + target: ProblemSide { + problem: MaximumIndependentSet::::NAME.to_string(), + variant: target_variant, + instance: serde_json::json!({ + "num_vertices": target.graph().num_vertices(), + "num_edges": target.graph().num_edges(), + }), + }, + overhead: overhead_to_json(&overhead), + }; + + let results = ResultData { solutions }; + write_example("lcs_to_maximumindependentset", &data, &results); +} + +fn main() { + run() +} +``` + +**Step 2: Register the example in tests/suites/examples.rs** + +Add alphabetically: +```rust +example_test!(reduction_longestcommonsubsequence_to_maximumindependentset); +``` + +And: +```rust +example_fn!( + test_longestcommonsubsequence_to_maximumindependentset, + reduction_longestcommonsubsequence_to_maximumindependentset +); +``` + +**Step 3: Run the example** + +Run: `cargo run --example reduction_longestcommonsubsequence_to_maximumindependentset` +Expected: Prints transformation info, exports JSON files. 
+ +**Step 4: Commit** + +```bash +git add examples/reduction_longestcommonsubsequence_to_maximumindependentset.rs \ + tests/suites/examples.rs +git commit -m "feat: add LCS to MaxIS example program" +``` + +--- + +### Task 4: Regenerate exports and run checks + +**Step 1: Regenerate reduction graph and schemas** + +```bash +cargo run --example export_graph +cargo run --example export_schemas +``` + +**Step 2: Run full test suite** + +```bash +make test clippy fmt-check +``` + +**Step 3: Commit generated files** + +```bash +git add docs/paper/reduction_graph.json docs/paper/problem_schemas.json +git commit -m "chore: regenerate reduction graph and schemas after LCS->MaxIS rule" +``` + +--- + +### Task 5: Document in paper + +Invoke `/write-rule-in-paper` to write the reduction-rule entry in `docs/paper/reductions.typ`. + +Also invoke `/write-model-in-paper` for the LongestCommonSubsequence problem definition. + +Key points for the paper entry: +- **Problem definition:** k strings over alphabet Σ, find longest common subsequence +- **Reduction rule:** Match graph construction with crossing/conflict edges +- **Example:** ABAC/BACA → 6-vertex match graph → IS={v2,v3,v5} → LCS="BAC" +- **Complexity:** O(n^k) vertices worst-case; NP-hard when the number of strings k is part of the input (Maier, 1978), polynomial for any fixed k + +**Commit after writing:** +```bash +git add docs/paper/reductions.typ +git commit -m "docs: add LCS problem definition and LCS->MaxIS reduction in paper" +``` + +--- + +### Task 6: Final verification + +**Step 1: Run full checks** + +```bash +make check # fmt + clippy + test +make coverage # Must be >95% +``` + +**Step 2: Run review-implementation skill** + +Invoke `/review-implementation` to verify all structural and semantic checks pass. 
From a9e9a479a2b1a6f2fe8f6df1ded20b5c7b86e100 Mon Sep 17 00:00:00 2001 From: zazabap Date: Wed, 4 Mar 2026 20:10:21 +0000 Subject: [PATCH 2/7] feat: add LongestCommonSubsequence model Co-Authored-By: Claude Opus 4.6 --- problemreductions-cli/src/commands/graph.rs | 1 - problemreductions-cli/src/dispatch.rs | 4 +- src/lib.rs | 2 +- src/models/misc/longest_common_subsequence.rs | 167 ++++++++++++++++++ src/models/misc/mod.rs | 3 + src/models/mod.rs | 2 +- .../models/misc/longest_common_subsequence.rs | 157 ++++++++++++++++ 7 files changed, 332 insertions(+), 4 deletions(-) create mode 100644 src/models/misc/longest_common_subsequence.rs create mode 100644 src/unit_tests/models/misc/longest_common_subsequence.rs diff --git a/problemreductions-cli/src/commands/graph.rs b/problemreductions-cli/src/commands/graph.rs index 05bc9f28..f992e888 100644 --- a/problemreductions-cli/src/commands/graph.rs +++ b/problemreductions-cli/src/commands/graph.rs @@ -259,7 +259,6 @@ pub(crate) fn variant_to_full_slash(variant: &BTreeMap) -> Strin } } - /// Format a problem node as **bold name/variant** in slash notation. /// This is the single source of truth for "name/variant" display. 
fn fmt_node(_graph: &ReductionGraph, name: &str, variant: &BTreeMap) -> String { diff --git a/problemreductions-cli/src/dispatch.rs b/problemreductions-cli/src/dispatch.rs index a6b0011f..e2d6a3d6 100644 --- a/problemreductions-cli/src/dispatch.rs +++ b/problemreductions-cli/src/dispatch.rs @@ -1,6 +1,6 @@ use anyhow::{bail, Context, Result}; use problemreductions::models::algebraic::{ClosestVectorProblem, ILP}; -use problemreductions::models::misc::BinPacking; +use problemreductions::models::misc::{BinPacking, LongestCommonSubsequence}; use problemreductions::prelude::*; use problemreductions::rules::{MinimizeSteps, ReductionGraph}; use problemreductions::solvers::{BruteForce, ILPSolver, Solver}; @@ -244,6 +244,7 @@ pub fn load_problem( Some("f64") => deser_opt::>(data), _ => deser_opt::>(data), }, + "LongestCommonSubsequence" => deser_opt::(data), _ => bail!("{}", crate::problem_name::unknown_problem_error(&canonical)), } } @@ -303,6 +304,7 @@ pub fn serialize_any_problem( Some("f64") => try_ser::>(any), _ => try_ser::>(any), }, + "LongestCommonSubsequence" => try_ser::(any), _ => bail!("{}", crate::problem_name::unknown_problem_error(&canonical)), } } diff --git a/src/lib.rs b/src/lib.rs index ef67ab53..43123ab7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -43,7 +43,7 @@ pub mod prelude { KColoring, MaxCut, MaximalIS, MaximumClique, MaximumIndependentSet, MaximumMatching, MinimumDominatingSet, MinimumVertexCover, TravelingSalesman, }; - pub use crate::models::misc::{BinPacking, Factoring, PaintShop}; + pub use crate::models::misc::{BinPacking, Factoring, LongestCommonSubsequence, PaintShop}; pub use crate::models::set::{MaximumSetPacking, MinimumSetCovering}; // Core traits diff --git a/src/models/misc/longest_common_subsequence.rs b/src/models/misc/longest_common_subsequence.rs new file mode 100644 index 00000000..af84bc40 --- /dev/null +++ b/src/models/misc/longest_common_subsequence.rs @@ -0,0 +1,167 @@ +//! Longest Common Subsequence problem implementation. 
+//! +//! Given k strings, find the longest common subsequence shared by all strings. + +use crate::registry::{FieldInfo, ProblemSchemaEntry}; +use crate::traits::{OptimizationProblem, Problem}; +use crate::types::{Direction, SolutionSize}; +use serde::{Deserialize, Serialize}; + +inventory::submit! { + ProblemSchemaEntry { + name: "LongestCommonSubsequence", + module_path: module_path!(), + description: "Find the longest common subsequence of k strings", + fields: &[ + FieldInfo { name: "strings", type_name: "Vec>", description: "The input strings (at least 2)" }, + ], + } +} + +/// The Longest Common Subsequence problem. +/// +/// Given `k >= 2` strings over alphabet `u8`, find the longest +/// subsequence that appears in all strings. +/// +/// # Representation +/// +/// Variables correspond to positions in the shortest string. +/// Each variable is binary (0 = exclude, 1 = include). +/// A configuration selects a subsequence of the shortest string; +/// it is valid if that subsequence is also a subsequence of every other string. +/// +/// # Example +/// +/// ``` +/// use problemreductions::models::misc::LongestCommonSubsequence; +/// use problemreductions::{Problem, Solver, BruteForce}; +/// +/// let problem = LongestCommonSubsequence::new(vec![ +/// vec![b'A', b'B', b'A', b'C'], +/// vec![b'B', b'A', b'C', b'A'], +/// ]); +/// let solver = BruteForce::new(); +/// let solution = solver.find_best(&problem); +/// assert!(solution.is_some()); +/// ``` +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct LongestCommonSubsequence { + /// The input strings. + strings: Vec>, +} + +impl LongestCommonSubsequence { + /// Create a new LCS problem from a collection of strings. + /// + /// # Panics + /// + /// Panics if fewer than 2 strings are provided. + pub fn new(strings: Vec>) -> Self { + assert!(strings.len() >= 2, "LCS requires at least 2 strings"); + Self { strings } + } + + /// Get the input strings. 
+ pub fn strings(&self) -> &[Vec] { + &self.strings + } + + /// Get the number of input strings. + pub fn num_strings(&self) -> usize { + self.strings.len() + } + + /// Get the total length of all strings combined. + pub fn total_length(&self) -> usize { + self.strings.iter().map(|s| s.len()).sum() + } + + /// Get the index of the shortest string. + fn shortest_index(&self) -> usize { + self.strings + .iter() + .enumerate() + .min_by_key(|(_, s)| s.len()) + .map(|(i, _)| i) + .unwrap_or(0) + } + + /// Get the length of the shortest string. + fn shortest_len(&self) -> usize { + self.strings.iter().map(|s| s.len()).min().unwrap_or(0) + } +} + +/// Check if `subseq` is a subsequence of `string`. +fn is_subsequence(subseq: &[u8], string: &[u8]) -> bool { + let mut it = string.iter(); + for &ch in subseq { + if !it.any(|&c| c == ch) { + return false; + } + } + true +} + +impl Problem for LongestCommonSubsequence { + const NAME: &'static str = "LongestCommonSubsequence"; + type Metric = SolutionSize; + + fn variant() -> Vec<(&'static str, &'static str)> { + crate::variant_params![] + } + + fn dims(&self) -> Vec { + vec![2; self.shortest_len()] + } + + fn evaluate(&self, config: &[usize]) -> SolutionSize { + let shortest_len = self.shortest_len(); + if config.len() != shortest_len { + return SolutionSize::Invalid; + } + // Check all values are binary + if config.iter().any(|&v| v >= 2) { + return SolutionSize::Invalid; + } + + let si = self.shortest_index(); + let shortest = &self.strings[si]; + + // Build the candidate subsequence from selected positions + let subseq: Vec = config + .iter() + .enumerate() + .filter(|(_, &v)| v == 1) + .map(|(i, _)| shortest[i]) + .collect(); + + // Check if it is a subsequence of every other string + for (i, s) in self.strings.iter().enumerate() { + if i == si { + continue; + } + if !is_subsequence(&subseq, s) { + return SolutionSize::Invalid; + } + } + + SolutionSize::Valid(subseq.len()) + } +} + +impl OptimizationProblem for 
LongestCommonSubsequence { + type Value = usize; + + fn direction(&self) -> Direction { + Direction::Maximize + } +} + +crate::declare_variants! { + LongestCommonSubsequence => "2^total_length", +} + +#[cfg(test)] +#[path = "../../unit_tests/models/misc/longest_common_subsequence.rs"] +mod tests; diff --git a/src/models/misc/mod.rs b/src/models/misc/mod.rs index 6e2fa084..41d49938 100644 --- a/src/models/misc/mod.rs +++ b/src/models/misc/mod.rs @@ -3,12 +3,15 @@ //! Problems with unique input structures that don't fit other categories: //! - [`BinPacking`]: Bin Packing (minimize bins) //! - [`Factoring`]: Integer factorization +//! - [`LongestCommonSubsequence`]: Longest Common Subsequence //! - [`PaintShop`]: Minimize color switches in paint shop scheduling mod bin_packing; pub(crate) mod factoring; +mod longest_common_subsequence; pub(crate) mod paintshop; pub use bin_packing::BinPacking; pub use factoring::Factoring; +pub use longest_common_subsequence::LongestCommonSubsequence; pub use paintshop::PaintShop; diff --git a/src/models/mod.rs b/src/models/mod.rs index 15df5cfa..44cfb646 100644 --- a/src/models/mod.rs +++ b/src/models/mod.rs @@ -15,5 +15,5 @@ pub use graph::{ BicliqueCover, KColoring, MaxCut, MaximalIS, MaximumClique, MaximumIndependentSet, MaximumMatching, MinimumDominatingSet, MinimumVertexCover, SpinGlass, TravelingSalesman, }; -pub use misc::{BinPacking, Factoring, PaintShop}; +pub use misc::{BinPacking, Factoring, LongestCommonSubsequence, PaintShop}; pub use set::{MaximumSetPacking, MinimumSetCovering}; diff --git a/src/unit_tests/models/misc/longest_common_subsequence.rs b/src/unit_tests/models/misc/longest_common_subsequence.rs new file mode 100644 index 00000000..f66262cf --- /dev/null +++ b/src/unit_tests/models/misc/longest_common_subsequence.rs @@ -0,0 +1,157 @@ +use super::*; +use crate::solvers::{BruteForce, Solver}; +use crate::traits::{OptimizationProblem, Problem}; +use crate::types::Direction; + +#[test] +fn test_lcs_basic() { + let 
problem = + LongestCommonSubsequence::new(vec![vec![b'A', b'B', b'C'], vec![b'B', b'C', b'A']]); + assert_eq!(LongestCommonSubsequence::NAME, "LongestCommonSubsequence"); + assert_eq!(problem.num_strings(), 2); + assert_eq!(problem.total_length(), 6); + assert_eq!(problem.strings().len(), 2); + assert_eq!( + LongestCommonSubsequence::variant(), + Vec::<(&str, &str)>::new() + ); + assert_eq!(problem.direction(), Direction::Maximize); +} + +#[test] +fn test_lcs_dims() { + // Shortest string has length 3 + let problem = + LongestCommonSubsequence::new(vec![vec![b'A', b'B', b'C'], vec![b'B', b'C', b'A', b'D']]); + let dims = problem.dims(); + assert_eq!(dims.len(), 3); + assert!(dims.iter().all(|&d| d == 2)); +} + +#[test] +fn test_lcs_evaluate_valid_subsequence() { + // strings: "ABC", "BAC" + // Shortest is "ABC" (index 0, length 3) + // Select positions 1,2 -> "BC", which is a subsequence of "BAC" + let problem = + LongestCommonSubsequence::new(vec![vec![b'A', b'B', b'C'], vec![b'B', b'A', b'C']]); + let result = problem.evaluate(&[0, 1, 1]); + assert!(result.is_valid()); + assert_eq!(result.unwrap(), 2); +} + +#[test] +fn test_lcs_evaluate_invalid_subsequence() { + // strings: "ABC", "CBA" + // Select all of "ABC" -> "ABC" is NOT a subsequence of "CBA" + let problem = + LongestCommonSubsequence::new(vec![vec![b'A', b'B', b'C'], vec![b'C', b'B', b'A']]); + let result = problem.evaluate(&[1, 1, 1]); + assert!(!result.is_valid()); +} + +#[test] +fn test_lcs_evaluate_empty() { + // Select nothing -> empty subsequence is always valid + let problem = + LongestCommonSubsequence::new(vec![vec![b'A', b'B', b'C'], vec![b'X', b'Y', b'Z']]); + let result = problem.evaluate(&[0, 0, 0]); + assert!(result.is_valid()); + assert_eq!(result.unwrap(), 0); +} + +#[test] +fn test_lcs_brute_force() { + // "ABAC" and "BACA" + // LCS length should be 3 (the unique LCS is "BAC") + let problem = LongestCommonSubsequence::new(vec![ + vec![b'A', b'B', b'A', b'C'], + vec![b'B', b'A', 
b'C', b'A'], + ]); + let solver = BruteForce::new(); + let solution = solver.find_best(&problem).expect("should find a solution"); + let metric = problem.evaluate(&solution); + assert!(metric.is_valid()); + assert_eq!(metric.unwrap(), 3); +} + +#[test] +fn test_lcs_three_strings() { + // "ABCDAB", "BDCABA", "BCADBA" + // The shortest is any of them (all length 6), let's use these. + // Known LCS of these three is "BCBA" (length 4) or similar. + // Actually let's verify: LCS of ABCDAB, BDCABA, BCADBA + // A common subsequence of length 4: "BDAB"? Let's check: + // ABCDAB: B(1) D(3) A(4) B(5) - yes + // BDCABA: B(0) D(1) A(3) B(4) - yes + // BCADBA: B(0) D(4) ... wait, we need positions in order. + // Let me just trust the brute force solver. + let problem = LongestCommonSubsequence::new(vec![ + vec![b'A', b'B', b'C', b'D', b'A', b'B'], + vec![b'B', b'D', b'C', b'A', b'B', b'A'], + vec![b'B', b'C', b'A', b'D', b'B', b'A'], + ]); + let solver = BruteForce::new(); + let solution = solver.find_best(&problem).expect("should find a solution"); + let metric = problem.evaluate(&solution); + assert!(metric.is_valid()); + // The LCS length should be at least 2 and at most 6 + let len = metric.unwrap(); + assert!(len >= 2, "LCS should be at least 2, got {}", len); + assert!(len <= 6, "LCS should be at most 6, got {}", len); +} + +#[test] +fn test_lcs_evaluate_wrong_config_length() { + let problem = + LongestCommonSubsequence::new(vec![vec![b'A', b'B', b'C'], vec![b'B', b'C', b'A']]); + assert!(!problem.evaluate(&[0, 1]).is_valid()); + assert!(!problem.evaluate(&[0, 1, 1, 0]).is_valid()); +} + +#[test] +fn test_lcs_evaluate_invalid_variable_value() { + let problem = + LongestCommonSubsequence::new(vec![vec![b'A', b'B', b'C'], vec![b'B', b'C', b'A']]); + // Value 2 is out of range for binary variables + assert!(!problem.evaluate(&[0, 2, 1]).is_valid()); +} + +#[test] +fn test_lcs_serialization() { + let problem = + LongestCommonSubsequence::new(vec![vec![b'A', b'B', b'C'], 
vec![b'B', b'C', b'A']]); + let json = serde_json::to_value(&problem).unwrap(); + let restored: LongestCommonSubsequence = serde_json::from_value(json).unwrap(); + assert_eq!(restored.strings(), problem.strings()); + assert_eq!(restored.num_strings(), problem.num_strings()); +} + +#[test] +fn test_lcs_identical_strings() { + // Two identical strings: LCS = full string + let problem = + LongestCommonSubsequence::new(vec![vec![b'A', b'B', b'C'], vec![b'A', b'B', b'C']]); + let solver = BruteForce::new(); + let solution = solver.find_best(&problem).expect("should find a solution"); + let metric = problem.evaluate(&solution); + assert!(metric.is_valid()); + assert_eq!(metric.unwrap(), 3); +} + +#[test] +fn test_lcs_no_common_chars() { + // No common characters: LCS = 0 + let problem = LongestCommonSubsequence::new(vec![vec![b'A', b'B'], vec![b'X', b'Y']]); + let solver = BruteForce::new(); + let solution = solver.find_best(&problem).expect("should find a solution"); + let metric = problem.evaluate(&solution); + assert!(metric.is_valid()); + assert_eq!(metric.unwrap(), 0); +} + +#[test] +#[should_panic(expected = "LCS requires at least 2 strings")] +fn test_lcs_too_few_strings() { + LongestCommonSubsequence::new(vec![vec![b'A', b'B']]); +} From bda0692930723178b42c482e4ba5737bcd4e3a89 Mon Sep 17 00:00:00 2001 From: zazabap Date: Wed, 4 Mar 2026 20:14:52 +0000 Subject: [PATCH 3/7] fix: tighten LCS complexity string and three-string test assertion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Change complexity from 2^total_length to (total_length/num_strings)^num_strings reflecting the standard DP algorithm O(∏n_i) - Assert exact LCS length (4) in three-string test instead of loose range Co-Authored-By: Claude Opus 4.6 --- src/models/misc/longest_common_subsequence.rs | 2 +- .../models/misc/longest_common_subsequence.rs | 18 +++++------------- 2 files changed, 6 insertions(+), 14 deletions(-) diff --git 
a/src/models/misc/longest_common_subsequence.rs b/src/models/misc/longest_common_subsequence.rs index af84bc40..e994f362 100644 --- a/src/models/misc/longest_common_subsequence.rs +++ b/src/models/misc/longest_common_subsequence.rs @@ -159,7 +159,7 @@ impl OptimizationProblem for LongestCommonSubsequence { } crate::declare_variants! { - LongestCommonSubsequence => "2^total_length", + LongestCommonSubsequence => "(total_length / num_strings)^num_strings", } #[cfg(test)] diff --git a/src/unit_tests/models/misc/longest_common_subsequence.rs b/src/unit_tests/models/misc/longest_common_subsequence.rs index f66262cf..f64e93e3 100644 --- a/src/unit_tests/models/misc/longest_common_subsequence.rs +++ b/src/unit_tests/models/misc/longest_common_subsequence.rs @@ -78,14 +78,10 @@ fn test_lcs_brute_force() { #[test] fn test_lcs_three_strings() { // "ABCDAB", "BDCABA", "BCADBA" - // The shortest is any of them (all length 6), let's use these. - // Known LCS of these three is "BCBA" (length 4) or similar. - // Actually let's verify: LCS of ABCDAB, BDCABA, BCADBA - // A common subsequence of length 4: "BDAB"? Let's check: - // ABCDAB: B(1) D(3) A(4) B(5) - yes - // BDCABA: B(0) D(1) A(3) B(4) - yes - // BCADBA: B(0) D(4) ... wait, we need positions in order. - // Let me just trust the brute force solver. 
+ // Known LCS = "BCAB" (length 4), verified: + // ABCDAB: B(1) C(2) A(4) B(5) ✓ + // BDCABA: B(0) C(2) A(3) B(4) ✓ + // BCADBA: B(0) C(1) A(2) B(4) ✓ let problem = LongestCommonSubsequence::new(vec![ vec![b'A', b'B', b'C', b'D', b'A', b'B'], vec![b'B', b'D', b'C', b'A', b'B', b'A'], @@ -94,11 +90,7 @@ fn test_lcs_three_strings() { let solver = BruteForce::new(); let solution = solver.find_best(&problem).expect("should find a solution"); let metric = problem.evaluate(&solution); - assert!(metric.is_valid()); - // The LCS length should be at least 2 and at most 6 - let len = metric.unwrap(); - assert!(len >= 2, "LCS should be at least 2, got {}", len); - assert!(len <= 6, "LCS should be at most 6, got {}", len); + assert_eq!(metric, SolutionSize::Valid(4)); } #[test] From 1acbdf1726644bbe3e1a53eebfddae550270f797 Mon Sep 17 00:00:00 2001 From: zazabap Date: Wed, 4 Mar 2026 20:17:54 +0000 Subject: [PATCH 4/7] feat: add LCS to MaximumIndependentSet reduction rule Co-Authored-By: Claude Opus 4.6 --- ...commonsubsequence_maximumindependentset.rs | 163 ++++++++++++++++++ src/rules/mod.rs | 1 + ...commonsubsequence_maximumindependentset.rs | 147 ++++++++++++++++ 3 files changed, 311 insertions(+) create mode 100644 src/rules/longestcommonsubsequence_maximumindependentset.rs create mode 100644 src/unit_tests/rules/longestcommonsubsequence_maximumindependentset.rs diff --git a/src/rules/longestcommonsubsequence_maximumindependentset.rs b/src/rules/longestcommonsubsequence_maximumindependentset.rs new file mode 100644 index 00000000..b15e2e5a --- /dev/null +++ b/src/rules/longestcommonsubsequence_maximumindependentset.rs @@ -0,0 +1,163 @@ +//! Reduction from Longest Common Subsequence (LCS) to MaximumIndependentSet. +//! +//! For k strings, we create a vertex for each k-tuple of positions where all +//! strings share the same character. Two vertices are connected by an edge if +//! their position tuples conflict — i.e., they cannot both appear in a common +//! 
subsequence because the relative ordering is inconsistent across strings. +//! +//! The maximum independent set in this conflict graph corresponds to the longest +//! common subsequence. + +use crate::models::graph::MaximumIndependentSet; +use crate::models::misc::LongestCommonSubsequence; +use crate::reduction; +use crate::rules::traits::{ReduceTo, ReductionResult}; +use crate::topology::SimpleGraph; +use crate::types::One; + +/// Check whether two position tuples conflict. +/// +/// Two tuples `a` and `b` conflict if they cannot both belong to a common +/// subsequence. This happens when it is NOT the case that all components of `a` +/// are strictly less than the corresponding components of `b`, AND it is NOT the +/// case that all components of `b` are strictly less than the corresponding +/// components of `a`. +pub(crate) fn tuples_conflict(a: &[usize], b: &[usize]) -> bool { + let all_a_lt_b = a.iter().zip(b.iter()).all(|(&ai, &bi)| ai < bi); + let all_b_lt_a = b.iter().zip(a.iter()).all(|(&bi, &ai)| bi < ai); + !all_a_lt_b && !all_b_lt_a +} + +/// Result of reducing LCS to MaximumIndependentSet. +#[derive(Debug, Clone)] +pub struct ReductionLCSToIS { + /// The target MaximumIndependentSet problem. + target: MaximumIndependentSet, + /// Position tuples for each vertex in the IS graph. + /// `nodes[v]` is a k-tuple of positions, one per input string. + nodes: Vec>, + /// Length of the shortest input string (= number of source variables). + num_source_variables: usize, + /// Index of the shortest input string. + shortest_index: usize, +} + +impl ReductionResult for ReductionLCSToIS { + type Source = LongestCommonSubsequence; + type Target = MaximumIndependentSet; + + fn target_problem(&self) -> &Self::Target { + &self.target + } + + /// Extract an LCS solution from an IS solution. + /// + /// For each selected vertex, look up its position in the shortest string + /// and set that bit to 1 in the output configuration. 
+ fn extract_solution(&self, target_solution: &[usize]) -> Vec { + let mut config = vec![0usize; self.num_source_variables]; + for (vertex_idx, &selected) in target_solution.iter().enumerate() { + if selected == 1 { + let pos_in_shortest = self.nodes[vertex_idx][self.shortest_index]; + config[pos_in_shortest] = 1; + } + } + config + } +} + +impl ReductionLCSToIS { + /// Get a reference to the position-tuple nodes. + pub fn nodes(&self) -> &[Vec] { + &self.nodes + } +} + +#[reduction( + overhead = { + num_vertices = "total_length^num_strings", + num_edges = "total_length^(2 * num_strings)", + } +)] +impl ReduceTo> for LongestCommonSubsequence { + type Result = ReductionLCSToIS; + + fn reduce_to(&self) -> Self::Result { + let strings = self.strings(); + let k = strings.len(); + + // Find the shortest string index + let shortest_index = strings + .iter() + .enumerate() + .min_by_key(|(_, s)| s.len()) + .map(|(i, _)| i) + .unwrap_or(0); + let num_source_variables = strings[shortest_index].len(); + + // Collect character positions per string: char -> list of positions + let mut char_positions: Vec>> = + Vec::with_capacity(k); + for s in strings { + let mut map = std::collections::HashMap::new(); + for (pos, &ch) in s.iter().enumerate() { + map.entry(ch).or_insert_with(Vec::new).push(pos); + } + char_positions.push(map); + } + + // Find characters common to all strings + let common_chars: Vec = char_positions[0] + .keys() + .copied() + .filter(|ch| char_positions.iter().all(|cp| cp.contains_key(ch))) + .collect(); + + // Generate match nodes: for each common character, take the Cartesian + // product of positions across all strings + let mut nodes: Vec> = Vec::new(); + for ch in &common_chars { + let position_lists: Vec<&Vec> = + char_positions.iter().map(|cp| &cp[ch]).collect(); + // Cartesian product of position_lists + let mut tuples: Vec> = vec![vec![]]; + for positions in &position_lists { + let mut new_tuples = Vec::new(); + for tuple in &tuples { + for &pos in 
*positions { + let mut t = tuple.clone(); + t.push(pos); + new_tuples.push(t); + } + } + tuples = new_tuples; + } + nodes.extend(tuples); + } + + let n = nodes.len(); + + // Build conflict edges + let mut edges: Vec<(usize, usize)> = Vec::new(); + for i in 0..n { + for j in (i + 1)..n { + if tuples_conflict(&nodes[i], &nodes[j]) { + edges.push((i, j)); + } + } + } + + let target = MaximumIndependentSet::new(SimpleGraph::new(n, edges), vec![One; n]); + + ReductionLCSToIS { + target, + nodes, + num_source_variables, + shortest_index, + } + } +} + +#[cfg(test)] +#[path = "../unit_tests/rules/longestcommonsubsequence_maximumindependentset.rs"] +mod tests; diff --git a/src/rules/mod.rs b/src/rules/mod.rs index 765e3e8c..d25fb322 100644 --- a/src/rules/mod.rs +++ b/src/rules/mod.rs @@ -12,6 +12,7 @@ mod graph; mod kcoloring_casts; mod ksatisfiability_casts; mod ksatisfiability_qubo; +mod longestcommonsubsequence_maximumindependentset; mod maximumindependentset_casts; mod maximumindependentset_gridgraph; mod maximumindependentset_maximumsetpacking; diff --git a/src/unit_tests/rules/longestcommonsubsequence_maximumindependentset.rs b/src/unit_tests/rules/longestcommonsubsequence_maximumindependentset.rs new file mode 100644 index 00000000..a86f4046 --- /dev/null +++ b/src/unit_tests/rules/longestcommonsubsequence_maximumindependentset.rs @@ -0,0 +1,147 @@ +use super::*; +use crate::solvers::BruteForce; +use crate::topology::Graph; +use crate::traits::Problem; + +#[test] +fn test_lcs_to_maximumindependentset_closed_loop() { + // ABAC / BACA -> LCS length = 3 (the unique LCS is "BAC") + let lcs = LongestCommonSubsequence::new(vec![ + vec![b'A', b'B', b'A', b'C'], + vec![b'B', b'A', b'C', b'A'], + ]); + let reduction = ReduceTo::>::reduce_to(&lcs); + let is_problem = reduction.target_problem(); + + // Solve the IS problem + let solver = BruteForce::new(); + let best_solutions = solver.find_all_best(is_problem); + assert!(!best_solutions.is_empty()); + + // IS size 
should be 3 (= LCS length) + for sol in &best_solutions { + assert_eq!(sol.iter().sum::(), 3); + } + + // Extract and verify each solution is valid for the original LCS problem + for sol in &best_solutions { + let lcs_config = reduction.extract_solution(sol); + let metric = lcs.evaluate(&lcs_config); + match metric { + crate::types::SolutionSize::Valid(len) => assert_eq!(len, 3), + crate::types::SolutionSize::Invalid => panic!("Extracted solution is invalid"), + } + } +} + +#[test] +fn test_lcs_to_is_graph_structure() { + // ABAC / BACA + // Common chars: A (positions [0,2] and [1,3]), B ([1] and [0]), C ([3] and [2]) + // Match nodes for A: (0,1),(0,3),(2,1),(2,3) = 4 nodes + // Match nodes for B: (1,0) = 1 node + // Match nodes for C: (3,2) = 1 node + // Total: 6 nodes + let lcs = LongestCommonSubsequence::new(vec![ + vec![b'A', b'B', b'A', b'C'], + vec![b'B', b'A', b'C', b'A'], + ]); + let reduction = ReduceTo::>::reduce_to(&lcs); + let is_problem = reduction.target_problem(); + + assert_eq!(is_problem.graph().num_vertices(), 6); + assert_eq!(is_problem.graph().num_edges(), 9); +} + +#[test] +fn test_lcs_to_is_three_strings() { + // Three strings with LCS = "ABCD" (length 4) + // s1 = XABCDY, s2 = ABCDZ, s3 = WABCD + let lcs = LongestCommonSubsequence::new(vec![ + vec![b'X', b'A', b'B', b'C', b'D', b'Y'], + vec![b'A', b'B', b'C', b'D', b'Z'], + vec![b'W', b'A', b'B', b'C', b'D'], + ]); + let reduction = ReduceTo::>::reduce_to(&lcs); + let is_problem = reduction.target_problem(); + + let solver = BruteForce::new(); + let best_solutions = solver.find_all_best(is_problem); + assert!(!best_solutions.is_empty()); + + // IS size should be 4 (= LCS length) + for sol in &best_solutions { + assert_eq!(sol.iter().sum::(), 4); + } + + // Verify extracted solutions + for sol in &best_solutions { + let lcs_config = reduction.extract_solution(sol); + let metric = lcs.evaluate(&lcs_config); + match metric { + crate::types::SolutionSize::Valid(len) => assert_eq!(len, 4), + 
crate::types::SolutionSize::Invalid => panic!("Extracted solution is invalid"), + } + } +} + +#[test] +fn test_lcs_to_is_no_common_chars() { + // No common characters -> empty graph + let lcs = LongestCommonSubsequence::new(vec![vec![b'A', b'B'], vec![b'C', b'D']]); + let reduction = ReduceTo::>::reduce_to(&lcs); + let is_problem = reduction.target_problem(); + + assert_eq!(is_problem.graph().num_vertices(), 0); + assert_eq!(is_problem.graph().num_edges(), 0); +} + +#[test] +fn test_lcs_to_is_identical_strings() { + // Identical strings -> LCS = full string + let lcs = LongestCommonSubsequence::new(vec![vec![b'A', b'B', b'C'], vec![b'A', b'B', b'C']]); + let reduction = ReduceTo::>::reduce_to(&lcs); + let is_problem = reduction.target_problem(); + + let solver = BruteForce::new(); + let best_solutions = solver.find_all_best(is_problem); + assert!(!best_solutions.is_empty()); + + // IS size should be 3 (= full string length) + for sol in &best_solutions { + assert_eq!(sol.iter().sum::(), 3); + } + + for sol in &best_solutions { + let lcs_config = reduction.extract_solution(sol); + let metric = lcs.evaluate(&lcs_config); + match metric { + crate::types::SolutionSize::Valid(len) => assert_eq!(len, 3), + crate::types::SolutionSize::Invalid => panic!("Extracted solution is invalid"), + } + } +} + +#[test] +fn test_tuples_conflict_function() { + // Same position in one dimension -> conflict + assert!(tuples_conflict(&[0, 1], &[0, 2])); + assert!(tuples_conflict(&[1, 0], &[1, 2])); + + // Crossing: a_0 < b_0 but a_1 > b_1 -> conflict + assert!(tuples_conflict(&[0, 2], &[1, 0])); + + // Consistent ordering: all a_i < b_i -> no conflict + assert!(!tuples_conflict(&[0, 0], &[1, 1])); + assert!(!tuples_conflict(&[0, 1], &[2, 3])); + + // Reverse consistent ordering: all b_i < a_i -> no conflict + assert!(!tuples_conflict(&[2, 3], &[0, 1])); + + // Three dimensions + assert!(!tuples_conflict(&[0, 0, 0], &[1, 1, 1])); + assert!(tuples_conflict(&[0, 1, 0], &[1, 0, 1])); + + // 
Equal tuples -> conflict (a_i = b_i for all i) + assert!(tuples_conflict(&[1, 2], &[1, 2])); +} From 8cb71521e0f22937018ed475fe0acd97ceb24f29 Mon Sep 17 00:00:00 2001 From: zazabap Date: Wed, 4 Mar 2026 20:22:09 +0000 Subject: [PATCH 5/7] feat: add LCS to MaxIS example program Co-Authored-By: Claude Opus 4.6 --- ...monsubsequence_to_maximumindependentset.rs | 162 ++++++++++++++++++ tests/suites/examples.rs | 5 + 2 files changed, 167 insertions(+) create mode 100644 examples/reduction_longestcommonsubsequence_to_maximumindependentset.rs diff --git a/examples/reduction_longestcommonsubsequence_to_maximumindependentset.rs b/examples/reduction_longestcommonsubsequence_to_maximumindependentset.rs new file mode 100644 index 00000000..74303cd3 --- /dev/null +++ b/examples/reduction_longestcommonsubsequence_to_maximumindependentset.rs @@ -0,0 +1,162 @@ +// # Longest Common Subsequence to Maximum Independent Set Reduction +// +// ## Mathematical Equivalence +// For k strings, create a vertex for each k-tuple of positions where all +// strings share the same character. Two vertices are connected by an edge if +// their position tuples conflict (ordering is inconsistent across strings). +// The maximum independent set in this conflict graph corresponds to the +// longest common subsequence. +// +// ## This Example +// - Instance: two strings ABAC and BACA +// - Matching positions form vertices in the conflict graph +// - Source LCS: longest common subsequence has length 3 (namely "BAC") +// - Target MaximumIndependentSet: conflict graph with vertices and edges +// +// ## Output +// Exports `docs/paper/examples/lcs_to_maximumindependentset.json` and +// `lcs_to_maximumindependentset.result.json`. +// +// See docs/paper/reductions.typ for the full reduction specification. 
+ +use problemreductions::export::*; +use problemreductions::prelude::*; +use problemreductions::topology::{Graph, SimpleGraph}; + +pub fn run() { + println!("\n=== Longest Common Subsequence -> Maximum Independent Set Reduction ===\n"); + + // 1. Create LCS instance: ABAC and BACA + let strings = vec![ + vec![b'A', b'B', b'A', b'C'], + vec![b'B', b'A', b'C', b'A'], + ]; + let lcs = LongestCommonSubsequence::new(strings.clone()); + + println!("Source: LongestCommonSubsequence"); + for (i, s) in strings.iter().enumerate() { + println!( + " String {}: {}", + i, + std::str::from_utf8(s).unwrap_or("?") + ); + } + println!(" num_strings: {}", lcs.num_strings()); + println!(" total_length: {}", lcs.total_length()); + + // 2. Reduce to MaximumIndependentSet + let reduction = ReduceTo::>::reduce_to(&lcs); + let target = reduction.target_problem(); + + println!("\nTarget: MaximumIndependentSet"); + println!(" Vertices: {}", target.graph().num_vertices()); + println!( + " Edges: {} {:?}", + target.graph().num_edges(), + target.graph().edges() + ); + + // 3. Solve the target problem + let solver = BruteForce::new(); + let target_solutions = solver.find_all_best(target); + + println!("\nBest target solutions: {}", target_solutions.len()); + + // 4. Extract and verify each solution + let mut solutions = Vec::new(); + for (i, target_sol) in target_solutions.iter().enumerate() { + let source_sol = reduction.extract_solution(target_sol); + let source_size = lcs.evaluate(&source_sol); + let target_size = target.evaluate(target_sol); + + println!( + " Solution {}: target={:?} (size={:?}), source={:?} (size={:?}, valid={})", + i, + target_sol, + target_size, + source_sol, + source_size, + source_size.is_valid() + ); + + assert!( + source_size.is_valid(), + "Extracted source solution must be valid" + ); + + solutions.push(SolutionPair { + source_config: source_sol, + target_config: target_sol.clone(), + }); + } + + // 5. 
Verify the optimal value + let target_sol = &target_solutions[0]; + let source_sol = reduction.extract_solution(target_sol); + let source_size = lcs.evaluate(&source_sol); + let target_size = target.evaluate(target_sol); + + println!( + "\nOptimal: source LCS length={:?}, target IS size={:?}", + source_size, target_size + ); + + assert!( + source_size.is_valid(), + "Source solution must be valid for optimal" + ); + assert!( + target_size.is_valid(), + "Target solution must be valid for optimal" + ); + + // 6. Export JSON + let source_variant = variant_to_map(LongestCommonSubsequence::variant()); + let target_variant = variant_to_map(MaximumIndependentSet::::variant()); + let overhead = lookup_overhead( + "LongestCommonSubsequence", + &source_variant, + "MaximumIndependentSet", + &target_variant, + ) + .expect("LCS -> MaxIS overhead not found"); + + let data = ReductionData { + source: ProblemSide { + problem: LongestCommonSubsequence::NAME.to_string(), + variant: source_variant, + instance: serde_json::json!({ + "num_strings": lcs.num_strings(), + "total_length": lcs.total_length(), + "strings": lcs.strings().iter().map(|s| + std::str::from_utf8(s).unwrap_or("?").to_string() + ).collect::>(), + }), + }, + target: ProblemSide { + problem: MaximumIndependentSet::::NAME.to_string(), + variant: target_variant, + instance: serde_json::json!({ + "num_vertices": target.graph().num_vertices(), + "num_edges": target.graph().num_edges(), + }), + }, + overhead: overhead_to_json(&overhead), + }; + + let results = ResultData { solutions }; + let name = "lcs_to_maximumindependentset"; + write_example(name, &data, &results); + + println!( + "\nDone: LCS({} strings, total_length={}) maps to IS({} vertices, {} edges)", + lcs.num_strings(), + lcs.total_length(), + target.graph().num_vertices(), + target.graph().num_edges() + ); +} + +fn main() { + run() +} diff --git a/tests/suites/examples.rs b/tests/suites/examples.rs index 3c9ad803..00040d80 100644 --- a/tests/suites/examples.rs 
+++ b/tests/suites/examples.rs @@ -21,6 +21,7 @@ example_test!(reduction_kcoloring_to_ilp); example_test!(reduction_kcoloring_to_qubo); example_test!(reduction_ksatisfiability_to_qubo); example_test!(reduction_ksatisfiability_to_satisfiability); +example_test!(reduction_longestcommonsubsequence_to_maximumindependentset); example_test!(reduction_maxcut_to_spinglass); example_test!(reduction_maximumclique_to_ilp); example_test!(reduction_maximumindependentset_to_ilp); @@ -87,6 +88,10 @@ example_fn!( test_ksatisfiability_to_satisfiability, reduction_ksatisfiability_to_satisfiability ); +example_fn!( + test_longestcommonsubsequence_to_maximumindependentset, + reduction_longestcommonsubsequence_to_maximumindependentset +); example_fn!(test_maxcut_to_spinglass, reduction_maxcut_to_spinglass); example_fn!(test_maximumclique_to_ilp, reduction_maximumclique_to_ilp); example_fn!( From 695f61a54b9d7536e876919f79a19866d306522c Mon Sep 17 00:00:00 2001 From: zazabap Date: Wed, 4 Mar 2026 20:24:56 +0000 Subject: [PATCH 6/7] chore: regenerate exports and fix formatting after LCS->MaxIS rule Co-Authored-By: Claude Opus 4.6 --- docs/src/reductions/problem_schemas.json | 11 ++ docs/src/reductions/reduction_graph.json | 184 ++++++++++-------- ...monsubsequence_to_maximumindependentset.rs | 11 +- 3 files changed, 116 insertions(+), 90 deletions(-) diff --git a/docs/src/reductions/problem_schemas.json b/docs/src/reductions/problem_schemas.json index 8cc8d2ae..206ce735 100644 --- a/docs/src/reductions/problem_schemas.json +++ b/docs/src/reductions/problem_schemas.json @@ -183,6 +183,17 @@ } ] }, + { + "name": "LongestCommonSubsequence", + "description": "Find the longest common subsequence of k strings", + "fields": [ + { + "name": "strings", + "type_name": "Vec>", + "description": "The input strings (at least 2)" + } + ] + }, { "name": "MaxCut", "description": "Find maximum weight cut in a graph", diff --git a/docs/src/reductions/reduction_graph.json 
b/docs/src/reductions/reduction_graph.json index 0b0c6850..8790894a 100644 --- a/docs/src/reductions/reduction_graph.json +++ b/docs/src/reductions/reduction_graph.json @@ -148,6 +148,13 @@ "doc_path": "models/formula/struct.KSatisfiability.html", "complexity": "2^num_variables" }, + { + "name": "LongestCommonSubsequence", + "variant": {}, + "category": "misc", + "doc_path": "models/misc/struct.LongestCommonSubsequence.html", + "complexity": "(total_length / num_strings)^num_strings" + }, { "name": "MaxCut", "variant": { @@ -386,7 +393,7 @@ }, { "source": 4, - "target": 38, + "target": 39, "overhead": [ { "field": "num_spins", @@ -431,7 +438,7 @@ }, { "source": 8, - "target": 35, + "target": 36, "overhead": [ { "field": "num_vars", @@ -472,7 +479,7 @@ }, { "source": 13, - "target": 35, + "target": 36, "overhead": [ { "field": "num_vars", @@ -498,7 +505,7 @@ }, { "source": 14, - "target": 35, + "target": 36, "overhead": [ { "field": "num_vars", @@ -509,7 +516,7 @@ }, { "source": 14, - "target": 36, + "target": 37, "overhead": [ { "field": "num_clauses", @@ -543,7 +550,7 @@ }, { "source": 15, - "target": 35, + "target": 36, "overhead": [ { "field": "num_vars", @@ -554,7 +561,7 @@ }, { "source": 15, - "target": 36, + "target": 37, "overhead": [ { "field": "num_clauses", @@ -573,7 +580,7 @@ }, { "source": 16, - "target": 36, + "target": 37, "overhead": [ { "field": "num_clauses", @@ -592,7 +599,22 @@ }, { "source": 17, - "target": 38, + "target": 23, + "overhead": [ + { + "field": "num_vertices", + "formula": "total_length^num_strings" + }, + { + "field": "num_edges", + "formula": "total_length^2 * num_strings" + } + ], + "doc_path": "rules/longestcommonsubsequence_maximumindependentset/index.html" + }, + { + "source": 18, + "target": 39, "overhead": [ { "field": "num_spins", @@ -606,7 +628,7 @@ "doc_path": "rules/spinglass_maxcut/index.html" }, { - "source": 19, + "source": 20, "target": 8, "overhead": [ { @@ -621,8 +643,8 @@ "doc_path": 
"rules/maximumclique_ilp/index.html" }, { - "source": 20, - "target": 21, + "source": 21, + "target": 22, "overhead": [ { "field": "num_vertices", @@ -636,8 +658,8 @@ "doc_path": "rules/maximumindependentset_casts/index.html" }, { - "source": 20, - "target": 25, + "source": 21, + "target": 26, "overhead": [ { "field": "num_vertices", @@ -651,8 +673,8 @@ "doc_path": "rules/maximumindependentset_casts/index.html" }, { - "source": 21, - "target": 26, + "source": 22, + "target": 27, "overhead": [ { "field": "num_vertices", @@ -666,8 +688,8 @@ "doc_path": "rules/maximumindependentset_casts/index.html" }, { - "source": 22, - "target": 20, + "source": 23, + "target": 21, "overhead": [ { "field": "num_vertices", @@ -681,8 +703,8 @@ "doc_path": "rules/maximumindependentset_gridgraph/index.html" }, { - "source": 22, - "target": 21, + "source": 23, + "target": 22, "overhead": [ { "field": "num_vertices", @@ -696,8 +718,8 @@ "doc_path": "rules/maximumindependentset_gridgraph/index.html" }, { - "source": 22, - "target": 23, + "source": 23, + "target": 24, "overhead": [ { "field": "num_vertices", @@ -711,8 +733,8 @@ "doc_path": "rules/maximumindependentset_casts/index.html" }, { - "source": 22, - "target": 24, + "source": 23, + "target": 25, "overhead": [ { "field": "num_vertices", @@ -726,8 +748,8 @@ "doc_path": "rules/maximumindependentset_triangular/index.html" }, { - "source": 22, - "target": 28, + "source": 23, + "target": 29, "overhead": [ { "field": "num_sets", @@ -741,7 +763,7 @@ "doc_path": "rules/maximumindependentset_maximumsetpacking/index.html" }, { - "source": 23, + "source": 24, "target": 8, "overhead": [ { @@ -756,8 +778,8 @@ "doc_path": "rules/maximumindependentset_ilp/index.html" }, { - "source": 23, - "target": 30, + "source": 24, + "target": 31, "overhead": [ { "field": "num_sets", @@ -771,8 +793,8 @@ "doc_path": "rules/maximumindependentset_maximumsetpacking/index.html" }, { - "source": 23, - "target": 33, + "source": 24, + "target": 34, "overhead": [ { 
"field": "num_vertices", @@ -786,8 +808,8 @@ "doc_path": "rules/minimumvertexcover_maximumindependentset/index.html" }, { - "source": 23, - "target": 35, + "source": 24, + "target": 36, "overhead": [ { "field": "num_vars", @@ -797,8 +819,8 @@ "doc_path": "rules/maximumindependentset_qubo/index.html" }, { - "source": 24, - "target": 26, + "source": 25, + "target": 27, "overhead": [ { "field": "num_vertices", @@ -812,8 +834,8 @@ "doc_path": "rules/maximumindependentset_casts/index.html" }, { - "source": 25, - "target": 22, + "source": 26, + "target": 23, "overhead": [ { "field": "num_vertices", @@ -827,8 +849,8 @@ "doc_path": "rules/maximumindependentset_casts/index.html" }, { - "source": 25, - "target": 26, + "source": 26, + "target": 27, "overhead": [ { "field": "num_vertices", @@ -842,8 +864,8 @@ "doc_path": "rules/maximumindependentset_casts/index.html" }, { - "source": 26, - "target": 23, + "source": 27, + "target": 24, "overhead": [ { "field": "num_vertices", @@ -857,7 +879,7 @@ "doc_path": "rules/maximumindependentset_casts/index.html" }, { - "source": 27, + "source": 28, "target": 8, "overhead": [ { @@ -872,8 +894,8 @@ "doc_path": "rules/maximummatching_ilp/index.html" }, { - "source": 27, - "target": 30, + "source": 28, + "target": 31, "overhead": [ { "field": "num_sets", @@ -887,8 +909,8 @@ "doc_path": "rules/maximummatching_maximumsetpacking/index.html" }, { - "source": 28, - "target": 22, + "source": 29, + "target": 23, "overhead": [ { "field": "num_vertices", @@ -902,8 +924,8 @@ "doc_path": "rules/maximumindependentset_maximumsetpacking/index.html" }, { - "source": 28, - "target": 30, + "source": 29, + "target": 31, "overhead": [ { "field": "num_sets", @@ -917,8 +939,8 @@ "doc_path": "rules/maximumsetpacking_casts/index.html" }, { - "source": 29, - "target": 35, + "source": 30, + "target": 36, "overhead": [ { "field": "num_vars", @@ -928,7 +950,7 @@ "doc_path": "rules/maximumsetpacking_qubo/index.html" }, { - "source": 30, + "source": 31, "target": 8, 
"overhead": [ { @@ -943,8 +965,8 @@ "doc_path": "rules/maximumsetpacking_ilp/index.html" }, { - "source": 30, - "target": 23, + "source": 31, + "target": 24, "overhead": [ { "field": "num_vertices", @@ -958,8 +980,8 @@ "doc_path": "rules/maximumindependentset_maximumsetpacking/index.html" }, { - "source": 30, - "target": 29, + "source": 31, + "target": 30, "overhead": [ { "field": "num_sets", @@ -973,7 +995,7 @@ "doc_path": "rules/maximumsetpacking_casts/index.html" }, { - "source": 31, + "source": 32, "target": 8, "overhead": [ { @@ -988,7 +1010,7 @@ "doc_path": "rules/minimumdominatingset_ilp/index.html" }, { - "source": 32, + "source": 33, "target": 8, "overhead": [ { @@ -1003,7 +1025,7 @@ "doc_path": "rules/minimumsetcovering_ilp/index.html" }, { - "source": 33, + "source": 34, "target": 8, "overhead": [ { @@ -1018,8 +1040,8 @@ "doc_path": "rules/minimumvertexcover_ilp/index.html" }, { - "source": 33, - "target": 23, + "source": 34, + "target": 24, "overhead": [ { "field": "num_vertices", @@ -1033,8 +1055,8 @@ "doc_path": "rules/minimumvertexcover_maximumindependentset/index.html" }, { - "source": 33, - "target": 32, + "source": 34, + "target": 33, "overhead": [ { "field": "num_sets", @@ -1048,8 +1070,8 @@ "doc_path": "rules/minimumvertexcover_minimumsetcovering/index.html" }, { - "source": 33, - "target": 35, + "source": 34, + "target": 36, "overhead": [ { "field": "num_vars", @@ -1059,7 +1081,7 @@ "doc_path": "rules/minimumvertexcover_qubo/index.html" }, { - "source": 35, + "source": 36, "target": 8, "overhead": [ { @@ -1074,8 +1096,8 @@ "doc_path": "rules/qubo_ilp/index.html" }, { - "source": 35, - "target": 37, + "source": 36, + "target": 38, "overhead": [ { "field": "num_spins", @@ -1085,7 +1107,7 @@ "doc_path": "rules/spinglass_qubo/index.html" }, { - "source": 36, + "source": 37, "target": 4, "overhead": [ { @@ -1100,7 +1122,7 @@ "doc_path": "rules/sat_circuitsat/index.html" }, { - "source": 36, + "source": 37, "target": 10, "overhead": [ { @@ -1115,7 
+1137,7 @@ "doc_path": "rules/sat_coloring/index.html" }, { - "source": 36, + "source": 37, "target": 15, "overhead": [ { @@ -1130,8 +1152,8 @@ "doc_path": "rules/sat_ksat/index.html" }, { - "source": 36, - "target": 22, + "source": 37, + "target": 23, "overhead": [ { "field": "num_vertices", @@ -1145,8 +1167,8 @@ "doc_path": "rules/sat_maximumindependentset/index.html" }, { - "source": 36, - "target": 31, + "source": 37, + "target": 32, "overhead": [ { "field": "num_vertices", @@ -1160,8 +1182,8 @@ "doc_path": "rules/sat_minimumdominatingset/index.html" }, { - "source": 37, - "target": 35, + "source": 38, + "target": 36, "overhead": [ { "field": "num_vars", @@ -1171,8 +1193,8 @@ "doc_path": "rules/spinglass_qubo/index.html" }, { - "source": 38, - "target": 17, + "source": 39, + "target": 18, "overhead": [ { "field": "num_vertices", @@ -1186,8 +1208,8 @@ "doc_path": "rules/spinglass_maxcut/index.html" }, { - "source": 38, - "target": 37, + "source": 39, + "target": 38, "overhead": [ { "field": "num_spins", @@ -1201,7 +1223,7 @@ "doc_path": "rules/spinglass_casts/index.html" }, { - "source": 39, + "source": 40, "target": 8, "overhead": [ { diff --git a/examples/reduction_longestcommonsubsequence_to_maximumindependentset.rs b/examples/reduction_longestcommonsubsequence_to_maximumindependentset.rs index 74303cd3..e34a895f 100644 --- a/examples/reduction_longestcommonsubsequence_to_maximumindependentset.rs +++ b/examples/reduction_longestcommonsubsequence_to_maximumindependentset.rs @@ -27,19 +27,12 @@ pub fn run() { println!("\n=== Longest Common Subsequence -> Maximum Independent Set Reduction ===\n"); // 1. 
Create LCS instance: ABAC and BACA - let strings = vec![ - vec![b'A', b'B', b'A', b'C'], - vec![b'B', b'A', b'C', b'A'], - ]; + let strings = vec![vec![b'A', b'B', b'A', b'C'], vec![b'B', b'A', b'C', b'A']]; let lcs = LongestCommonSubsequence::new(strings.clone()); println!("Source: LongestCommonSubsequence"); for (i, s) in strings.iter().enumerate() { - println!( - " String {}: {}", - i, - std::str::from_utf8(s).unwrap_or("?") - ); + println!(" String {}: {}", i, std::str::from_utf8(s).unwrap_or("?")); } println!(" num_strings: {}", lcs.num_strings()); println!(" total_length: {}", lcs.total_length()); From 7eac728ec36646c61f2f8bc0e4251043fca7f42c Mon Sep 17 00:00:00 2001 From: zazabap Date: Wed, 4 Mar 2026 20:31:10 +0000 Subject: [PATCH 7/7] =?UTF-8?q?docs:=20add=20LCS=20and=20LCS=E2=86=92MaxIS?= =?UTF-8?q?=20reduction=20in=20paper?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add problem-def for LongestCommonSubsequence and reduction-rule for LCS → MaximumIndependentSet with worked example and proof. Co-Authored-By: Claude Opus 4.6 --- docs/paper/reductions.typ | 81 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) diff --git a/docs/paper/reductions.typ b/docs/paper/reductions.typ index d119290f..28e401e2 100644 --- a/docs/paper/reductions.typ +++ b/docs/paper/reductions.typ @@ -51,6 +51,7 @@ "BicliqueCover": [Biclique Cover], "BinPacking": [Bin Packing], "ClosestVectorProblem": [Closest Vector Problem], + "LongestCommonSubsequence": [Longest Common Subsequence], ) // Definition label: "def:" — each definition block must have a matching label @@ -886,6 +887,45 @@ Biclique Cover is equivalent to factoring the biadjacency matrix $M$ of the bipa ) ] +#problem-def("LongestCommonSubsequence")[ + Given $k >= 2$ strings $s_1, dots, s_k$ over a finite alphabet $Sigma$, find a longest string $w$ that is a subsequence of every $s_i$. 
A string $w$ is a subsequence of $s$ if $w$ can be obtained by deleting zero or more characters from $s$ without changing the order of the remaining characters. +][ +The Longest Common Subsequence problem is one of Garey and Johnson's classical NP-hard problems (SR10) @garey1979 when the number of strings $k$ is part of the input. For $k = 2$, it is solvable in $O(n_1 n_2)$ time, where $n_i = |s_i|$, via dynamic programming @wagner1974. LCS is foundational to diff and version control (comparing text files), bioinformatics (DNA/protein sequence alignment), and data compression. It is not approximable within $n^(1/4 - epsilon)$ for any $epsilon > 0$ @jiang1995. The best known exact algorithm for $k$ strings uses dynamic programming in $O(product_(i=1)^k n_i)$ time, which is polynomial for fixed $k$ but exponential in $k$ @maier1978. + +*Example.* Consider $k = 2$ strings over $Sigma = {A, B, C}$: $s_1 = mono("ABAC")$ and $s_2 = mono("BACA")$. The longest common subsequence is $w = mono("BAC")$ (length 3), verified as a subsequence of both: $s_1 = A bold("B") bold("A") bold("C")$ (0-indexed positions 1, 2, 3) and $s_2 = bold("B") bold("A") bold("C") A$ (0-indexed positions 0, 1, 2). 
+ +#figure( + { + let s1 = "ABAC".clusters() + let s2 = "BACA".clusters() + let lcs-pos1 = (1, 2, 3) // positions of BAC in s1 + let lcs-pos2 = (0, 1, 2) // positions of BAC in s2 + let blue = graph-colors.at(0) + align(center, stack(dir: ttb, spacing: 0.4cm, + // s1 + stack(dir: ltr, spacing: 0pt, + box(width: 0.7cm, height: 0.45cm, align(center + horizon, text(8pt)[$s_1:$])), + ..s1.enumerate().map(((i, c)) => { + let fill = if i in lcs-pos1 { blue.transparentize(40%) } else { white } + box(width: 0.5cm, height: 0.45cm, fill: fill, stroke: 0.4pt + luma(180), + align(center + horizon, text(8pt, weight: if i in lcs-pos1 { "bold" } else { "regular" }, c))) + }), + ), + // s2 + stack(dir: ltr, spacing: 0pt, + box(width: 0.7cm, height: 0.45cm, align(center + horizon, text(8pt)[$s_2:$])), + ..s2.enumerate().map(((i, c)) => { + let fill = if i in lcs-pos2 { blue.transparentize(40%) } else { white } + box(width: 0.5cm, height: 0.45cm, fill: fill, stroke: 0.4pt + luma(180), + align(center + horizon, text(8pt, weight: if i in lcs-pos2 { "bold" } else { "regular" }, c))) + }), + ), + )) + }, + caption: [LCS of $s_1 = mono("ABAC")$ and $s_2 = mono("BACA")$: the common subsequence $mono("BAC")$ (highlighted) has length 3.], +) +] + // Completeness check: warn about problem types in JSON but missing from paper #{ let json-models = { @@ -1260,6 +1300,47 @@ where $P$ is a penalty weight large enough that any constraint violation costs m _Solution extraction._ For $v_(j,i) in S$ with literal $x_k$: set $x_k = 1$; for $overline(x_k)$: set $x_k = 0$. 
] +#let lcs_mis = load-example("lcs_to_maximumindependentset") +#let lcs_mis_r = load-results("lcs_to_maximumindependentset") +#let lcs_mis_sol = lcs_mis_r.solutions.at(0) +#reduction-rule("LongestCommonSubsequence", "MaximumIndependentSet", + example: true, + example-caption: [LCS of $mono("ABAC")$ and $mono("BACA")$ to conflict graph], + extra: [ + *Step 1 --- Match nodes.* For each pair of 0-indexed positions $(p_1, p_2)$ with $s_1[p_1] = s_2[p_2]$, create a vertex. Characters A, B, C appear in both strings, yielding #lcs_mis.target.instance.num_vertices match nodes: + + #align(center, table( + columns: (auto, auto, auto, auto), + inset: 4pt, + align: center, + table.header([*Node*], [$s_1$ pos], [$s_2$ pos], [*Char*]), + [$v_0$], [0], [1], [A], + [$v_1$], [0], [3], [A], + [$v_2$], [2], [1], [A], + [$v_3$], [2], [3], [A], + [$v_4$], [1], [0], [B], + [$v_5$], [3], [2], [C], + )) + + *Step 2 --- Conflict edges.* Two nodes conflict if they cannot both belong to the same common subsequence: their positions either cross (opposite order in $s_1$ and $s_2$) or collide (same position in one of the strings). This yields #lcs_mis.target.instance.num_edges conflict edges. For example, $v_0 = (0, 1)$ and $v_4 = (1, 0)$ cross: $s_1$ goes forward ($0 < 1$) but $s_2$ goes backward ($1 > 0$). + + *Step 3 --- Verify solution.* Maximum IS: ${v_4, v_2, v_5}$ (size 3). These nodes have positions $(1, 0)$, $(2, 1)$, $(3, 2)$ --- all consistently ordered. The extracted common subsequence is $mono("BAC")$, matching the LCS. \ + *Count:* #lcs_mis_r.solutions.len() optimal solution(s) #sym.checkmark + ], +)[ + @apostolico1987 Two positions in different strings can contribute to a common subsequence only if they hold the same character and their relative ordering is consistent across all strings. The _match graph_ encodes these constraints: vertices represent character-matching position tuples, and edges forbid incompatible pairs. An independent set of compatible position tuples directly yields a common subsequence, so the LCS length equals the maximum independent set size. 
+][ + _Construction._ For $k$ strings $s_1, dots, s_k$ of lengths $n_1, dots, n_k$: + + _Vertices:_ For each $k$-tuple $(p_1, dots, p_k)$ with $s_1[p_1] = s_2[p_2] = dots = s_k[p_k]$, create a vertex. Total: $|V| <= product_(i=1)^k n_i$. + + _Edges:_ For distinct vertices $u = (a_1, dots, a_k)$ and $v = (b_1, dots, b_k)$, $(u, v) in E$ iff the tuples _conflict_ --- i.e., neither $a_i < b_i$ for all $i$ nor $b_i < a_i$ for all $i$ holds. Equivalently, the tuples either cross ($a_i < b_i$ but $a_j > b_j$ for some $i != j$) or collide ($a_i = b_i$ for some $i$). + + _Correctness._ ($arrow.r.double$) A common subsequence of length $L$ selects $L$ position tuples that are pairwise consistently ordered --- they form an IS of size $L$. ($arrow.l.double$) An IS of size $L$ consists of $L$ pairwise non-conflicting tuples, so any two of them are strictly ordered the same way in every coordinate; this componentwise order is transitive, hence the tuples form a chain, and reading the matched characters along the chain yields a common subsequence of length $L$. + + _Solution extraction._ For each selected vertex, read the position in the shortest string; set the corresponding binary variable to 1. +] + #let sat_kc = load-example("satisfiability_to_kcoloring") #let sat_kc_r = load-results("satisfiability_to_kcoloring") #let sat_kc_sol = sat_kc_r.solutions.at(0)