diff --git a/.Rbuildignore b/.Rbuildignore index f2ea6e43d..1aa03d51f 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -7,6 +7,7 @@ ^src\- ^src\. CONTRIBUTING +AGENTS.md README.md ^\.github ^\.lintr @@ -34,3 +35,5 @@ revdep ^\.vs.*$ ^\.vscode.*$ ^CRAN-SUBMISSION$ +^\.positai$ +^\.claude$ diff --git a/.github/workflows/R-CMD-check.yml b/.github/workflows/R-CMD-check.yml index 4c5033a2d..67d431d60 100644 --- a/.github/workflows/R-CMD-check.yml +++ b/.github/workflows/R-CMD-check.yml @@ -227,3 +227,130 @@ jobs: with: deps: ${{ matrix.config.deps }} extra-packages: ms609/TreeDistData + + benchmark: + runs-on: ubuntu-latest + needs: check-release + if: github.event_name == 'pull_request' || github.event_name == 'workflow_dispatch' + name: benchmark + + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + RSPM: "https://packagemanager.posit.co/cran/__linux__/noble/latest" + _R_CHECK_BUILD_VIGNETTES_: false + _R_CHECK_CRAN_INCOMING_: false + _R_CHECK_FORCE_SUGGESTS_: false + R_REMOTES_STANDALONE: true + R_REMOTES_NO_ERRORS_FROM_WARNINGS: true + R_REALLY_FORCE_SYMBOLS: true + PKG_CXXFLAGS: "-O3" + + steps: + - name: Checkout PR branch + uses: actions/checkout@v5 + with: + path: pr + + - name: Checkout target branch + uses: actions/checkout@v5 + with: + ref: ${{ github.event.pull_request.base.ref || 'main' }} + path: main + + - name: Set up R + uses: r-lib/actions/setup-r@v2 + with: + extra-repositories: https://ms609.github.io/packages/ + + - name: Install R dependencies + uses: r-lib/actions/setup-r-dependencies@v2 + with: + dependencies: '"hard"' + extra-packages: | + github::ms609/TreeTools + needs: benchmark + + - name: Benchmark PR + uses: ms609/actions/benchmark-step@main + with: + path: pr + output: pr + + - name: Benchmark target + uses: ms609/actions/benchmark-step@main + with: + path: main + output: main + + - name: Benchmark PR again + uses: ms609/actions/benchmark-step@main + with: + path: pr + output: pr2 + + - run: dir pr-benchmark-results + working-directory: pr + - run: dir main-benchmark-results + working-directory: pr + + - name: Compare benchmarks + id: compare_results + working-directory: pr + run: | + Rscript benchmark/_compare_results.R + shell: bash + + - name: Upload PR benchmark results + if: failure() + uses: actions/upload-artifact@v4 + with: + name: pr-benchmark-results + path: pr/pr-benchmark-results/*.bench.Rds + + - name: Upload main benchmark results + if: failure() + uses: actions/upload-artifact@v4 + with: + name: main-benchmark-results + path: pr/main-benchmark-results/*.bench.Rds + + - name: Comment on PR + if: always() && github.event_name == 'pull_request' && steps.compare_results.outputs.report != '' + uses: actions/github-script@v7 + env: + BENCHMARK_MESSAGE: ${{ steps.compare_results.outputs.report }} + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + script: | + const benchmarkIdentifier = ''; + const outdatedPrefix = '> **⚠️ This benchmark result is outdated. See the latest comment below.**\n\n'; + + const comments = await github.rest.issues.listComments({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number + }); + + const previousBenchmarkComments = comments.data.filter(comment => + comment.user.type === 'Bot' && + comment.body.includes(benchmarkIdentifier) && + !comment.body.startsWith('> **⚠️ This benchmark result is outdated.') + ); + + for (const comment of previousBenchmarkComments) { + await github.rest.issues.updateComment({ + owner: context.repo.owner, + repo: context.repo.repo, + comment_id: comment.id, + body: outdatedPrefix + comment.body + }); + } + + const newCommentBody = benchmarkIdentifier + '\n' + process.env.BENCHMARK_MESSAGE; + + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + body: newCommentBody + }); diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml deleted file mode 100644 index e737f07ae..000000000 --- a/.github/workflows/benchmark.yml +++ /dev/null @@ -1,125 +0,0 @@ -name: Benchmark - -on: - workflow_dispatch: - workflow_run: - workflows: ["R-CMD-check"] - types: - - completed - paths: - - "src/**" - - "R/**" - - "**.R" - - "**.cpp" - - "**.c" - - "**.h" - - "**.hpp" - - "configure*" - - "Makevars*" - - "**benchmark.yml" - - "!memcheck**" - - "!docs**" - - "!inst**" - - "!man**" - - "!man-roxygen**" - - "!memcheck**" - - "!tests**" - - "!vignettes**" - -jobs: - benchmark: - runs-on: ubuntu-latest - - env: - GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} - RSPM: "https://packagemanager.rstudio.com/cran/__linux__/noble/latest" - _R_CHECK_BUILD_VIGNETTES_: false - _R_CHECK_CRAN_INCOMING_: false - _R_CHECK_FORCE_SUGGESTS_: false # CRAN settings - R_REMOTES_STANDALONE: true - R_REMOTES_NO_ERRORS_FROM_WARNINGS: true - R_REALLY_FORCE_SYMBOLS: true # Until R4.3 - - steps: - - name: Checkout PR branch - uses: actions/checkout@v5 - with: - path: pr - - - name: Checkout target branch - uses: actions/checkout@v5 - with: - ref: ${{ github.event.pull_request.base.ref || 'main' }} - path: main - - - name: Set up R - uses: r-lib/actions/setup-r@v2 - with: - extra-repositories: https://ms609.github.io/packages/ - - - name: Install R dependencies - uses: r-lib/actions/setup-r-dependencies@v2 - with: - dependencies: '"hard"' - extra-packages: | - github::ms609/TreeTools - needs: benchmark - - - name: Benchmark PR - uses: ms609/actions/benchmark-step@main - with: - path: pr - output: pr - - - name: Benchmark target - uses: ms609/actions/benchmark-step@main - with: - path: main - output: main - - - name: Benchmark PR again - uses: ms609/actions/benchmark-step@main - with: - path: pr - output: pr2 - - - run: dir pr-benchmark-results - working-directory: pr - - run: dir main-benchmark-results - working-directory: pr - - - name: Compare benchmarks - id: compare_results - working-directory: pr - run: | - Rscript benchmark/_compare_results.R - shell: bash - - - name: Upload PR benchmark results - if: failure() - uses: actions/upload-artifact@v4 - with: - name: pr-benchmark-results - path: pr/pr-benchmark-results/*.bench.Rds - - - name: Upload main benchmark results - if: failure() - uses: actions/upload-artifact@v4 - with: - name: main-benchmark-results - path: pr/main-benchmark-results/*.bench.Rds - - - name: Comment on PR - if: always() && github.event_name == 'pull_request' && steps.compare_results.outputs.report != '' - uses: actions/github-script@v7 - env: - BENCHMARK_MESSAGE: ${{ steps.compare_results.outputs.report }} - with: - github-token: ${{ secrets.GITHUB_TOKEN }} - script: | - github.rest.issues.createComment({ - owner: context.repo.owner, - repo: context.repo.repo, - issue_number: context.issue.number, - body: process.env.BENCHMARK_MESSAGE - }); diff --git a/.positai/settings.json b/.positai/settings.json new file mode 100644 index 000000000..9d0741541 --- /dev/null +++ b/.positai/settings.json @@ -0,0 +1,32 @@ +{ + "permission": { + "read": { + "*.r": "allow", + "*.h": "allow", + "*.log": "allow", + "*.cpp": "allow", + "*.R": "allow", + "*": "allow" + }, + "edit": { + "*.cpp": "allow", + "*.h": "allow", + "DESCRIPTION": "allow", + "src/Makevars": "allow", + "src/Makevars.win": "allow", + "*.R": "allow", + "*.md": "allow" + }, + "bash": { + "cd C:/Users/pjjg18/GitHub/TreeDist": "allow", + "cat *": "allow", + "echo *": "allow", + "find *": "allow", + "grep *": "allow", + "head *": "allow", + "ls *": "allow", + "rm -f src/*.o src/*.dll": "allow", + "tail *": "allow" + } + } +} diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 000000000..ea631ff5c --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,479 @@ +# TreeDist — Agent Memory + +## Project Overview + +**TreeDist** is an R package (GPL ≥ 3) providing a suite of topological distance metrics +between phylogenetic trees. The mathematical core is implemented in C++17 and exposed to R +via Rcpp. The primary optimization goal is speed: many real analyses compute pairwise +distances over hundreds or thousands of trees, so inner loops must be tight. + +Current version: `2.12.0.9000` (development). +CRAN package page: + +--- + +## Repository Layout + +``` +TreeDist/ +├── src/ # C++17 source (the main optimization target) +├── R/ # R wrapper layer and pure-R helpers +├── benchmark/ # Microbenchmark scripts (bench package) +├── tests/testthat/ # Unit tests +├── data-raw/ # Scripts that regenerate lookup tables / data +├── vignettes/ # User-facing tutorials +└── inst/ # Installed extras +``` + +**Sibling repository** — `../TreeTools` is a companion package that TreeDist links against +at the C++ level (`LinkingTo: TreeTools`). Edits to TreeTools headers (especially +`SplitList.h`) can affect TreeDist performance and can be pushed to CRAN independently +when ready. + +--- + +## C++ Source Files + +| File | Size | Purpose | +|------|------|---------| +| `tree_distances.cpp` | 22 KB | Main distance calculations; calls into CostMatrix / LAP | +| `tree_distances.h` | 15 KB | **CostMatrix** class; cache-aligned storage; `findRowSubmin` hot path | +| `lap.cpp` | 10 KB | Jonker-Volgenant LAPJV linear assignment; extensively hand-optimized | +| `spr.cpp` | 7 KB | SPR distance approximation | +| `spr_lookup.cpp` | — | SPR lookup-table implementation | +| `nni_distance.cpp` | 16 KB | NNI distance approximations; HybridBuffer allocation | +| `li_diameters.h` | 30 KB | Precomputed NNI diameter lookup tables | +| `information.h` | 6 KB | log₂ / factorial lookup tables (max 8192); cached at startup | +| `binary_entropy_counts.cpp` | — | Binary entropy calculations | +| `day_1985.cpp` | 10 KB | Consensus tree information | +| `hmi.cpp` | 6 KB | Hierarchical Mutual Information | +| `hpart.cpp` | 7 KB | Hierarchical partition structures | +| `reduce_tree.cpp` | 11 KB | Prune trees to common tip set before distance calculation | +| `path_vector.cpp` | 3 KB | Path distance vector | +| `mast.cpp` | 5 KB | Maximum Agreement Subtree | +| `RcppExports.cpp` | 20 KB | Auto-generated Rcpp glue (do not edit by hand) | +| `ints.h` | — | Fixed-width integer typedefs (`splitbit`, `int16`, `int32`, …) | + +--- + +## Key Optimizations Already in Place + +Understanding what has already been done avoids duplicating effort. + +### Memory / cache +- **64-byte alignment** (`alignas(64)`) on `CostMatrix::data_` and `t_data_`. +- `CostMatrix` pads row width to a multiple of `BLOCK_SIZE = 16` so SIMD loads never + straddle cache lines. +- A **transposed copy** of the cost matrix is maintained to allow column-wise access with + sequential memory reads. + +### Arithmetic +- **Lookup tables** in `information.h` for `log2` (values 0–(SL_MAX_TIPS-1)²) and + `log2_factorial` (up to 8192); initialized via `__attribute__((constructor))`. + Using these avoids runtime `std::log2()` calls on hot paths. +- **Loop unrolling** — `CostMatrix::findRowSubmin()` manually unrolls 4× (comment: + "gives ~20% speedup"). +- **`__restrict__`** pointer annotations in `lap.cpp` and `tree_distances.h` to enable + compiler alias analysis. +- **`__builtin_assume_aligned(ptr, 64)`** hints around inner loops. +- **`__builtin_popcount()`** for split-bit counting. + +### Algorithm +- **Column reduction in reverse order** in LAPJV (faster convergence). +- **`findRowSubmin` two-pass strategy** avoids redundant comparisons. +- **LAPJV v2.10.0** achieved a 2× speedup for large matrices via reorganisation. +- **`HybridBuffer`** in `nni_distance.cpp`: small allocations go on the + stack (thresholds: 512 splits, 16 bins) to avoid heap overhead. +- **Tree reduction** (`reduce_tree.cpp`): trees are pruned to their common tip set before + any distance is computed; this is a major algorithmic win for partially-overlapping + taxon sets. + +### Types +- `cost = int_fast64_t` for LAP to avoid overflow and exploit 64-bit registers. +- `BIG = numeric_limits::max() / SL_MAX_SPLITS` — the divide avoids integer + overflow inside the LAP inner loop. +- `ROUND_PRECISION = 2048²` for safe rounding in cost scaling. + +--- + +## Benchmark Infrastructure + +All benchmarks live in `benchmark/` and use the `bench` package. + +```r +source("benchmark/_init.R") # loads TreeTools, TreeDist; defines Benchmark() +``` + +`Benchmark(expr)` wraps `bench::mark()` with `min_iterations = 3` and `time_unit = "us"`. +When run non-interactively (e.g. CI) results are serialised to `.bench.Rds` files. +`_compare_results.R` compares PR results against `main` and fails the build on a +regression > 4 %. + +### ⚠ Benchmarking protocol — always use a release install + +**Do not benchmark with `devtools::load_all()`.** It appends +`-UNDEBUG -Wall -pedantic -g -O0` to the compiler flags, which overrides the +`-O2` in `~/.R/Makevars` and produces an unrepresentative (typically 3–5× +slower) build. All timing figures in this file were produced from a release +install unless noted otherwise. + +**Also beware of stale `.o` files.** `devtools::load_all()` writes debug- +compiled object files to `src/`. A subsequent `install.packages()` sees +up-to-date timestamps and skips recompilation, silently producing a "release" +install that contains `-O0` objects. `bench_ab()` and `bench_release()` now +clean `src/*.o` and `src/*.dll` before installing to prevent this. + +#### Preferred workflow: A/B comparison (`compare-ab.R`) + +`compare-ab.R` installs a renamed copy of the package as **TreeDistRef**, +then installs the current working-tree source as **TreeDist**, and loads +both into a single `Rscript` subprocess for `bench::mark()` comparison. +Because both packages share the same process, CPU state / thermals / +background load affect each measurement equally — eliminating the +environmental noise that plagued the old `bench_release()` approach. + +```r +source("benchmark/compare-ab.R") + +# 1. On the code you want as baseline: +install_ref() # installs as TreeDistRef to benchmark/ref_lib/ + +# 2. Make your changes, then: +bench_ab() # installs dev TreeDist, compares both in subprocess +``` + +`install_ref()` copies the source, renames the package (DESCRIPTION, +NAMESPACE, RcppExports symbol prefixes, `R_init_` entry point), cleans +stale `.o` files, and installs to `benchmark/ref_lib/`. `bench_ab()` +does the same for the dev version (to a temp lib), then runs the +comparison. Results are saved to `benchmark/results/ab.Rds`. + +Note: `compare-ab.R` and `compare-release.R` are local comparison tools +and intentionally do **not** start with `bench-` — the `bench-` prefix +is reserved for benchmark test scripts run by CI (`_run_benchmarks.R`). + +MSD (MatchingSplitDistance) serves as a built-in **canary**: it does not +use `add_ic_element`, so any difference on MSD indicates a build-level +or environmental problem rather than a real code change. + +#### Legacy workflow: `bench_release()` / `bench_compare()` + +Still available via `benchmark/compare-release.R`. Useful for recording +named snapshots, but comparisons across runs are vulnerable to +environmental drift (~15–20% between sessions on this machine). + +```r +source("benchmark/compare-release.R") + +# 1. Benchmark HEAD (before your patch) +bench_release(label = "baseline") + +# 2. Apply your changes, then benchmark again +bench_release(label = "my-optimisation") + +# 3. Compare +bench_compare("baseline", "my-optimisation") +``` + +`bench_release()` installs the current working-tree source to a private temp +library via `install.packages(..., type = "source")` (so Makevars flags apply +correctly) and runs the suite in a `Rscript` subprocess. Results are saved to +`benchmark/results/