From 0c47f79df7f8e61bc02d1503587b439c13ce8366 Mon Sep 17 00:00:00 2001 From: Austin Richardson Date: Thu, 3 Apr 2025 20:50:13 -0700 Subject: [PATCH 1/4] Add (a)cellular root ranks --- src/rank.rs | 9 +++++++++ tests/data/nodes.dmp | 4 ++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/rank.rs b/src/rank.rs index 2d9aee5..64d74f5 100644 --- a/src/rank.rs +++ b/src/rank.rs @@ -15,6 +15,8 @@ use crate::errors::{Error, ErrorKind, TaxonomyResult}; #[non_exhaustive] pub enum TaxRank { Domain, + AcellularRoot, + CellularRoot, Subdomain, Realm, Subrealm, @@ -113,6 +115,8 @@ impl TaxRank { pub fn to_ncbi_rank(self) -> &'static str { match self { TaxRank::Superkingdom => "superkingdom", + TaxRank::AcellularRoot => "acellular root", + TaxRank::CellularRoot => "cellular root", TaxRank::Kingdom => "kingdom", TaxRank::Subkingdom => "subkingdom", TaxRank::Superphylum => "superphylum", @@ -170,6 +174,8 @@ impl FromStr for TaxRank { "domain" | "regio" => Ok(TaxRank::Domain), "subdomain" => Ok(TaxRank::Subdomain), "realm" => Ok(TaxRank::Realm), + "acellular root" => Ok(TaxRank::AcellularRoot), + "cellular root" => Ok(TaxRank::CellularRoot), "subrealm" => Ok(TaxRank::Subrealm), "hyperkingdom" | "hyperregnum" => Ok(TaxRank::Hyperkingdom), "superkingdom" | "superregnum" => Ok(TaxRank::Superkingdom), @@ -257,6 +263,8 @@ impl fmt::Display for TaxRank { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let rank_str = match self { TaxRank::Domain => "domain", + TaxRank::AcellularRoot => "acellular root", + TaxRank::CellularRoot => "cellular root", TaxRank::Subdomain => "subdomain", TaxRank::Realm => "realm", TaxRank::Subrealm => "subrealm", @@ -357,6 +365,7 @@ mod test { static RANKS: &[super::TaxRank] = &[ Domain, + AcellularRoot, Subdomain, Realm, Subrealm, diff --git a/tests/data/nodes.dmp b/tests/data/nodes.dmp index 0b0683e..ae2d0de 100644 --- a/tests/data/nodes.dmp +++ b/tests/data/nodes.dmp @@ -1,5 +1,5 @@ 1 | 1 | no rank | | 8 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | | -10239 | 1 | no_rank | | 9 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | | +10239 | 1 | acellular root | | 9 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | | 2 | 131567 | superkingdom | | 0 | 0 | 11 | 0 | 0 | 0 | 0 | 0 | | 543 | 91347 | family | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | 561 | 543 | genus | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | @@ -7,4 +7,4 @@ 1224 | 2 | phylum | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | 1236 | 1224 | class | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | 91347 | 1236 | order | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | -131567 | 1 | no rank | | 8 | 1 | 1 | 1 | 0 | 1 | 1 | 0 | | +131567 | 1 | cellular root | | 8 | 1 | 1 | 1 | 0 | 1 | 1 | 0 | | From 9979a033ef621c275802fd56dbdd05f7ae08655a Mon Sep 17 00:00:00 2001 From: Austin Richardson Date: Thu, 3 Apr 2025 20:59:45 -0700 Subject: [PATCH 2/4] Add NCBI taxonomy test --- .github/workflows/test_ncbi.yml | 41 +++++++++++++++++++++++++++++++++ test_ncbi.py | 17 ++++++++++++++ 2 files changed, 58 insertions(+) create mode 100644 .github/workflows/test_ncbi.yml create mode 100755 test_ncbi.py diff --git a/.github/workflows/test_ncbi.yml b/.github/workflows/test_ncbi.yml new file mode 100644 index 0000000..f201815 --- /dev/null +++ b/.github/workflows/test_ncbi.yml @@ -0,0 +1,41 @@ +name: Test Latest NCBI Taxonomy +on: + push: + branches: + - master + pull_request: + schedule: + - cron: '0 0 1 * *' # runs at 00:00 on the 1st of every month (UTC) + +jobs: + test-ncbi-python-bindings: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v3 + + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: stable + override: true + + - name: Install python headers + run: sudo apt-get update && sudo apt-get install python3-dev python3-pip python3-venv + + - name: Install python dependencies + run: | + python3 -m venv venv + . venv/bin/activate + pip3 install maturin downloads + pip3 show maturin + + - name: Add library to venv + run: | + . venv/bin/activate + maturin develop --features=python + + - name: Test Python bindings with latest NCBI taxonomy + run: | + . venv/bin/activate + python test_ncbi.py diff --git a/test_ncbi.py b/test_ncbi.py new file mode 100755 index 0000000..b3fcece --- /dev/null +++ b/test_ncbi.py @@ -0,0 +1,17 @@ +#!/usr/bin/env python3 + +import taxonomy +from downloads import download +import subprocess +import unittest + + +class LatestNCBITestCase(unittest.TestCase): + def test_load_latest_ncbi_taxonomy(self): + download("https://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz") + subprocess.check_output(["tar", "-zxvf", "taxdump.tar.gz"]) + taxonomy.Taxonomy.from_ncbi(".") + + +if __name__ == "__main__": + unittest.main() From 3fa5f49d053a1d90cd3656713754004a4f6ccaa2 Mon Sep 17 00:00:00 2001 From: Austin Richardson Date: Fri, 4 Apr 2025 10:24:11 -0700 Subject: [PATCH 3/4] Skip NCBI test by default --- .github/workflows/test_ncbi.yml | 2 +- .github/workflows/tests.yml | 2 +- test_ncbi.py | 17 ----------------- test_python.py | 13 +++++++++++++ 4 files changed, 15 insertions(+), 19 deletions(-) delete mode 100755 test_ncbi.py diff --git a/.github/workflows/test_ncbi.yml b/.github/workflows/test_ncbi.yml index f201815..eaeb759 100644 --- a/.github/workflows/test_ncbi.yml +++ b/.github/workflows/test_ncbi.yml @@ -38,4 +38,4 @@ jobs: - name: Test Python bindings with latest NCBI taxonomy run: | . venv/bin/activate - python test_ncbi.py + TAXONOMY_TEST_NCBI=true python test_ncbi.py diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index fd828fb..4687d93 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -49,7 +49,7 @@ jobs: run: | python3 -m venv venv . venv/bin/activate - pip3 install maturin + pip3 install maturin downloads pip3 show maturin - name: add library to venv diff --git a/test_ncbi.py b/test_ncbi.py deleted file mode 100755 index b3fcece..0000000 --- a/test_ncbi.py +++ /dev/null @@ -1,17 +0,0 @@ -#!/usr/bin/env python3 - -import taxonomy -from downloads import download -import subprocess -import unittest - - -class LatestNCBITestCase(unittest.TestCase): - def test_load_latest_ncbi_taxonomy(self): - download("https://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz") - subprocess.check_output(["tar", "-zxvf", "taxdump.tar.gz"]) - taxonomy.Taxonomy.from_ncbi(".") - - -if __name__ == "__main__": - unittest.main() diff --git a/test_python.py b/test_python.py index 1902040..0d0bccc 100644 --- a/test_python.py +++ b/test_python.py @@ -2,6 +2,9 @@ import unittest from taxonomy import Taxonomy, TaxonomyError +from downloads import download +import os +import subprocess JSON_DATA = """ { @@ -487,5 +490,15 @@ def test_invalid_format(self): Taxonomy.from_gtdb(file.read()) +class LatestNCBITestCase(unittest.TestCase): + @unittest.skipUnless( + os.getenv("TAXONOMY_TEST_NCBI"), "Define TAXONOMY_TEST_NCBI to run NCBI test" + ) + def test_load_latest_ncbi_taxonomy(self): + download("https://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz") + subprocess.check_output(["tar", "-zxvf", "taxdump.tar.gz"]) + Taxonomy.from_ncbi(".") + + if __name__ == "__main__": unittest.main() From b3315bf52900ae63ba58f694d0b513917c21e5c8 Mon Sep 17 00:00:00 2001 From: Austin Richardson Date: Fri, 4 Apr 2025 10:30:53 -0700 Subject: [PATCH 4/4] Fix test invocation --- .github/workflows/test_ncbi.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test_ncbi.yml b/.github/workflows/test_ncbi.yml index eaeb759..e7628f0 100644 --- a/.github/workflows/test_ncbi.yml +++ b/.github/workflows/test_ncbi.yml @@ -38,4 +38,4 @@ jobs: - name: Test Python bindings with latest NCBI taxonomy run: | . venv/bin/activate - TAXONOMY_TEST_NCBI=true python test_ncbi.py + TAXONOMY_TEST_NCBI=true python -m unittest test_python.LatestNCBITestCase.test_load_latest_ncbi_taxonomy