diff --git a/RcppTskit/NEWS.md b/RcppTskit/NEWS.md index 8a62977..e54e322 100644 --- a/RcppTskit/NEWS.md +++ b/RcppTskit/NEWS.md @@ -35,6 +35,11 @@ and releases adhere to [Semantic Versioning](https://semver.org/spec/v2.0.0.html to append node rows from \code{R}, mirroring `tsk_node_table_add_row()`. - Added `rtsk_edge_table_add_row()` and `TableCollection$edge_table_add_row()` to append edge rows from \code{R}, mirroring `tsk_edge_table_add_row()`. +- Added `rtsk_site_table_add_row()` and `TableCollection$site_table_add_row()` + to append site rows from \code{R}, mirroring `tsk_site_table_add_row()`. +- Added `rtsk_mutation_table_add_row()` and + `TableCollection$mutation_table_add_row()` to append mutation rows from + \code{R}, mirroring `tsk_mutation_table_add_row()`. - TODO ### Changed diff --git a/RcppTskit/R/Class-TableCollection.R b/RcppTskit/R/Class-TableCollection.R index 80dd49e..6d88ab8 100644 --- a/RcppTskit/R/Class-TableCollection.R +++ b/RcppTskit/R/Class-TableCollection.R @@ -367,6 +367,75 @@ TableCollection <- R6Class( rtsk_table_collection_get_num_sites(self$xptr) }, + #' @description Add a row to the sites table. + #' @param position numeric scalar site position. + #' @param ancestral_state for the new site; accepts \code{NULL}, + #' a raw vector, or a character of length 1. + #' @param metadata for the new site; accepts \code{NULL}, + #' a raw vector, or a character of length 1. + #' @details See the \code{tskit Python} equivalent at + #' \url{https://tskit.dev/tskit/docs/stable/python-api.html#tskit.SiteTable.add_row}. + #' The function casts inputs to the expected class. \code{position} + #' must be a non-\code{NA} finite numeric scalar. + #' @return Integer row ID (0-based) of the newly added site. + #' @examples + #' ts_file <- system.file("examples/test.trees", package = "RcppTskit") + #' tc <- tc_load(ts_file) + #' n_before <- tc$num_sites() + #' new_id <- tc$site_table_add_row(position = 0.5, ancestral_state = "A") + #' new_id <- tc$site_table_add_row(position = 1.5, ancestral_state = charToRaw("G")) + #' new_id <- tc$site_table_add_row(position = 2.5, ancestral_state = "T", metadata = "abc") + #' n_after <- tc$num_sites() + site_table_add_row = function( + position, + ancestral_state = NULL, + metadata = NULL + ) { + if ( + is.null(position) || + length(position) != 1L || + !is.numeric(position) || + is.na(position) || + !is.finite(position) + ) { + stop("position must be a non-NA finite numeric scalar!") + } + if (is.null(ancestral_state)) { + ancestral_state_raw <- NULL + } else if (is.raw(ancestral_state)) { + ancestral_state_raw <- ancestral_state + } else if ( + is.character(ancestral_state) && + length(ancestral_state) == 1L && + !is.na(ancestral_state) + ) { + ancestral_state_raw <- charToRaw(ancestral_state) + } else { + stop( + "ancestral_state must be NULL, a raw vector, or a length-1 non-NA character string!" + ) + } + if (is.null(metadata)) { + metadata_raw <- NULL + } else if (is.raw(metadata)) { + metadata_raw <- metadata + } else if ( + is.character(metadata) && length(metadata) == 1L && !is.na(metadata) + ) { + metadata_raw <- charToRaw(metadata) + } else { + stop( + "metadata must be NULL, a raw vector, or a length-1 non-NA character string!" + ) + } + rtsk_site_table_add_row( + tc = self$xptr, + position = as.numeric(position), + ancestral_state = ancestral_state_raw, + metadata = metadata_raw + ) + }, + #' @description Get the number of mutations in a table collection. #' @return A signed 64 bit integer \code{bit64::integer64}. #' @examples @@ -377,6 +446,104 @@ TableCollection <- R6Class( rtsk_table_collection_get_num_mutations(self$xptr) }, + #' @description Add a row to the mutations table. + #' @param site integer scalar site row ID (0-based). + #' @param node integer scalar node row ID (0-based). + #' @param parent integer scalar parent mutation row ID (0-based, or \code{-1}). + #' @param time numeric scalar mutation time; use \code{NaN} for + #' \code{TSK_UNKNOWN_TIME}. + #' @param derived_state for the new mutation; accepts \code{NULL}, + #' a raw vector, or a character of length 1. + #' @param metadata for the new mutation; accepts \code{NULL}, + #' a raw vector, or a character of length 1. + #' @details See the \code{tskit Python} equivalent at + #' \url{https://tskit.dev/tskit/docs/stable/python-api.html#tskit.MutationTable.add_row}. + #' The function casts inputs to the expected class. \code{site}, + #' \code{node}, and \code{parent} must be non-\code{NA} integer scalars. + #' \code{time} must be a numeric scalar that is finite or \code{NaN} + #' (unknown time). + #' @return Integer row ID (0-based) of the newly added mutation. + #' @examples + #' ts_file <- system.file("examples/test.trees", package = "RcppTskit") + #' tc <- tc_load(ts_file) + #' n_before <- tc$num_mutations() + #' new_id <- tc$mutation_table_add_row(site = 0L, node = 0L, derived_state = "T") + #' new_id <- tc$mutation_table_add_row( + #' site = 0L, + #' node = 0L, + #' parent = -1L, + #' time = 1.5, + #' derived_state = charToRaw("C"), + #' metadata = "abc" + #' ) + #' n_after <- tc$num_mutations() + mutation_table_add_row = function( + site, + node, + parent = -1L, + time = NaN, + derived_state = NULL, + metadata = NULL + ) { + if (is.null(site) || length(site) != 1L || is.na(as.integer(site))) { + stop("site must be a non-NA integer scalar!") + } + if (is.null(node) || length(node) != 1L || is.na(as.integer(node))) { + stop("node must be a non-NA integer scalar!") + } + if ( + is.null(parent) || length(parent) != 1L || is.na(as.integer(parent)) + ) { + stop("parent must be a non-NA integer scalar!") + } + if ( + is.null(time) || + length(time) != 1L || + !is.numeric(time) || + (is.na(time) && !is.nan(time)) || + !(is.finite(time) || is.nan(time)) + ) { + stop("time must be a non-NA numeric scalar that is finite or NaN!") + } + if (is.null(derived_state)) { + derived_state_raw <- NULL + } else if (is.raw(derived_state)) { + derived_state_raw <- derived_state + } else if ( + is.character(derived_state) && + length(derived_state) == 1L && + !is.na(derived_state) + ) { + derived_state_raw <- charToRaw(derived_state) + } else { + stop( + "derived_state must be NULL, a raw vector, or a length-1 non-NA character string!" + ) + } + if (is.null(metadata)) { + metadata_raw <- NULL + } else if (is.raw(metadata)) { + metadata_raw <- metadata + } else if ( + is.character(metadata) && length(metadata) == 1L && !is.na(metadata) + ) { + metadata_raw <- charToRaw(metadata) + } else { + stop( + "metadata must be NULL, a raw vector, or a length-1 non-NA character string!" + ) + } + rtsk_mutation_table_add_row( + tc = self$xptr, + site = as.integer(site), + node = as.integer(node), + parent = as.integer(parent), + time = as.numeric(time), + derived_state = derived_state_raw, + metadata = metadata_raw + ) + }, + #' @description Get the sequence length. #' @return A numeric. #' @examples diff --git a/RcppTskit/R/RcppExports.R b/RcppTskit/R/RcppExports.R index aa3e2d1..ce28ca3 100644 --- a/RcppTskit/R/RcppExports.R +++ b/RcppTskit/R/RcppExports.R @@ -215,6 +215,14 @@ rtsk_edge_table_add_row <- function(tc, left, right, parent, child, metadata = N .Call(`_RcppTskit_rtsk_edge_table_add_row`, tc, left, right, parent, child, metadata) } +rtsk_site_table_add_row <- function(tc, position, ancestral_state = NULL, metadata = NULL) { + .Call(`_RcppTskit_rtsk_site_table_add_row`, tc, position, ancestral_state, metadata) +} + +rtsk_mutation_table_add_row <- function(tc, site, node, parent, time, derived_state = NULL, metadata = NULL) { + .Call(`_RcppTskit_rtsk_mutation_table_add_row`, tc, site, node, parent, time, derived_state, metadata) +} + test_tsk_bug_assert_c <- function() { invisible(.Call(`_RcppTskit_test_tsk_bug_assert_c`)) } @@ -259,3 +267,11 @@ test_rtsk_edge_table_add_row_forced_error <- function(tc) { invisible(.Call(`_RcppTskit_test_rtsk_edge_table_add_row_forced_error`, tc)) } +test_rtsk_site_table_add_row_forced_error <- function(tc) { + invisible(.Call(`_RcppTskit_test_rtsk_site_table_add_row_forced_error`, tc)) +} + +test_rtsk_mutation_table_add_row_forced_error <- function(tc) { + invisible(.Call(`_RcppTskit_test_rtsk_mutation_table_add_row_forced_error`, tc)) +} + diff --git a/RcppTskit/inst/WORDLIST b/RcppTskit/inst/WORDLIST index bc2dadc..fb28183 100644 --- a/RcppTskit/inst/WORDLIST +++ b/RcppTskit/inst/WORDLIST @@ -6,13 +6,19 @@ SLiM TableCollection TreeSequence Tskit +arXiv bitmask +cloneable +com doi etc finaliser +github +HighlanderLab iyae kastore msprime +num tc ts tskit diff --git a/RcppTskit/inst/include/RcppTskit_public.hpp b/RcppTskit/inst/include/RcppTskit_public.hpp index 4ae29cd..177f2e4 100644 --- a/RcppTskit/inst/include/RcppTskit_public.hpp +++ b/RcppTskit/inst/include/RcppTskit_public.hpp @@ -68,5 +68,13 @@ int rtsk_node_table_add_row( int rtsk_edge_table_add_row( SEXP tc, double left, double right, int parent, int child, Rcpp::Nullable metadata = R_NilValue); +int rtsk_site_table_add_row( + SEXP tc, double position, + Rcpp::Nullable ancestral_state = R_NilValue, + Rcpp::Nullable metadata = R_NilValue); +int rtsk_mutation_table_add_row( + SEXP tc, int site, int node, int parent, double time, + Rcpp::Nullable derived_state = R_NilValue, + Rcpp::Nullable metadata = R_NilValue); #endif diff --git a/RcppTskit/man/TableCollection.Rd b/RcppTskit/man/TableCollection.Rd index 513f731..b9aa71a 100644 --- a/RcppTskit/man/TableCollection.Rd +++ b/RcppTskit/man/TableCollection.Rd @@ -147,6 +147,18 @@ tc_file <- system.file("examples/test.trees", package = "RcppTskit") tc <- tc_load(tc_file) tc$num_sites() +## ------------------------------------------------ +## Method `TableCollection$site_table_add_row` +## ------------------------------------------------ + +ts_file <- system.file("examples/test.trees", package = "RcppTskit") +tc <- tc_load(ts_file) +n_before <- tc$num_sites() +new_id <- tc$site_table_add_row(position = 0.5, ancestral_state = "A") +new_id <- tc$site_table_add_row(position = 1.5, ancestral_state = charToRaw("G")) +new_id <- tc$site_table_add_row(position = 2.5, ancestral_state = "T", metadata = "abc") +n_after <- tc$num_sites() + ## ------------------------------------------------ ## Method `TableCollection$num_mutations` ## ------------------------------------------------ @@ -155,6 +167,24 @@ tc_file <- system.file("examples/test.trees", package = "RcppTskit") tc <- tc_load(tc_file) tc$num_mutations() +## ------------------------------------------------ +## Method `TableCollection$mutation_table_add_row` +## ------------------------------------------------ + +ts_file <- system.file("examples/test.trees", package = "RcppTskit") +tc <- tc_load(ts_file) +n_before <- tc$num_mutations() +new_id <- tc$mutation_table_add_row(site = 0L, node = 0L, derived_state = "T") +new_id <- tc$mutation_table_add_row( + site = 0L, + node = 0L, + parent = -1L, + time = 1.5, + derived_state = charToRaw("C"), + metadata = "abc" +) +n_after <- tc$num_mutations() + ## ------------------------------------------------ ## Method `TableCollection$sequence_length` ## ------------------------------------------------ @@ -280,7 +310,9 @@ tc \item \href{#method-TableCollection-num_edges}{\code{TableCollection$num_edges()}} \item \href{#method-TableCollection-edge_table_add_row}{\code{TableCollection$edge_table_add_row()}} \item \href{#method-TableCollection-num_sites}{\code{TableCollection$num_sites()}} +\item \href{#method-TableCollection-site_table_add_row}{\code{TableCollection$site_table_add_row()}} \item \href{#method-TableCollection-num_mutations}{\code{TableCollection$num_mutations()}} +\item \href{#method-TableCollection-mutation_table_add_row}{\code{TableCollection$mutation_table_add_row()}} \item \href{#method-TableCollection-sequence_length}{\code{TableCollection$sequence_length()}} \item \href{#method-TableCollection-time_units}{\code{TableCollection$time_units()}} \item \href{#method-TableCollection-has_index}{\code{TableCollection$has_index()}} @@ -764,6 +796,57 @@ tc$num_sites() } +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-TableCollection-site_table_add_row}{}}} +\subsection{Method \code{site_table_add_row()}}{ +Add a row to the sites table. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{TableCollection$site_table_add_row( + position, + ancestral_state = NULL, + metadata = NULL +)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{position}}{numeric scalar site position.} + +\item{\code{ancestral_state}}{for the new site; accepts \code{NULL}, +a raw vector, or a character of length 1.} + +\item{\code{metadata}}{for the new site; accepts \code{NULL}, +a raw vector, or a character of length 1.} +} +\if{html}{\out{
}} +} +\subsection{Details}{ +See the \code{tskit Python} equivalent at + \url{https://tskit.dev/tskit/docs/stable/python-api.html#tskit.SiteTable.add_row}. + The function casts inputs to the expected class. \code{position} + must be a non-\code{NA} finite numeric scalar. +} + +\subsection{Returns}{ +Integer row ID (0-based) of the newly added site. +} +\subsection{Examples}{ +\if{html}{\out{
}} +\preformatted{ts_file <- system.file("examples/test.trees", package = "RcppTskit") +tc <- tc_load(ts_file) +n_before <- tc$num_sites() +new_id <- tc$site_table_add_row(position = 0.5, ancestral_state = "A") +new_id <- tc$site_table_add_row(position = 1.5, ancestral_state = charToRaw("G")) +new_id <- tc$site_table_add_row(position = 2.5, ancestral_state = "T", metadata = "abc") +n_after <- tc$num_sites() +} +\if{html}{\out{
}} + +} + } \if{html}{\out{
}} \if{html}{\out{}} @@ -787,6 +870,75 @@ tc$num_mutations() } +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-TableCollection-mutation_table_add_row}{}}} +\subsection{Method \code{mutation_table_add_row()}}{ +Add a row to the mutations table. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{TableCollection$mutation_table_add_row( + site, + node, + parent = -1L, + time = NaN, + derived_state = NULL, + metadata = NULL +)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{site}}{integer scalar site row ID (0-based).} + +\item{\code{node}}{integer scalar node row ID (0-based).} + +\item{\code{parent}}{integer scalar parent mutation row ID (0-based, or \code{-1}).} + +\item{\code{time}}{numeric scalar mutation time; use \code{NaN} for +\code{TSK_UNKNOWN_TIME}.} + +\item{\code{derived_state}}{for the new mutation; accepts \code{NULL}, +a raw vector, or a character of length 1.} + +\item{\code{metadata}}{for the new mutation; accepts \code{NULL}, +a raw vector, or a character of length 1.} +} +\if{html}{\out{
}} +} +\subsection{Details}{ +See the \code{tskit Python} equivalent at + \url{https://tskit.dev/tskit/docs/stable/python-api.html#tskit.MutationTable.add_row}. + The function casts inputs to the expected class. \code{site}, + \code{node}, and \code{parent} must be non-\code{NA} integer scalars. + \code{time} must be a numeric scalar that is finite or \code{NaN} + (unknown time). +} + +\subsection{Returns}{ +Integer row ID (0-based) of the newly added mutation. +} +\subsection{Examples}{ +\if{html}{\out{
}} +\preformatted{ts_file <- system.file("examples/test.trees", package = "RcppTskit") +tc <- tc_load(ts_file) +n_before <- tc$num_mutations() +new_id <- tc$mutation_table_add_row(site = 0L, node = 0L, derived_state = "T") +new_id <- tc$mutation_table_add_row( + site = 0L, + node = 0L, + parent = -1L, + time = 1.5, + derived_state = charToRaw("C"), + metadata = "abc" +) +n_after <- tc$num_mutations() +} +\if{html}{\out{
}} + +} + } \if{html}{\out{
}} \if{html}{\out{}} diff --git a/RcppTskit/src/RcppExports.cpp b/RcppTskit/src/RcppExports.cpp index 185d8bb..c0f0f61 100644 --- a/RcppTskit/src/RcppExports.cpp +++ b/RcppTskit/src/RcppExports.cpp @@ -581,6 +581,37 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// rtsk_site_table_add_row +int rtsk_site_table_add_row(const SEXP tc, const double position, const Rcpp::Nullable ancestral_state, const Rcpp::Nullable metadata); +RcppExport SEXP _RcppTskit_rtsk_site_table_add_row(SEXP tcSEXP, SEXP positionSEXP, SEXP ancestral_stateSEXP, SEXP metadataSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const SEXP >::type tc(tcSEXP); + Rcpp::traits::input_parameter< const double >::type position(positionSEXP); + Rcpp::traits::input_parameter< const Rcpp::Nullable >::type ancestral_state(ancestral_stateSEXP); + Rcpp::traits::input_parameter< const Rcpp::Nullable >::type metadata(metadataSEXP); + rcpp_result_gen = Rcpp::wrap(rtsk_site_table_add_row(tc, position, ancestral_state, metadata)); + return rcpp_result_gen; +END_RCPP +} +// rtsk_mutation_table_add_row +int rtsk_mutation_table_add_row(const SEXP tc, const int site, const int node, const int parent, const double time, const Rcpp::Nullable derived_state, const Rcpp::Nullable metadata); +RcppExport SEXP _RcppTskit_rtsk_mutation_table_add_row(SEXP tcSEXP, SEXP siteSEXP, SEXP nodeSEXP, SEXP parentSEXP, SEXP timeSEXP, SEXP derived_stateSEXP, SEXP metadataSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const SEXP >::type tc(tcSEXP); + Rcpp::traits::input_parameter< const int >::type site(siteSEXP); + Rcpp::traits::input_parameter< const int >::type node(nodeSEXP); + Rcpp::traits::input_parameter< const int >::type parent(parentSEXP); + Rcpp::traits::input_parameter< const double >::type time(timeSEXP); + Rcpp::traits::input_parameter< const Rcpp::Nullable >::type derived_state(derived_stateSEXP); + Rcpp::traits::input_parameter< const Rcpp::Nullable >::type metadata(metadataSEXP); + rcpp_result_gen = Rcpp::wrap(rtsk_mutation_table_add_row(tc, site, node, parent, time, derived_state, metadata)); + return rcpp_result_gen; +END_RCPP +} // test_tsk_bug_assert_c void test_tsk_bug_assert_c(); RcppExport SEXP _RcppTskit_test_tsk_bug_assert_c() { @@ -689,6 +720,26 @@ BEGIN_RCPP return R_NilValue; END_RCPP } +// test_rtsk_site_table_add_row_forced_error +void test_rtsk_site_table_add_row_forced_error(const SEXP tc); +RcppExport SEXP _RcppTskit_test_rtsk_site_table_add_row_forced_error(SEXP tcSEXP) { +BEGIN_RCPP + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const SEXP >::type tc(tcSEXP); + test_rtsk_site_table_add_row_forced_error(tc); + return R_NilValue; +END_RCPP +} +// test_rtsk_mutation_table_add_row_forced_error +void test_rtsk_mutation_table_add_row_forced_error(const SEXP tc); +RcppExport SEXP _RcppTskit_test_rtsk_mutation_table_add_row_forced_error(SEXP tcSEXP) { +BEGIN_RCPP + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const SEXP >::type tc(tcSEXP); + test_rtsk_mutation_table_add_row_forced_error(tc); + return R_NilValue; +END_RCPP +} static const R_CallMethodDef CallEntries[] = { {"_RcppTskit_test_validate_options", (DL_FUNC) &_RcppTskit_test_validate_options, 2}, @@ -741,6 +792,8 @@ static const R_CallMethodDef CallEntries[] = { {"_RcppTskit_rtsk_individual_table_add_row", (DL_FUNC) &_RcppTskit_rtsk_individual_table_add_row, 5}, {"_RcppTskit_rtsk_node_table_add_row", (DL_FUNC) &_RcppTskit_rtsk_node_table_add_row, 6}, {"_RcppTskit_rtsk_edge_table_add_row", (DL_FUNC) &_RcppTskit_rtsk_edge_table_add_row, 6}, + {"_RcppTskit_rtsk_site_table_add_row", (DL_FUNC) &_RcppTskit_rtsk_site_table_add_row, 4}, + {"_RcppTskit_rtsk_mutation_table_add_row", (DL_FUNC) &_RcppTskit_rtsk_mutation_table_add_row, 7}, {"_RcppTskit_test_tsk_bug_assert_c", (DL_FUNC) &_RcppTskit_test_tsk_bug_assert_c, 0}, {"_RcppTskit_test_tsk_bug_assert_cpp", (DL_FUNC) &_RcppTskit_test_tsk_bug_assert_cpp, 0}, {"_RcppTskit_test_tsk_trace_error_c", (DL_FUNC) &_RcppTskit_test_tsk_trace_error_c, 0}, @@ -752,6 +805,8 @@ static const R_CallMethodDef CallEntries[] = { {"_RcppTskit_test_rtsk_individual_table_add_row_forced_error", (DL_FUNC) &_RcppTskit_test_rtsk_individual_table_add_row_forced_error, 1}, {"_RcppTskit_test_rtsk_node_table_add_row_forced_error", (DL_FUNC) &_RcppTskit_test_rtsk_node_table_add_row_forced_error, 1}, {"_RcppTskit_test_rtsk_edge_table_add_row_forced_error", (DL_FUNC) &_RcppTskit_test_rtsk_edge_table_add_row_forced_error, 1}, + {"_RcppTskit_test_rtsk_site_table_add_row_forced_error", (DL_FUNC) &_RcppTskit_test_rtsk_site_table_add_row_forced_error, 1}, + {"_RcppTskit_test_rtsk_mutation_table_add_row_forced_error", (DL_FUNC) &_RcppTskit_test_rtsk_mutation_table_add_row_forced_error, 1}, {NULL, NULL, 0} }; diff --git a/RcppTskit/src/RcppTskit.cpp b/RcppTskit/src/RcppTskit.cpp index e38e676..ee0c4ba 100644 --- a/RcppTskit/src/RcppTskit.cpp +++ b/RcppTskit/src/RcppTskit.cpp @@ -1496,3 +1496,173 @@ int rtsk_edge_table_add_row( } return static_cast(row_id); } + +// PUBLIC, wrapper for tsk_site_table_add_row +// @title Add a row to the site table in a table collection +// @param tc an external pointer to table collection as a +// \code{tsk_table_collection_t} object. +// @param position numeric scalar site position. +// @param ancestral_state raw vector with ancestral-state bytes +// (can be \code{NULL}, treated as empty). +// @param metadata raw vector with metadata bytes +// (can be \code{NULL}). +// @details This function calls +// \url{https://tskit.dev/tskit/docs/stable/c-api.html#c.tsk_site_table_add_row} +// on the sites table of \code{tc}. +// @return The 0-based row ID of the newly added site. +// @examples +// ts_file <- system.file("examples/test.trees", package = "RcppTskit") +// tc_xptr <- RcppTskit:::rtsk_table_collection_load(ts_file) +// n_before <- RcppTskit:::rtsk_table_collection_get_num_sites(tc_xptr) +// m_before <- +// RcppTskit:::rtsk_table_collection_metadata_length(tc_xptr)[["sites"]] +// new_id <- RcppTskit:::rtsk_site_table_add_row( +// tc = tc_xptr, position = 0.5, ancestral_state = charToRaw("A") +// ) +// new_id <- RcppTskit:::rtsk_site_table_add_row( +// tc = tc_xptr, +// position = 1.5, +// ancestral_state = charToRaw("G"), +// metadata = charToRaw("abc") +// ) +// n_after <- RcppTskit:::rtsk_table_collection_get_num_sites(tc_xptr) +// m_after <- +// RcppTskit:::rtsk_table_collection_metadata_length(tc_xptr)[["sites"]] +// new_id == as.integer(n_before) && n_after == n_before + 2L && +// m_after == m_before + 3L +// [[Rcpp::export]] +int rtsk_site_table_add_row( + const SEXP tc, const double position, + const Rcpp::Nullable ancestral_state = R_NilValue, + const Rcpp::Nullable metadata = R_NilValue) { + if (Rcpp::NumericVector::is_na(position)) { + Rcpp::stop("position must not be NA_real_ in rtsk_site_table_add_row"); + } + if (!std::isfinite(position)) { + Rcpp::stop("position must be finite in rtsk_site_table_add_row"); + } + rtsk_table_collection_t tc_xptr(tc); + + const Rcpp::RawVector ancestral_state_vec = + nullable_to_vector_or_empty(ancestral_state); + const tsk_size_t ancestral_state_length = + static_cast(ancestral_state_vec.size()); + const char *ancestral_state_ptr = + ancestral_state_length > 0 + ? reinterpret_cast(RAW(ancestral_state_vec)) + : nullptr; + + const Rcpp::RawVector metadata_vec = + nullable_to_vector_or_empty(metadata); + const tsk_size_t metadata_length = + static_cast(metadata_vec.size()); + const char *metadata_ptr = + metadata_length > 0 ? reinterpret_cast(RAW(metadata_vec)) + : nullptr; + + const tsk_id_t row_id = tsk_site_table_add_row( + &tc_xptr->sites, position, ancestral_state_ptr, ancestral_state_length, + metadata_ptr, metadata_length); + if (row_id < 0) { + Rcpp::stop(tsk_strerror(row_id)); + } + return static_cast(row_id); +} + +// PUBLIC, wrapper for tsk_mutation_table_add_row +// @title Add a row to the mutation table in a table collection +// @param tc an external pointer to table collection as a +// \code{tsk_table_collection_t} object. +// @param site integer site row ID (0-based). +// @param node integer node row ID (0-based). +// @param parent integer parent mutation row ID (0-based, or \code{-1}). +// @param time numeric mutation time value. +// @param derived_state raw vector with derived-state bytes +// (can be \code{NULL}, treated as empty). +// @param metadata raw vector with metadata bytes +// (can be \code{NULL}). +// @details This function calls +// \url{https://tskit.dev/tskit/docs/stable/c-api.html#c.tsk_mutation_table_add_row} +// on the mutations table of \code{tc}. +// @return The 0-based row ID of the newly added mutation. +// @examples +// ts_file <- system.file("examples/test.trees", package = "RcppTskit") +// tc_xptr <- RcppTskit:::rtsk_table_collection_load(ts_file) +// n_before <- RcppTskit:::rtsk_table_collection_get_num_mutations(tc_xptr) +// m_before <- +// RcppTskit:::rtsk_table_collection_metadata_length(tc_xptr)[["mutations"]] +// new_id <- RcppTskit:::rtsk_mutation_table_add_row( +// tc = tc_xptr, +// site = 0L, +// node = 0L, +// parent = -1L, +// time = TSK_UNKNOWN_TIME, +// derived_state = charToRaw("T") +// ) +// new_id <- RcppTskit:::rtsk_mutation_table_add_row( +// tc = tc_xptr, +// site = 0L, +// node = 0L, +// parent = -1L, +// time = 1.0, +// derived_state = charToRaw("C"), +// metadata = charToRaw("abc") +// ) +// n_after <- RcppTskit:::rtsk_table_collection_get_num_mutations(tc_xptr) +// m_after <- +// RcppTskit:::rtsk_table_collection_metadata_length(tc_xptr)[["mutations"]] +// new_id == as.integer(n_before) && n_after == n_before + 2L && +// m_after == m_before + 3L +// [[Rcpp::export]] +int rtsk_mutation_table_add_row( + const SEXP tc, const int site, const int node, const int parent, + const double time, + const Rcpp::Nullable derived_state = R_NilValue, + const Rcpp::Nullable metadata = R_NilValue) { + if (Rcpp::IntegerVector::is_na(site)) { + Rcpp::stop("site must not be NA_integer_ in rtsk_mutation_table_add_row"); + } + if (Rcpp::IntegerVector::is_na(node)) { + Rcpp::stop("node must not be NA_integer_ in rtsk_mutation_table_add_row"); + } + if (Rcpp::IntegerVector::is_na(parent)) { + Rcpp::stop("parent must not be NA_integer_ in rtsk_mutation_table_add_row"); + } + if (R_IsNA(time)) { + Rcpp::stop("time must not be NA_real_ in rtsk_mutation_table_add_row"); + } + if (!std::isfinite(time) && !std::isnan(time)) { + Rcpp::stop("time must be finite or NaN in " + "rtsk_mutation_table_add_row"); + } + + const tsk_id_t row_site = static_cast(site); + const tsk_id_t row_node = static_cast(node); + const tsk_id_t row_parent = static_cast(parent); + rtsk_table_collection_t tc_xptr(tc); + + const Rcpp::RawVector derived_state_vec = + nullable_to_vector_or_empty(derived_state); + const tsk_size_t derived_state_length = + static_cast(derived_state_vec.size()); + const char *derived_state_ptr = + derived_state_length > 0 + ? reinterpret_cast(RAW(derived_state_vec)) + : nullptr; + + const Rcpp::RawVector metadata_vec = + nullable_to_vector_or_empty(metadata); + const tsk_size_t metadata_length = + static_cast(metadata_vec.size()); + const char *metadata_ptr = + metadata_length > 0 ? reinterpret_cast(RAW(metadata_vec)) + : nullptr; + + const tsk_id_t row_id = tsk_mutation_table_add_row( + &tc_xptr->mutations, row_site, row_node, row_parent, time, + derived_state_ptr, derived_state_length, metadata_ptr, metadata_length); + if (row_id < 0) { + Rcpp::stop(tsk_strerror(row_id)); + } + return static_cast(row_id); +} diff --git a/RcppTskit/src/tests.cpp b/RcppTskit/src/tests.cpp index eb13a02..b30f7e9 100644 --- a/RcppTskit/src/tests.cpp +++ b/RcppTskit/src/tests.cpp @@ -206,3 +206,67 @@ void test_rtsk_edge_table_add_row_forced_error(const SEXP tc) { throw; } } + +// TEST-ONLY +// @title Force tskit-level error path in \\code{rtsk_site_table_add_row} +// @param tc an external pointer to table collection as a +// \code{tsk_table_collection_t} object. +// @return No return value; called for side effects - testing. +// [[Rcpp::export]] +void test_rtsk_site_table_add_row_forced_error(const SEXP tc) { + rtsk_table_collection_t tc_xptr(tc); + tsk_site_table_t &sites = tc_xptr->sites; + tsk_size_t saved_max_rows = sites.max_rows; + tsk_size_t saved_max_rows_increment = sites.max_rows_increment; + sites.max_rows = 1; + sites.max_rows_increment = static_cast(TSK_MAX_ID) + 1; + const Rcpp::RawVector ancestral_state = Rcpp::RawVector::create('A'); + try { + (void)rtsk_site_table_add_row(tc, 0.5, ancestral_state, R_NilValue); + // Lines below not hit by tests because rtsk_site_table_add_row() + // throws error # nocov start + sites.max_rows = saved_max_rows; + sites.max_rows_increment = saved_max_rows_increment; + return; + // # nocov end + } catch (...) { + sites.max_rows = saved_max_rows; + sites.max_rows_increment = saved_max_rows_increment; + throw; + } +} + +// TEST-ONLY +// @title Force tskit-level error path in \\code{rtsk_mutation_table_add_row} +// @param tc an external pointer to table collection as a +// \code{tsk_table_collection_t} object. +// @return No return value; called for side effects - testing. +// [[Rcpp::export]] +void test_rtsk_mutation_table_add_row_forced_error(const SEXP tc) { + rtsk_table_collection_t tc_xptr(tc); + tsk_mutation_table_t &mutations = tc_xptr->mutations; + tsk_size_t saved_max_rows = mutations.max_rows; + tsk_size_t saved_max_rows_increment = mutations.max_rows_increment; + mutations.max_rows = 1; + mutations.max_rows_increment = static_cast(TSK_MAX_ID) + 1; + const tsk_id_t site = + mutations.num_rows > 0 ? mutations.site[0] : static_cast(0); + const tsk_id_t node = + mutations.num_rows > 0 ? mutations.node[0] : static_cast(0); + const Rcpp::RawVector derived_state = Rcpp::RawVector::create('T'); + try { + (void)rtsk_mutation_table_add_row( + tc, static_cast(site), static_cast(node), -1, + TSK_UNKNOWN_TIME, derived_state, R_NilValue); + // Lines below not hit by tests because rtsk_mutation_table_add_row() + // throws error # nocov start + mutations.max_rows = saved_max_rows; + mutations.max_rows_increment = saved_max_rows_increment; + return; + // # nocov end + } catch (...) { + mutations.max_rows = saved_max_rows; + mutations.max_rows_increment = saved_max_rows_increment; + throw; + } +} diff --git a/RcppTskit/tests/testthat/test_TableCollection.R b/RcppTskit/tests/testthat/test_TableCollection.R index f31751a..68e8304 100644 --- a/RcppTskit/tests/testthat/test_TableCollection.R +++ b/RcppTskit/tests/testthat/test_TableCollection.R @@ -846,3 +846,398 @@ test_that("edge_table_add_row wrapper expands the table collection and handles i regexp = "TSK_ERR_TABLE_OVERFLOW" ) }) + +test_that("site_table_add_row wrapper expands the table collection and handles inputs", { + ts_file <- system.file("examples/test.trees", package = "RcppTskit") + tc_xptr <- rtsk_table_collection_load(ts_file) + + n_before <- rtsk_table_collection_get_num_sites(tc_xptr) + m_before <- rtsk_table_collection_metadata_length(tc_xptr)[["sites"]] + + new_id <- rtsk_site_table_add_row( + tc = tc_xptr, + position = 0.5, + ancestral_state = charToRaw("A"), + metadata = charToRaw("abc") + ) + expect_equal(new_id, as.integer(n_before)) # since IDs are 0-based + expect_equal( + as.integer(rtsk_table_collection_get_num_sites(tc_xptr)), + as.integer(n_before) + 1L + ) + expect_equal( + as.integer(rtsk_table_collection_metadata_length(tc_xptr)[["sites"]]), + as.integer(m_before) + 3L + ) + + tc <- TableCollection$new(xptr = tc_xptr) + n_before_method <- tc$num_sites() + new_id_method <- tc$site_table_add_row(position = 1.5, ancestral_state = "G") + expect_equal(new_id_method, as.integer(n_before_method)) + expect_equal( + as.integer(tc$num_sites()), + as.integer(n_before_method) + 1L + ) + + tc_xptr <- rtsk_table_collection_load(ts_file) + + n0 <- as.integer(rtsk_table_collection_get_num_sites(tc_xptr)) + m0 <- as.integer(rtsk_table_collection_metadata_length(tc_xptr)[["sites"]]) + + id0 <- rtsk_site_table_add_row( + tc = tc_xptr, + position = 2.5, + ancestral_state = NULL, + metadata = NULL + ) + expect_equal(id0, n0) + expect_equal( + as.integer(rtsk_table_collection_get_num_sites(tc_xptr)), + n0 + 1L + ) + expect_equal( + as.integer(rtsk_table_collection_metadata_length(tc_xptr)[["sites"]]), + m0 + ) + + expect_error( + rtsk_site_table_add_row( + tc = tc_xptr, + position = NA_real_, + ancestral_state = charToRaw("A") + ), + regexp = "position must not be NA_real_ in rtsk_site_table_add_row" + ) + expect_error( + rtsk_site_table_add_row( + tc = tc_xptr, + position = Inf, + ancestral_state = charToRaw("A") + ), + regexp = "position must be finite in rtsk_site_table_add_row" + ) + + tc <- TableCollection$new(xptr = tc_xptr) + n_before_method <- as.integer(tc$num_sites()) + expect_no_error( + tc$site_table_add_row( + position = 3.5, + ancestral_state = NULL, + metadata = NULL + ) + ) + expect_equal(as.integer(tc$num_sites()), n_before_method + 1L) + + m_before_char <- as.integer(rtsk_table_collection_metadata_length(tc$xptr)[[ + "sites" + ]]) + expect_no_warning( + tc$site_table_add_row( + position = 4.5, + ancestral_state = "T", + metadata = "abc" + ) + ) + expect_equal( + as.integer(rtsk_table_collection_metadata_length(tc$xptr)[["sites"]]), + m_before_char + 3L + ) + m_before_raw <- as.integer(rtsk_table_collection_metadata_length(tc$xptr)[[ + "sites" + ]]) + expect_no_error( + tc$site_table_add_row( + position = 5.5, + ancestral_state = charToRaw("C"), + metadata = charToRaw("xyz") + ) + ) + expect_equal( + as.integer(rtsk_table_collection_metadata_length(tc$xptr)[["sites"]]), + m_before_raw + 3L + ) + + expect_error( + tc$site_table_add_row(position = NULL, ancestral_state = "A"), + regexp = "position must be a non-NA finite numeric scalar!" + ) + expect_error( + tc$site_table_add_row(position = NaN, ancestral_state = "A"), + regexp = "position must be a non-NA finite numeric scalar!" + ) + expect_error( + tc$site_table_add_row(position = 6.5, ancestral_state = c("A", "B")), + regexp = "ancestral_state must be NULL, a raw vector, or a length-1 non-NA character string!" + ) + expect_error( + tc$site_table_add_row(position = 6.5, ancestral_state = NA_character_), + regexp = "ancestral_state must be NULL, a raw vector, or a length-1 non-NA character string!" + ) + expect_error( + tc$site_table_add_row(position = 6.5, ancestral_state = 1L), + regexp = "ancestral_state must be NULL, a raw vector, or a length-1 non-NA character string!" + ) + expect_error( + tc$site_table_add_row( + position = 6.5, + ancestral_state = "A", + metadata = c("a", "b") + ), + regexp = "metadata must be NULL, a raw vector, or a length-1 non-NA character string!" + ) + expect_error( + test_rtsk_site_table_add_row_forced_error(tc$xptr), + regexp = "TSK_ERR_TABLE_OVERFLOW" + ) +}) + +test_that("mutation_table_add_row wrapper expands the table collection and handles inputs", { + ts_file <- system.file("examples/test.trees", package = "RcppTskit") + tc_xptr <- rtsk_table_collection_load(ts_file) + expect_gt(as.integer(rtsk_table_collection_get_num_sites(tc_xptr)), 0L) + expect_gt(as.integer(rtsk_table_collection_get_num_nodes(tc_xptr)), 0L) + site <- 0L + node <- 0L + + n_before <- rtsk_table_collection_get_num_mutations(tc_xptr) + m_before <- rtsk_table_collection_metadata_length(tc_xptr)[["mutations"]] + + new_id <- rtsk_mutation_table_add_row( + tc = tc_xptr, + site = site, + node = node, + parent = -1L, + time = NaN, + derived_state = charToRaw("T"), + metadata = charToRaw("abc") + ) + expect_equal(new_id, as.integer(n_before)) # since IDs are 0-based + expect_equal( + as.integer(rtsk_table_collection_get_num_mutations(tc_xptr)), + as.integer(n_before) + 1L + ) + expect_equal( + as.integer(rtsk_table_collection_metadata_length(tc_xptr)[["mutations"]]), + as.integer(m_before) + 3L + ) + + tc <- TableCollection$new(xptr = tc_xptr) + n_before_method <- tc$num_mutations() + new_id_method <- tc$mutation_table_add_row( + site = site, + node = node, + derived_state = "C" + ) + expect_equal(new_id_method, as.integer(n_before_method)) + expect_equal( + as.integer(tc$num_mutations()), + as.integer(n_before_method) + 1L + ) + + tc_xptr <- rtsk_table_collection_load(ts_file) + site <- 0L + node <- 0L + + n0 <- as.integer(rtsk_table_collection_get_num_mutations(tc_xptr)) + m0 <- as.integer(rtsk_table_collection_metadata_length(tc_xptr)[[ + "mutations" + ]]) + + id0 <- rtsk_mutation_table_add_row( + tc = tc_xptr, + site = site, + node = node, + parent = -1L, + time = NaN, + derived_state = NULL, + metadata = NULL + ) + expect_equal(id0, n0) + expect_equal( + as.integer(rtsk_table_collection_get_num_mutations(tc_xptr)), + n0 + 1L + ) + expect_equal( + as.integer(rtsk_table_collection_metadata_length(tc_xptr)[["mutations"]]), + m0 + ) + + expect_error( + rtsk_mutation_table_add_row( + tc = tc_xptr, + site = NA_integer_, + node = node, + parent = -1L, + time = NaN, + derived_state = charToRaw("T") + ), + regexp = "site must not be NA_integer_ in rtsk_mutation_table_add_row" + ) + expect_error( + rtsk_mutation_table_add_row( + tc = tc_xptr, + site = site, + node = NA_integer_, + parent = -1L, + time = NaN, + derived_state = charToRaw("T") + ), + regexp = "node must not be NA_integer_ in rtsk_mutation_table_add_row" + ) + expect_error( + rtsk_mutation_table_add_row( + tc = tc_xptr, + site = site, + node = node, + parent = NA_integer_, + time = NaN, + derived_state = charToRaw("T") + ), + regexp = "parent must not be NA_integer_ in rtsk_mutation_table_add_row" + ) + expect_error( + rtsk_mutation_table_add_row( + tc = tc_xptr, + site = site, + node = node, + parent = -1L, + time = NA_real_, + derived_state = charToRaw("T") + ), + regexp = "time must not be NA_real_ in rtsk_mutation_table_add_row" + ) + expect_error( + rtsk_mutation_table_add_row( + tc = tc_xptr, + site = site, + node = node, + parent = -1L, + time = Inf, + derived_state = charToRaw("T") + ), + regexp = "time must be finite or NaN in rtsk_mutation_table_add_row" + ) + + tc <- TableCollection$new(xptr = tc_xptr) + n_before_method <- as.integer(tc$num_mutations()) + expect_no_error( + tc$mutation_table_add_row( + site = site, + node = node, + parent = -1L, + time = NaN, + derived_state = NULL, + metadata = NULL + ) + ) + expect_equal(as.integer(tc$num_mutations()), n_before_method + 1L) + + m_before_char <- as.integer(rtsk_table_collection_metadata_length(tc$xptr)[[ + "mutations" + ]]) + expect_no_warning( + tc$mutation_table_add_row( + site = site, + node = node, + derived_state = "G", + metadata = "abc" + ) + ) + expect_equal( + as.integer(rtsk_table_collection_metadata_length(tc$xptr)[["mutations"]]), + m_before_char + 3L + ) + m_before_raw <- as.integer(rtsk_table_collection_metadata_length(tc$xptr)[[ + "mutations" + ]]) + expect_no_error( + tc$mutation_table_add_row( + site = site, + node = node, + derived_state = charToRaw("A"), + metadata = charToRaw("xyz") + ) + ) + expect_equal( + as.integer(rtsk_table_collection_metadata_length(tc$xptr)[["mutations"]]), + m_before_raw + 3L + ) + + expect_error( + tc$mutation_table_add_row(site = NULL, node = node, derived_state = "T"), + regexp = "site must be a non-NA integer scalar!" + ) + expect_error( + tc$mutation_table_add_row(site = site, node = NULL, derived_state = "T"), + regexp = "node must be a non-NA integer scalar!" + ) + expect_error( + tc$mutation_table_add_row( + site = site, + node = node, + parent = NULL, + derived_state = "T" + ), + regexp = "parent must be a non-NA integer scalar!" + ) + expect_error( + tc$mutation_table_add_row( + site = site, + node = node, + time = NULL, + derived_state = "T" + ), + regexp = "time must be a non-NA numeric scalar that is finite or NaN!" + ) + expect_error( + tc$mutation_table_add_row( + site = site, + node = node, + time = NA_real_, + derived_state = "T" + ), + regexp = "time must be a non-NA numeric scalar that is finite or NaN!" + ) + expect_error( + tc$mutation_table_add_row( + site = site, + node = node, + time = Inf, + derived_state = "T" + ), + regexp = "time must be a non-NA numeric scalar that is finite or NaN!" + ) + expect_error( + tc$mutation_table_add_row( + site = site, + node = node, + derived_state = c("a", "b") + ), + regexp = "derived_state must be NULL, a raw vector, or a length-1 non-NA character string!" + ) + expect_error( + tc$mutation_table_add_row( + site = site, + node = node, + derived_state = NA_character_ + ), + regexp = "derived_state must be NULL, a raw vector, or a length-1 non-NA character string!" + ) + expect_error( + tc$mutation_table_add_row(site = site, node = node, derived_state = 1L), + regexp = "derived_state must be NULL, a raw vector, or a length-1 non-NA character string!" + ) + expect_error( + tc$mutation_table_add_row( + site = site, + node = node, + derived_state = "T", + metadata = c("a", "b") + ), + regexp = "metadata must be NULL, a raw vector, or a length-1 non-NA character string!" + ) + expect_error( + test_rtsk_mutation_table_add_row_forced_error(tc$xptr), + regexp = "TSK_ERR_TABLE_OVERFLOW" + ) +})