Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,9 @@ export(harmonize_twas)
export(invert_minmax_scaling)
export(lasso_weights)
export(lassosum_rss)
export(ld_loader)
export(lassosum_rss_weights)
export(lbf_to_alpha)
export(ld_loader)
export(load_LD_matrix)
export(load_genotype_region)
export(load_multicontext_sumstats)
Expand Down Expand Up @@ -88,6 +88,8 @@ export(multigene_udr)
export(multivariate_analysis_pipeline)
export(mvsusie_weights)
export(normalize_variant_id)
export(otters_association)
export(otters_weights)
export(parse_cs_corr)
export(parse_dentist_output)
export(parse_region)
Expand Down
12 changes: 5 additions & 7 deletions R/misc.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,11 @@ pval_acat <- function(pvals) {
if (length(pvals) == 1) {
return(pvals[1])
}
stat <- 0.00
pval_min <- 1.00

stat <- sum(qcauchy(pvals))
pval_min <- min(pval_min, min(qcauchy(pvals)))

return(pcauchy(stat / length(pvals), lower.tail = FALSE))
# ACAT statistic: T = mean(tan(pi*(0.5 - p_i)))
# Liu & Xie (2020) "Cauchy combination test"
# For very small p, tan(pi*(0.5-p)) ~ 1/(pi*p), so T is large and positive.
stat <- mean(tan(pi * (0.5 - pvals)))
return(pcauchy(stat, lower.tail = FALSE))
}

pval_hmp <- function(pvals) {
Expand Down
234 changes: 234 additions & 0 deletions R/otters.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,234 @@
#' Train eQTL weights using multiple RSS methods (OTTERS Stage I)
#'
#' Implements the training stage of the OTTERS framework (Omnibus Transcriptome
#' Test using Expression Reference Summary data, Zhang et al. 2024). Trains
#' eQTL effect size weights for a gene region using multiple summary-statistics-based
#' methods in parallel, enabling downstream omnibus TWAS testing.
#'
#' Methods are dispatched dynamically via \code{do.call(paste0(method, "_weights"), ...)},
#' so any function following the \code{*_weights(stat, LD, ...)} convention can be used
#' (e.g., \code{lassosum_rss_weights}, \code{prs_cs_weights}, \code{sdpr_weights},
#' \code{mr_ash_rss_weights}).
#'
#' P+T (pruning and thresholding) is handled internally: for each threshold, SNPs with
#' eQTL p-value below the threshold are selected, and their marginal z-scores (scaled
#' to correlation units: \code{z / sqrt(n)}) are used as weights.
#'
#' @param sumstats A data.frame of eQTL summary statistics. Must contain column \code{z}
#' (z-scores). If \code{z} is absent but \code{beta} and \code{se} are present,
#' z-scores are computed as \code{beta / se}.
#' @param LD LD correlation matrix R for the gene region (single matrix, not a list).
#' Should have row/column names matching variant identifiers if variant alignment
#' is desired.
#' @param n eQTL study sample size (scalar).
#' @param methods Named list of RSS methods and their extra arguments. Each element
#' name must correspond to a \code{*_weights} function in pecotmr (without the
#' \code{_weights} suffix). Defaults match the original OTTERS pipeline
#' (Zhang et al. 2024):
#' \itemize{
#' \item \code{lassosum_rss}: s grid = c(0.2, 0.5, 0.9, 1.0), lambda from
#' 0.0001 to 0.1 (20 values on log scale)
#' \item \code{prs_cs}: phi = 1e-4 (fixed, not learned), 1000 iterations,
#' 500 burn-in, thin = 5
#' \item \code{sdpr}: 1000 iterations, 200 burn-in, thin = 1 (no thinning)
#' }
#' To add learners (e.g., \code{mr_ash_rss}), simply append to this list.
#' @param p_thresholds Numeric vector of p-value thresholds for P+T. Set to
#' \code{NULL} to skip P+T. Default: \code{c(0.001, 0.05)}.
#'
#' @return A named list of weight vectors (one per method). Each vector has length
#' equal to \code{nrow(sumstats)}. P+T results are named \code{PT_<threshold>}.
#'
#' @examples
#' set.seed(42)
#' n <- 500; p <- 20
#' z <- rnorm(p, sd = 2)
#' R <- diag(p)
#' sumstats <- data.frame(z = z)
#' weights <- otters_weights(sumstats, R, n,
#' methods = list(lassosum_rss = list()),
#' p_thresholds = c(0.05))
#'
#' @export
otters_weights <- function(sumstats, LD, n,
                           methods = list(
                             lassosum_rss = list(),
                             prs_cs = list(phi = 1e-4,
                                           n_iter = 1000, n_burnin = 500, thin = 5),
                             sdpr = list(iter = 1000, burn = 200, thin = 1, verbose = FALSE)
                           ),
                           p_thresholds = c(0.001, 0.05)) {
  # Derive z-scores from beta/se when not supplied directly.
  if (is.null(sumstats$z)) {
    if (!is.null(sumstats$beta) && !is.null(sumstats$se)) {
      sumstats$z <- sumstats$beta / sumstats$se
    } else {
      stop("sumstats must have 'z' or ('beta' and 'se') columns.")
    }
  }

  p <- nrow(sumstats)
  z <- sumstats$z

  # Build the stat object expected by the *_weights() convention.
  # Safeguard: clamp marginal correlations to (-1, 1) as required by lassosum
  # (matches OTTERS shrink_factor logic in PRSmodels/lassosum.R lines 71-77).
  # na.rm = TRUE keeps the clamp well-defined when some z-scores are NA
  # (previously `if (NA >= 1)` would error out).
  b <- z / sqrt(n)
  max_abs_b <- max(abs(b), na.rm = TRUE)
  if (max_abs_b >= 1) {
    b <- b / (max_abs_b / 0.9999)
  }
  stat <- list(b = b, n = rep(n, p))

  results <- list()

  # --- P+T (Pruning and Thresholding) ---
  if (!is.null(p_thresholds)) {
    # Two-sided p-values from the marginal z-scores.
    pvals <- pchisq(z^2, df = 1, lower.tail = FALSE)
    for (thr in p_thresholds) {
      # which() drops NA p-values, so variants with missing z get zero weight
      # (ifelse() would have propagated NA into the weight vector).
      selected <- which(pvals < thr)
      w <- numeric(p)
      w[selected] <- stat$b[selected]
      results[[paste0("PT_", thr)]] <- w
    }
  }

  # --- RSS methods (dynamic dispatch to <method>_weights) ---
  for (method_name in names(methods)) {
    fn_name <- paste0(method_name, "_weights")
    if (!exists(fn_name, mode = "function")) {
      warning(sprintf("Method '%s' not found (looking for function '%s'). Skipping.",
                      method_name, fn_name))
      next
    }
    # The error handler RETURNS the zero-vector fallback instead of assigning
    # with `<<-` from inside the handler, which is fragile and non-idiomatic.
    results[[method_name]] <- tryCatch(
      as.numeric(do.call(fn_name, c(list(stat = stat, LD = LD), methods[[method_name]]))),
      error = function(e) {
        warning(sprintf("Method '%s' failed: %s", method_name, conditionMessage(e)))
        rep(0, p)
      }
    )
  }

  results
}


#' TWAS association testing with omnibus combination (OTTERS Stage II)
#'
#' Computes per-method TWAS z-scores using the FUSION formula and combines
#' p-values across methods via ACAT (Aggregated Cauchy Association Test) or
#' HMP (Harmonic Mean P-value).
#'
#' The FUSION TWAS statistic (Gusev et al. 2016) is:
#' \deqn{Z_{TWAS} = \frac{w^T z}{\sqrt{w^T R w}}}
#' where \eqn{w} are eQTL weights, \eqn{z} are GWAS z-scores, and \eqn{R}
#' is the LD correlation matrix.
#'
#' @param weights Named list of weight vectors (output from \code{\link{otters_weights}}
#' or any named list of numeric vectors).
#' @param gwas_z Numeric vector of GWAS z-scores, same length and order as the
#' weights vectors. Must be aligned to the same variants and allele orientation
#' as the weights and LD matrix. Use \code{\link{allele_qc}} or
#' \code{\link{rss_basic_qc}} for harmonization before calling this function.
#' @param LD LD correlation matrix R, aligned to the same variants as weights
#' and gwas_z.
#' @param combine_method Method to combine p-values across methods: \code{"acat"}
#' (default) or \code{"hmp"}.
#'
#' @return A data.frame with columns:
#' \describe{
#' \item{method}{Method name (per-method rows plus a combined row).}
#' \item{twas_z}{TWAS z-score (\code{NA} for combined row).}
#' \item{twas_pval}{TWAS p-value.}
#' \item{n_snps}{Number of non-zero weight SNPs used.}
#' }
#'
#' @examples
#' set.seed(42)
#' p <- 20
#' gwas_z <- rnorm(p)
#' R <- diag(p)
#' weights <- list(method1 = rnorm(p, sd = 0.01), method2 = rnorm(p, sd = 0.01))
#' otters_association(weights, gwas_z, R)
#'
#' @export
otters_association <- function(weights, gwas_z, LD,
                               combine_method = c("acat", "hmp")) {
  combine_method <- match.arg(combine_method)

  # --- Input validation ---
  p <- length(gwas_z)
  if (nrow(LD) != p || ncol(LD) != p) {
    stop(sprintf("LD dimensions (%d x %d) do not match gwas_z length (%d).",
                 nrow(LD), ncol(LD), p))
  }
  # An unnamed list would previously fall through `for (nm in NULL)` and
  # silently return an empty frame; fail loudly instead.
  if (length(weights) > 0 && is.null(names(weights))) {
    stop("'weights' must be a named list (one element per method).")
  }
  for (nm in names(weights)) {
    if (length(weights[[nm]]) != p) {
      stop(sprintf("Weight vector '%s' has length %d but gwas_z has length %d.",
                   nm, length(weights[[nm]]), p))
    }
  }

  # Accumulate one single-row data.frame per method and bind once at the end;
  # rbind() inside the loop copies the accumulated frame on every iteration.
  rows <- vector("list", length(weights))
  valid_pvals <- numeric(0)

  for (i in seq_along(weights)) {
    method_name <- names(weights)[i]
    w <- weights[[i]]

    # Restrict to non-zero weight SNPs; which() also drops NA weights
    # (previously `all(w == 0)` errored when w contained NA).
    nz <- which(w != 0)

    if (length(nz) == 0) {
      # No usable weights: still report the method with an NA result.
      rows[[i]] <- data.frame(
        method = method_name, twas_z = NA_real_,
        twas_pval = NA_real_, n_snps = 0L,
        stringsAsFactors = FALSE
      )
      next
    }

    # FUSION statistic z = w'z / sqrt(w'Rw), delegated to twas_z().
    res <- twas_z(weights = w[nz], z = gwas_z[nz], R = LD[nz, nz, drop = FALSE])

    z_val <- as.numeric(res$z)
    p_val <- as.numeric(res$pval)

    rows[[i]] <- data.frame(
      method = method_name, twas_z = z_val,
      twas_pval = p_val, n_snps = length(nz),
      stringsAsFactors = FALSE
    )

    # Only strictly interior, finite p-values are combinable (ACAT/HMP are
    # degenerate at exactly 0 or 1).
    if (!is.na(p_val) && is.finite(p_val) && p_val > 0 && p_val < 1) {
      valid_pvals <- c(valid_pvals, p_val)
    }
  }

  # --- Omnibus combination across methods ---
  if (length(valid_pvals) >= 2) {
    combined_pval <- switch(combine_method,
      acat = pval_acat(valid_pvals),
      hmp = pval_hmp(valid_pvals)
    )
    rows[[length(rows) + 1L]] <- data.frame(
      method = paste0(toupper(combine_method), "_combined"),
      twas_z = NA_real_,
      twas_pval = as.numeric(combined_pval),
      n_snps = NA_integer_,
      stringsAsFactors = FALSE
    )
  }

  if (length(rows) == 0) {
    # No methods supplied: return an empty but correctly-typed frame.
    return(data.frame(
      method = character(), twas_z = numeric(),
      twas_pval = numeric(), n_snps = integer(),
      stringsAsFactors = FALSE
    ))
  }

  do.call(rbind, rows)
}
40 changes: 34 additions & 6 deletions R/regularized_regression.R
Original file line number Diff line number Diff line change
Expand Up @@ -823,7 +823,7 @@ bayes_r_rss_weights <- function(sumstats, LD, ...) {
#' out <- lassosum_rss(bhat, LD, n)
#' @export
lassosum_rss <- function(bhat, LD, n,
lambda = exp(seq(log(0.001), log(0.1), length.out = 20)),
lambda = exp(seq(log(0.0001), log(0.1), length.out = 20)),
thr = 1e-4, maxiter = 10000) {
if (!is.list(LD)) {
stop("Please provide a valid list of LD blocks using 'LD'.")
Expand Down Expand Up @@ -851,10 +851,38 @@ lassosum_rss <- function(bhat, LD, n,
result
}

#' Extract weights from lassosum_rss function
#' @return A numeric vector of the posterior SNP coefficients.
#' Extract weights from lassosum_rss with shrinkage grid search
#'
#' Searches over a grid of shrinkage parameters \code{s} (default:
#' \code{c(0.2, 0.5, 0.9, 1.0)}, matching the original lassosum and OTTERS).
#' For each \code{s}, the LD matrix is shrunk as \code{(1-s)*R + s*I}, then
#' \code{lassosum_rss()} is called across the lambda path. The best
#' \code{(s, lambda)} combination is selected by the lowest objective value.
#'
#' @param stat A list with \code{$b} (effect sizes) and \code{$n} (per-variant sample sizes).
#' @param LD LD correlation matrix R (single matrix, NOT pre-shrunk).
#' @param s Numeric vector of shrinkage parameters to search over. Default:
#' \code{c(0.2, 0.5, 0.9, 1.0)} following Mak et al (2017) and OTTERS.
#' @param ... Additional arguments passed to \code{lassosum_rss()}.
#'
#' @return A numeric vector of the posterior SNP coefficients at the best (s, lambda).
#' @export
lassosum_rss_weights <- function(stat, LD, ...) {
model <- lassosum_rss(bhat = stat$b, LD = list(blk1 = LD), n = median(stat$n), ...)
return(model$beta_est)
lassosum_rss_weights <- function(stat, LD, s = c(0.2, 0.5, 0.9, 1.0), ...) {
  # Grid search over the lassosum shrinkage parameter s: for each s the LD
  # matrix is shrunk toward the identity, the lambda path is fit via
  # lassosum_rss(), and the fit with the smallest objective value wins.
  n_eff <- median(stat$n)
  n_snp <- nrow(LD)
  identity_mat <- diag(n_snp)

  # Track the best candidate seen so far; defaults to a zero weight vector.
  best <- list(objective = Inf, beta = rep(0, n_snp))

  for (shrink in s) {
    # Shrunk LD: R_s = (1 - s) * R + s * I
    shrunk_ld <- (1 - shrink) * LD + shrink * identity_mat
    fit <- lassosum_rss(bhat = stat$b, LD = list(blk1 = shrunk_ld),
                        n = n_eff, ...)
    objective <- min(fit$fbeta)
    if (objective < best$objective) {
      best <- list(objective = objective, beta = fit$beta_est)
    }
  }

  best$beta
}
2 changes: 1 addition & 1 deletion man/lassosum_rss.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 18 additions & 4 deletions man/lassosum_rss_weights.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading