From ab9c0d49e03aeb22d425629a678492a742a22783 Mon Sep 17 00:00:00 2001 From: Gao Wang Date: Tue, 7 Apr 2026 09:23:12 -0400 Subject: [PATCH] Add check_ld() for LD matrix quality checking and integrate with OTTERS Add check_ld(R, method) with three modes: - "check": diagnostic only (eigenvalues, PD status, condition number) - "shrink": (1-s)*R + s*I regularization - "eigenfix": set negative eigenvalues to 0, reconstruct (susieR approach) Integrate into otters_weights() via check_ld_method parameter (default "eigenfix"), matching OTTERS which forces PD via SVD before PRS-CS. The check runs once at the top and the repaired LD is used for all methods. Co-Authored-By: Claude Opus 4.6 (1M context) --- NAMESPACE | 1 + R/LD.R | 101 +++++++++++++++++++++++++++++++++++++++++++++++++++++ R/otters.R | 20 ++++++++++- 3 files changed, 121 insertions(+), 1 deletion(-) diff --git a/NAMESPACE b/NAMESPACE index fbe51414..16414fcc 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -52,6 +52,7 @@ export(glmnet_weights) export(harmonize_gwas) export(harmonize_twas) export(invert_minmax_scaling) +export(check_ld) export(lasso_weights) export(lassosum_rss) export(lassosum_rss_weights) diff --git a/R/LD.R b/R/LD.R index d5e73218..6d246da5 100644 --- a/R/LD.R +++ b/R/LD.R @@ -783,3 +783,104 @@ extract_block_matrices <- function(matrix, block_metadata, variant_ids) { block_metadata = block_metadata )) } + + +#' Check and optionally repair LD matrix quality +#' +#' Diagnoses positive-definiteness of an LD correlation matrix and optionally +#' repairs it. Downstream methods like PRS-CS require positive-definite LD +#' (Cholesky decomposition), while others (lassosum, SDPR) handle non-PD +#' matrices internally via their own regularization. +#' +#' Three modes are available: +#' \describe{ +#' \item{\code{"check"}}{Diagnostic only — returns eigenvalue statistics +#' without modifying the matrix.} +#' \item{\code{"shrink"}}{Apply shrinkage toward identity: +#' \code{R_s = (1 - shrinkage) * R + shrinkage * I}. Simple and fast; +#' always produces a positive-definite matrix when \code{shrinkage > 0}.} +#' \item{\code{"eigenfix"}}{Set negative eigenvalues to zero and +#' reconstruct the matrix. Matches the approach used in susieR's +#' \code{rss_lambda_constructor} and is the closest positive +#' semidefinite matrix in the Frobenius norm. Does not inflate the +#' diagonal like shrinkage does.} +#' } +#' +#' @param R Symmetric correlation matrix. +#' @param method One of \code{"check"}, \code{"shrink"}, or \code{"eigenfix"}. +#' @param r_tol Eigenvalue tolerance. Eigenvalues with absolute value below +#' \code{r_tol} are treated as zero. Default: \code{1e-8}. +#' @param shrinkage Shrinkage parameter for \code{method = "shrink"}. +#' Default: \code{0.01}. +#' +#' @return A list with components: +#' \describe{ +#' \item{R}{The (possibly repaired) LD matrix.} +#' \item{is_pd}{Logical: is the matrix positive definite?} +#' \item{is_psd}{Logical: is the matrix positive semidefinite (within r_tol)?} +#' \item{min_eigenvalue}{Smallest eigenvalue of the original matrix.} +#' \item{n_negative}{Number of negative eigenvalues (below -r_tol).} +#' \item{condition_number}{Ratio of largest to smallest positive eigenvalue +#' (\code{Inf} if any eigenvalue is zero).} +#' \item{method_applied}{Character: \code{"none"}, \code{"shrink"}, or +#' \code{"eigenfix"}.} +#' } +#' +#' @examples +#' # A well-conditioned matrix +#' R_good <- diag(5) +#' check_ld(R_good)$is_pd # TRUE +#' +#' # A matrix with negative eigenvalues +#' R_bad <- matrix(0.9, 3, 3); diag(R_bad) <- 1; R_bad[1,3] <- R_bad[3,1] <- -0.5 +#' check_ld(R_bad)$is_psd # FALSE +#' R_fixed <- check_ld(R_bad, method = "eigenfix")$R +#' check_ld(R_fixed)$is_psd # TRUE +#' +#' @export +check_ld <- function(R, + method = c("check", "shrink", "eigenfix"), + r_tol = 1e-8, + shrinkage = 0.01) { + method <- match.arg(method) + p <- nrow(R) + + # Eigen decomposition (symmetric) + eig <- eigen(R, symmetric = TRUE) + vals <- eig$values + + # Diagnostics + min_eval <- min(vals) + n_neg <- sum(vals < -r_tol) + pos_vals <- vals[vals > r_tol] + cond_num <- if (length(pos_vals) > 0) max(pos_vals) / min(pos_vals) else Inf + is_psd <- !any(vals < -r_tol) + is_pd <- all(vals > r_tol) + + method_applied <- "none" + R_out <- R + + if (method == "shrink" && !is_pd) { + R_out <- (1 - shrinkage) * R + shrinkage * diag(p) + method_applied <- "shrink" + } else if (method == "eigenfix" && !is_psd) { + # Set negative eigenvalues to zero and reconstruct + # (closest PSD matrix in Frobenius norm — susieR approach) + vals_fixed <- pmax(vals, 0) + R_out <- eig$vectors %*% diag(vals_fixed) %*% t(eig$vectors) + # Restore exact symmetry and unit diagonal + R_out <- (R_out + t(R_out)) / 2 + diag(R_out) <- 1 + method_applied <- "eigenfix" + } + + list( + R = R_out, + is_pd = is_pd, + is_psd = is_psd, + min_eigenvalue = min_eval, + n_negative = n_neg, + condition_number = cond_num, + method_applied = method_applied + ) +} diff --git a/R/otters.R b/R/otters.R index 404db65b..84e84773 100644 --- a/R/otters.R +++ b/R/otters.R @@ -35,6 +35,10 @@ #' To add learners (e.g., \code{mr_ash_rss}), simply append to this list. #' @param p_thresholds Numeric vector of p-value thresholds for P+T. Set to #' \code{NULL} to skip P+T. Default: \code{c(0.001, 0.05)}. +#' @param check_ld_method LD quality check method passed to \code{\link{check_ld}}. +#' Default \code{"eigenfix"} sets negative eigenvalues to zero (required for +#' PRS-CS Cholesky, matching OTTERS' SVD-based PD forcing). Set to \code{NULL} +#' to skip checking. #' #' @return A named list of weight vectors (one per method). Each vector has length #' equal to \code{nrow(sumstats)}. P+T results are named \code{PT_}. @@ -57,7 +61,21 @@ otters_weights <- function(sumstats, LD, n, n_iter = 1000, n_burnin = 500, thin = 5), sdpr = list(iter = 1000, burn = 200, thin = 1, verbose = FALSE) ), - p_thresholds = c(0.001, 0.05)) { + p_thresholds = c(0.001, 0.05), + check_ld_method = "eigenfix") { + # Check and optionally repair LD matrix quality + # PRS-CS requires positive-definite LD for Cholesky; OTTERS forces PD via SVD. + # Default "eigenfix" sets negative eigenvalues to 0 (susieR approach). + # Set to NULL to skip (e.g., if LD is known to be clean). + if (!is.null(check_ld_method)) { + ld_check <- check_ld(LD, method = check_ld_method) + if (ld_check$method_applied != "none") { + message(sprintf("check_ld: repaired LD via '%s' (min eigenvalue was %.2e, %d negative).", + ld_check$method_applied, ld_check$min_eigenvalue, ld_check$n_negative)) + } + LD <- ld_check$R + } + # Compute z-scores if not present if (is.null(sumstats$z)) { if (!is.null(sumstats$beta) && !is.null(sumstats$se)) {