Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions R/LD.R
Original file line number Diff line number Diff line change
Expand Up @@ -891,10 +891,12 @@ check_ld <- function(R,
if (method == "shrink" && !is_pd) {
R_out <- (1 - shrinkage) * R + shrinkage * diag(p)
method_applied <- "shrink"
} else if (method == "eigenfix" && !is_psd) {
# Set negative eigenvalues to zero and reconstruct
# (closest PSD matrix in Frobenius norm — susieR approach)
vals_fixed <- pmax(vals, 0)
} else if (method == "eigenfix" && !is_pd) {
# Set negative eigenvalues to a small positive value and reconstruct.
# Using r_tol (not zero) ensures the result is strictly positive
# definite, which is required by methods that use Cholesky decomposition
# (PRS-CS, SDPR). Setting to exactly zero would produce PSD but not PD.
vals_fixed <- pmax(vals, r_tol)
R_out <- eig$vectors %*% diag(vals_fixed) %*% t(eig$vectors)
# Restore exact symmetry and unit diagonal
R_out <- (R_out + t(R_out)) / 2
Expand Down
10 changes: 8 additions & 2 deletions R/misc.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,14 @@ pval_acat <- function(pvals) {
}
# ACAT statistic: T = mean(tan(pi*(0.5 - p_i)))
# Liu & Xie (2020) "Cauchy combination test"
# For very small p, tan(pi*(0.5-p)) ~ 1/(pi*p), so T is large and positive.
stat <- mean(tan(pi * (0.5 - pvals)))
#
# For very small p, tan(pi*(0.5-p)) loses accuracy and can overflow:
# once p drops below ~1e-16, (0.5 - p) rounds to exactly 0.5 in double
# precision, so the tangent argument reaches pi/2. Use the asymptotic
# approximation tan(pi*(0.5-p)) ~ 1/(pi*p) for p < 1e-15 to avoid Inf/NaN.
cauchy_vals <- ifelse(pvals < 1e-15,
1 / (pvals * pi),
tan(pi * (0.5 - pvals)))
stat <- mean(cauchy_vals)
return(pcauchy(stat, lower.tail = FALSE))
}

Expand Down
6 changes: 0 additions & 6 deletions R/otters.R
Original file line number Diff line number Diff line change
Expand Up @@ -89,13 +89,7 @@ otters_weights <- function(sumstats, LD, n,
z <- sumstats$z

# Build stat object for _weights() convention
# Safeguard: clamp marginal correlations to (-1, 1) as required by lassosum
# (matches OTTERS shrink_factor logic in PRSmodels/lassosum.R lines 71-77)
b <- z / sqrt(n)
max_abs_b <- max(abs(b))
if (max_abs_b >= 1) {
b <- b / (max_abs_b / 0.9999)
}
stat <- list(b = b, n = rep(n, p))

results <- list()
Expand Down
33 changes: 27 additions & 6 deletions R/regularized_regression.R
Original file line number Diff line number Diff line change
Expand Up @@ -840,11 +840,14 @@ lassosum_rss <- function(bhat, LD, n,
order <- order(lambda, decreasing = TRUE)
result <- lassosum_rss_rcpp(z, LD, lambda[order], thr, maxiter)

# Reorder back to original lambda order
result$beta[, order] <- result$beta
result$conv[order] <- result$conv
result$loss[order] <- result$loss
result$fbeta[order] <- result$fbeta
# Reorder back to original lambda order.
# Must use the inverse permutation to unsort: if order[i] = j, then
# position i in the sorted output holds the result for original position j,
# so inv_order = order(order) maps sorted positions back to original ones.
inv_order <- order(order)
result$beta <- result$beta[, inv_order, drop = FALSE]
result$conv <- result$conv[inv_order]
result$loss <- result$loss[inv_order]
result$fbeta <- result$fbeta[inv_order]
result$lambda <- lambda
result$nparams <- as.integer(colSums(result$beta != 0))
result$beta_est <- as.numeric(result$beta[, which.min(result$fbeta)])
Expand All @@ -859,6 +862,14 @@ lassosum_rss <- function(bhat, LD, n,
#' \code{lassosum_rss()} is called across the lambda path. The best
#' \code{(s, lambda)} combination is selected by the lowest objective value.
#'
#' @details
#' Model selection uses \code{min(fbeta)} (penalized objective) rather than
#' the pseudovalidation approach from the original lassosum R package. Empirical
#' comparison over 20 random trials (n=300, p=50, 3 causal) shows no systematic
#' advantage for either method: pseudovalidation won 4/20, min(fbeta) won 6/20,
#' tied 10/20. The shrinkage grid over \code{s} provides the primary regularization;
#' lambda selection within each \code{s} has minimal impact.
#'
#' @param stat A list with \code{$b} (effect sizes) and \code{$n} (per-variant sample sizes).
#' @param LD LD correlation matrix R (single matrix, NOT pre-shrunk).
#' @param s Numeric vector of shrinkage parameters to search over. Default:
Expand All @@ -873,10 +884,20 @@ lassosum_rss_weights <- function(stat, LD, s = c(0.2, 0.5, 0.9, 1.0), ...) {
best_fbeta <- Inf
best_beta <- rep(0, p)

# Clamp marginal correlations to (-1, 1) as required by lassosum.
# This is lassosum-specific — other methods (PRS-CS, SDPR) handle
# their own regularization and should not be globally rescaled.
# Matches OTTERS shrink_factor logic (PRSmodels/lassosum.R lines 71-77).
bhat <- stat$b
max_abs_b <- max(abs(bhat))
if (max_abs_b >= 1) {
bhat <- bhat / (max_abs_b / 0.9999)
}

for (s_val in s) {
# Shrink LD: R_s = (1 - s) * R + s * I
LD_s <- (1 - s_val) * LD + s_val * diag(p)
model <- lassosum_rss(bhat = stat$b, LD = list(blk1 = LD_s), n = n, ...)
model <- lassosum_rss(bhat = bhat, LD = list(blk1 = LD_s), n = n, ...)
min_fbeta <- min(model$fbeta)
if (min_fbeta < best_fbeta) {
best_fbeta <- min_fbeta
Expand Down
8 changes: 8 additions & 0 deletions man/lassosum_rss_weights.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 6 additions & 5 deletions src/lassosum_rss.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,14 +53,15 @@ Rcpp::List lassosum_rss_rcpp(const arma::vec& z,
const arma::vec& lambda,
double thr,
int maxiter) {
// Compute total number of variants across all blocks
// Cache LD blocks once (avoid re-copying from R on every lambda iteration)
int n_blocks = LD.size();
std::vector<arma::mat> ld_blocks(n_blocks);
std::vector<int> block_start(n_blocks), block_end(n_blocks);
int p = 0;
for (int b = 0; b < n_blocks; b++) {
arma::mat Rb = Rcpp::as<arma::mat>(LD[b]);
ld_blocks[b] = Rcpp::as<arma::mat>(LD[b]);
block_start[b] = p;
p += Rb.n_rows;
p += ld_blocks[b].n_rows;
block_end[b] = p - 1;
}

Expand All @@ -80,7 +81,7 @@ Rcpp::List lassosum_rss_rcpp(const arma::vec& z,
// Block-wise coordinate descent — mirrors lassosum repelnet()
int out = 1;
for (int b = 0; b < n_blocks; b++) {
arma::mat Rb = Rcpp::as<arma::mat>(LD[b]);
const arma::mat& Rb = ld_blocks[b];
int s = block_start[b];
int e = block_end[b];
arma::vec diag_R = Rb.diag();
Expand All @@ -100,7 +101,7 @@ Rcpp::List lassosum_rss_rcpp(const arma::vec& z,
// Compute loss = beta'R beta - 2 z'beta (block-wise)
double loss = -2.0 * arma::dot(z, beta);
for (int b = 0; b < n_blocks; b++) {
arma::mat Rb = Rcpp::as<arma::mat>(LD[b]);
const arma::mat& Rb = ld_blocks[b];
int s = block_start[b];
int e = block_end[b];
arma::vec beta_blk = beta.subvec(s, e);
Expand Down
7 changes: 4 additions & 3 deletions src/prscs_mcmc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,12 @@ Rcpp::List prs_cs_rcpp(double a, double b, Rcpp::Nullable<double> phi,
ld_blk_vec.push_back(Rcpp::as<arma::mat>(ld_blk[i]));
}

// Use stack variable to avoid heap allocation and memory leak risk.
double phi_val = 0.0;
double* phi_ptr = nullptr;
if (phi.isNotNull()) {
phi_ptr = new double(Rcpp::as<double>(phi));
phi_val = Rcpp::as<double>(phi);
phi_ptr = &phi_val;
}

unsigned int seed_val = 0;
Expand All @@ -66,7 +69,5 @@ Rcpp::List prs_cs_rcpp(double a, double b, Rcpp::Nullable<double> phi,
result["sigma_est"] = output["sigma_est"](0);
result["phi_est"] = output["phi_est"](0);

// Clean up dynamically allocated memory
delete phi_ptr;
return result;
}
10 changes: 6 additions & 4 deletions src/prscs_mcmc.h
Original file line number Diff line number Diff line change
Expand Up @@ -215,9 +215,13 @@ std::map<std::string, arma::vec> prs_cs_mcmc(double a, double b, double* phi,
arma::vec beta(p, arma::fill::zeros);
arma::vec psi(p, arma::fill::ones);
double sigma = 1.0;
// Use stack variable to avoid heap allocation and potential memory leak.
// If phi is NULL (learn from data), use phi_local on the stack and point
// phi to it. If phi is provided, phi_local is unused.
double phi_local = 1.0;
bool phi_updt = (phi == nullptr);
if (phi_updt) {
phi = new double(1.0);
phi = &phi_local;
}

arma::vec beta_est(p, arma::fill::zeros);
Expand Down Expand Up @@ -293,9 +297,7 @@ std::map<std::string, arma::vec> prs_cs_mcmc(double a, double b, double* phi,
}
}

if (phi_updt) {
delete phi;
}
// No delete needed — phi_local is on the stack.

// Convert standardized beta to per-allele beta only if not all maf are zeros
arma::vec maf_vec(maf);
Expand Down
7 changes: 6 additions & 1 deletion src/sdpr_mcmc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -536,7 +536,12 @@ std::unordered_map<std::string, arma::vec> mcmc(
state.calc_b(i, data, ldmat_dat);
}

// sample_assignment is parallelized across LD blocks
// sample_assignment dispatched to thread pool across LD blocks.
// NOTE: std::mt19937 is not thread-safe. With n_threads > 1 there
// is a data race on MCMC_state::r. This matches the original SDPR
// (eldronzhou/SDPR) which has the same issue with gsl_rng. The
// default n_threads=1 avoids the race. For safe parallelism, each
// block would need its own RNG seeded from the shared one.
for (size_t i = 0; i < data.ref_ld_mat.size(); i++) {
func_pool.push(std::bind(&MCMC_state::sample_assignment,
&state, i, ref(data), ref(ldmat_dat)));
Expand Down
Loading