Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions R/allele_qc.R
Original file line number Diff line number Diff line change
Expand Up @@ -169,10 +169,11 @@ allele_qc <- function(target_data, ref_variants, col_to_flip = NULL,
result <- match_result[match_result$keep, , drop = FALSE]

if (remove_dups) {
dups <- vec_duplicate_detect(result[, c("chrom", "pos", "variants_id_qced")])
dups <- duplicated(result[, c("chrom", "pos", "variants_id_qced")])
if (any(dups)) {
n_removed <- sum(dups)
warning(sprintf("Removed %d duplicate variant(s), keeping first occurrence.", n_removed))
result <- result[!dups, , drop = FALSE]
warning("Duplicate variants were removed.")
}
}

Expand Down
22 changes: 18 additions & 4 deletions R/misc.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,8 @@ pval_hmp <- function(pvals) {
stop("To use this function, please install harmonicmeanp: https://cran.r-project.org/web/packages/harmonicmeanp/index.html")
}
# https://search.r-project.org/CRAN/refmans/harmonicmeanp/html/pLandau.html
pvalues <- unique(pvals)
L <- length(pvalues)
HMP <- L / sum(pvalues^-1)
L <- length(pvals)
HMP <- L / sum(pvals^-1)

LOC_L1 <- 0.874367040387922
SCALE <- 1.5707963267949
Expand Down Expand Up @@ -230,7 +229,22 @@ compute_LD <- function(X, method = c("sample", "population"),
col_means <- colMeans(X, na.rm = TRUE)
# Population variance: E[X^2] - E[X]^2
col_vars <- colMeans(X^2, na.rm = TRUE) - col_means^2
# Center; set NA -> 0 so missing pairs don't contribute to cross-products
# Center; set NA -> 0 so missing pairs don't contribute to cross-products.
# NOTE: the covariance divides by total N (not pairwise non-missing counts),
# which is an approximation that assumes uniform missingness across SNPs.
# With heterogeneous missingness, correlations between high-missing and
# low-missing columns will be slightly deflated. This matches the GCTA
# convention and is standard for PLINK-style LD computation.
if (anyNA(X)) {
na_rates <- colMeans(is.na(X))
if (max(na_rates) - min(na_rates) > 0.1) {
warning("Population LD method with heterogeneous missingness ",
"(max NA rate ", round(max(na_rates), 3),
", min ", round(min(na_rates), 3),
"): correlations may be biased. Consider using method='sample' ",
"which handles missingness via mean imputation.")
}
}
X_c <- sweep(X, 2, col_means)
X_c[is.na(X_c)] <- 0
# Covariance with N denominator
Expand Down
5 changes: 5 additions & 0 deletions R/regularized_regression.R
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,11 @@ sdpr <- function(bhat, LD, n, per_variant_sample_size = NULL, array = NULL, a =
stop("The total sample size 'n' must be a positive integer.")
}

# M must be >= 4 (SDPR uses M-2 indexing in sample_V; M < 4 causes buffer overflow)
if (M < 4) {
stop("'M' must be at least 4.")
}

# Check if per_variant_sample_size vector contains only positive values (if provided)
if (!is.null(per_variant_sample_size) && any(per_variant_sample_size <= 0)) {
stop("The 'per_variant_sample_size' vector must contain only positive values.")
Expand Down
3 changes: 2 additions & 1 deletion R/sumstats_qc.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ rss_basic_qc <- function(sumstats, LD_data, skip_region = NULL, keep_indel = TRU

if (!is.null(skip_region)) {
skip_table <- tibble(region = skip_region) %>%
separate(region, into = c("chrom", "start", "end"), sep = "[-:]")
separate(region, into = c("chrom", "start", "end"), sep = "[-:]") %>%
mutate(start = as.integer(start), end = as.integer(end))

skip_variant <- c()
for (region_index in 1:nrow(skip_table)) {
Expand Down
8 changes: 4 additions & 4 deletions src/prscs_mcmc.h
Original file line number Diff line number Diff line change
Expand Up @@ -164,13 +164,10 @@ double gigrnd(double p, double a, double b, std::mt19937& rng) {

double result = rnd / std::sqrt(a / b);

// Check if the result is zero and replace it with a small value
// Guard against exact zero (can happen with extreme parameters)
if (result == 0.0) {
result = std::numeric_limits<double>::min();
}
if (result > 1.0) {
result = 1.0;
}

return result;
}
Expand Down Expand Up @@ -280,6 +277,9 @@ std::map<std::string, arma::vec> prs_cs_mcmc(double a, double b, double* phi,
for (int jj = 0; jj < p; ++jj) {
psi(jj) = gigrnd(a - 0.5, 2.0 * delta(jj), n * std::pow(beta(jj), 2) / sigma, rng);
}
// Clamp psi to [0, 1] — matches original PRS-CS (Ge et al.):
// psi[psi>1] = 1.0 (prevents explosive local shrinkage)
psi = arma::clamp(psi, 0.0, 1.0);

if (phi_updt) {
std::gamma_distribution<double> gamma_dist_phi(1.0, 1.0 / (*phi + 1.0));
Expand Down
Loading