Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,14 @@ export(bayes_n_weights)
export(bayes_r_rss_weights)
export(bayes_r_weights)
export(calculate_xi_correlation)
export(check_ld)
export(clean_context_names)
export(coloc_post_processor)
export(coloc_wrapper)
export(colocboost_analysis_pipeline)
export(compute_LD)
export(compute_qtl_enrichment)
export(ctwas_bimfile_loader)
export(ctwas_ld_loader)
export(dentist)
export(dentist_from_files)
export(dentist_single_window)
Expand All @@ -52,7 +52,6 @@ export(glmnet_weights)
export(harmonize_gwas)
export(harmonize_twas)
export(invert_minmax_scaling)
export(check_ld)
export(lasso_weights)
export(lassosum_rss)
export(lassosum_rss_weights)
Expand Down
54 changes: 41 additions & 13 deletions R/LD.R
Original file line number Diff line number Diff line change
Expand Up @@ -127,27 +127,55 @@ get_regional_ld_meta <- function(ld_reference_meta_file, region, complete_covera
#' @importFrom utils read.table
#' @importFrom stats setNames
#' @noRd
process_LD_matrix <- function(LD_file_path, bim_file_path) {
process_LD_matrix <- function(LD_file_path, snp_file_path = NULL) {
# Read .cor.xz matrix
LD_file_con <- xzfile(LD_file_path)
LD_matrix <- scan(LD_file_con, quiet = TRUE)
close(LD_file_con)
LD_matrix <- matrix(LD_matrix, ncol = sqrt(length(LD_matrix)), byrow = TRUE)

# Read bim (variant metadata)
bim_file_name <- if (!is.null(bim_file_path)) bim_file_path else paste0(LD_file_path, ".bim")
LD_variants <- read.table(bim_file_name)
col_names <- if (ncol(LD_variants) == 9) {
c("chrom", "variants", "GD", "pos", "A1", "A2", "variance", "allele_freq", "n_nomiss")
} else if (ncol(LD_variants) == 6) {
c("chrom", "variants", "GD", "pos", "A1", "A2")
# Auto-detect variant metadata file: .bim (PLINK1) or .pvar/.pvar.zst (PLINK2)
if (is.null(snp_file_path)) {
candidates <- paste0(LD_file_path, c(".bim", ".pvar", ".pvar.zst"))
found <- candidates[file.exists(candidates)]
if (length(found) == 0) stop("No variant file found for: ", LD_file_path,
" (tried .bim, .pvar, .pvar.zst)")
snp_file_path <- found[1]
}

# Detect format by extension or header
is_pvar <- grepl("\\.(pvar|pvar\\.zst)$", snp_file_path)
if (!is_pvar) {
# Check if first line starts with #CHROM (pvar header)
first_line <- readLines(snp_file_path, n = 1)
is_pvar <- grepl("^#CHROM", first_line)
}

if (is_pvar) {
# PLINK2 .pvar format: read via existing read_pvar_text()
LD_variants <- read_pvar_text(snp_file_path)
# read_pvar_text returns: chrom, id, pos, A2 (REF), A1 (ALT)
LD_variants <- LD_variants %>%
mutate(chrom = as.character(as.integer(strip_chr_prefix(chrom))),
variants = normalize_variant_id(id)) %>%
rename(GD = pos) # reuse pos column for ordering
LD_variants$GD <- LD_variants$pos <- as.integer(
sapply(LD_variants$variants, function(v) strsplit(v, ":")[[1]][2]))
} else {
stop("Unexpected number of columns (", ncol(LD_variants), ") in bim file: ", bim_file_name)
# PLINK1 .bim format
LD_variants <- read.table(snp_file_path)
col_names <- if (ncol(LD_variants) == 9) {
c("chrom", "variants", "GD", "pos", "A1", "A2", "variance", "allele_freq", "n_nomiss")
} else if (ncol(LD_variants) == 6) {
c("chrom", "variants", "GD", "pos", "A1", "A2")
} else {
stop("Unexpected number of columns (", ncol(LD_variants), ") in bim file: ", snp_file_path)
}
LD_variants <- LD_variants %>%
setNames(col_names) %>%
mutate(chrom = as.character(as.integer(strip_chr_prefix(chrom))),
variants = normalize_variant_id(variants))
}
LD_variants <- LD_variants %>%
setNames(col_names) %>%
mutate(chrom = as.character(as.integer(strip_chr_prefix(chrom))),
variants = normalize_variant_id(variants))

# Label and symmetrize the matrix
colnames(LD_matrix) <- rownames(LD_matrix) <- LD_variants$variants
Expand Down
8 changes: 0 additions & 8 deletions R/ctwas_wrapper.R
Original file line number Diff line number Diff line change
@@ -1,11 +1,3 @@
#' Utility function to load LD in ctwas analyses, to interface with cTWAS package
#' @param ld_matrix_file_path A string of file path to the LD matrix.
#' @export
ctwas_ld_loader <- function(ld_matrix_file_path) {
ld_loaded <- process_LD_matrix(ld_matrix_file_path, paste0(ld_matrix_file_path, ".bim"))
ld_loaded <- ld_loaded$LD_matrix
return(ld_loaded)
}
#' @importFrom vroom vroom
#' @export
ctwas_bimfile_loader <- function(bim_file_path) {
Expand Down
71 changes: 58 additions & 13 deletions R/ld_loader.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,31 +4,36 @@
#' demand. This avoids loading all blocks into memory simultaneously,
#' which is critical for genome-wide analyses with hundreds of blocks.
#'
#' Three modes are supported:
#' Four modes are supported:
#'
#' \describe{
#' \item{list mode (R)}{Pre-loaded list of LD correlation matrices.
#' Simple but uses more memory. Set \code{R_list}.}
#' \item{list mode (X)}{Pre-loaded list of genotype matrices (n x p_g).
#' Set \code{X_list}.}
#' \item{file mode}{Loads LD from a pecotmr metadata TSV file on the fly
#' \item{region mode}{Loads LD from a pecotmr metadata TSV file on the fly
#' via \code{\link{load_LD_matrix}}. Memory-efficient for large datasets.
#' Set \code{ld_meta_path} and \code{regions}.}
#' \item{LD_info mode}{Loads pre-computed LD blocks from \code{.cor.xz}
#' files listed in an \code{LD_info} data.frame (as returned by
#' \code{\link{get_ctwas_meta_data}}). Set \code{LD_info}.}
#' }
#'
#' @param R_list List of G precomputed LD correlation matrices (p_g x p_g).
#' Mutually exclusive with \code{X_list} and \code{ld_meta_path}.
#' @param X_list List of G genotype matrices (n x p_g). Mutually exclusive
#' with \code{R_list} and \code{ld_meta_path}.
#' @param X_list List of G genotype matrices (n x p_g).
#' @param ld_meta_path Path to a pecotmr LD metadata TSV file (as used by
#' \code{\link{load_LD_matrix}}). Mutually exclusive with \code{R_list}
#' and \code{X_list}.
#' \code{\link{load_LD_matrix}}).
#' @param regions Character vector of G region strings (e.g.,
#' \code{"chr22:17238266-19744294"}). Required when \code{ld_meta_path}
#' is used.
#' @param return_genotype Logical. When using file mode, return the
#' @param LD_info A data.frame with column \code{LD_file} (paths to
#' \code{.cor.xz} LD matrix files) and optionally \code{SNP_file}
#' (paths to companion \code{.bim} files; defaults to
#' \code{paste0(LD_file, ".bim")} if absent). As returned by
#' \code{\link{get_ctwas_meta_data}}.
#' @param return_genotype Logical. When using region mode, return the
#' genotype matrix X (\code{TRUE}) or LD correlation R (\code{FALSE},
#' default). Returning X enables low-rank RSS paths in downstream methods.
#' default).
#' @param max_variants Integer or \code{NULL}. If set, randomly subsample
#' blocks larger than this to control memory usage.
#'
Expand All @@ -46,12 +51,14 @@
#' @export
ld_loader <- function(R_list = NULL, X_list = NULL,
ld_meta_path = NULL, regions = NULL,
LD_info = NULL,
return_genotype = FALSE,
max_variants = NULL) {
# Validate: exactly one source
n_sources <- sum(!is.null(R_list), !is.null(X_list), !is.null(ld_meta_path))
n_sources <- sum(!is.null(R_list), !is.null(X_list),
!is.null(ld_meta_path), !is.null(LD_info))
if (n_sources != 1)
stop("Provide exactly one of R_list, X_list, or ld_meta_path.")
stop("Provide exactly one of R_list, X_list, ld_meta_path, or LD_info.")

if (!is.null(R_list)) {
# List mode (R matrices)
Expand All @@ -73,8 +80,8 @@ ld_loader <- function(R_list = NULL, X_list = NULL,
}
X
}
} else {
# File mode: load on the fly via load_LD_matrix()
} else if (!is.null(ld_meta_path)) {
# Region mode: load on the fly via load_LD_matrix()
if (is.null(regions))
stop("'regions' is required when using ld_meta_path.")

Expand All @@ -97,6 +104,44 @@ ld_loader <- function(R_list = NULL, X_list = NULL,
}
mat
}
} else {
# LD_info mode: load LD blocks by index from file paths
# Supports all three formats:
# 1. Pre-computed .cor.xz + .bim/.pvar (custom block format)
# 2. PLINK1 prefix (.bed/.bim/.fam) — LD computed on the fly
# 3. PLINK2 prefix (.pgen/.pvar/.psam) — LD computed on the fly
if (!is.data.frame(LD_info) || !"LD_file" %in% colnames(LD_info))
stop("LD_info must be a data.frame with column 'LD_file'.")

loader <- function(g) {
ld_path <- LD_info$LD_file[g]

# Auto-detect format by checking what files exist
if (has_plink2_files(ld_path)) {
# PLINK2: load genotypes and compute LD
geno <- load_genotype_region(ld_path)
mat <- compute_LD(geno)
} else if (has_plink1_files(ld_path)) {
# PLINK1: load genotypes and compute LD
geno <- load_genotype_region(ld_path)
mat <- compute_LD(geno)
} else {
# Pre-computed .cor.xz block
snp_file <- if ("SNP_file" %in% colnames(LD_info)) {
LD_info$SNP_file[g]
} else {
NULL # let process_LD_matrix auto-detect .bim/.pvar/.pvar.zst
}
ld <- process_LD_matrix(ld_path, snp_file)
mat <- ld$LD_matrix
}

if (!is.null(max_variants) && ncol(mat) > max_variants) {
keep <- sort(sample(ncol(mat), max_variants))
mat <- mat[keep, keep]
}
mat
}
}

loader
Expand Down
71 changes: 71 additions & 0 deletions man/check_ld.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 0 additions & 14 deletions man/ctwas_ld_loader.Rd

This file was deleted.

27 changes: 17 additions & 10 deletions man/ld_loader.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 7 additions & 1 deletion man/otters_weights.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading