diff --git a/NAMESPACE b/NAMESPACE index 00b6170..8878dc0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -12,6 +12,10 @@ export(get_hierarchical_clusters) export(get_robust_colocalization) export(get_robust_ucos) export(get_ucos_summary) +importFrom(Rfast,correls) +importFrom(Rfast,med) +importFrom(Rfast,standardise) +importFrom(Rfast,upper_tri) importFrom(grDevices,adjustcolor) importFrom(graphics,abline) importFrom(graphics,axis) diff --git a/R/colocboost.R b/R/colocboost.R index 3f9f860..ef65278 100644 --- a/R/colocboost.R +++ b/R/colocboost.R @@ -22,12 +22,20 @@ #' \code{variant} is required if sumstat for different outcomes do not have the same number of variables. #' \code{var_y} is the variance of phenotype (default is 1 meaning that the Y is in the \dQuote{standardized} scale). #' @param LD A list of correlation matrix indicating the LD matrix for each genotype. It also could be a single matrix if all sumstats were -#' obtained from the same genotypes. +#' obtained from the same genotypes. Provide either \code{LD} or \code{X_ref}, not both. +#' If neither is provided, LD-free mode is used. +#' @param X_ref A reference panel genotype matrix (N_ref x P) or a list of matrices, as an alternative to providing a precomputed +#' \code{LD} matrix. Column names must include variant names matching those in \code{sumstat}. +#' When the number of reference panel samples is less than the number of variants (N_ref < P), +#' this avoids storing the full P x P LD matrix and reduces memory usage. +#' When N_ref >= P, LD is precomputed from \code{X_ref} internally. +#' Provide either \code{LD} or \code{X_ref}, not both. If neither is provided, LD-free mode is used. #' @param dict_YX A L by 2 matrix of dictionary for \code{X} and \code{Y} if there exist subsets of outcomes corresponding to the same X matrix. #' The first column should be 1:L for L outcomes. The second column should be the index of \code{X} corresponding to the outcome. 
#' The innovation: do not provide the same matrix in \code{X} to reduce the computational burden. -#' @param dict_sumstatLD A L by 2 matrix of dictionary for \code{sumstat} and \code{LD} if there exist subsets of outcomes corresponding to the same sumstat. -#' The first column should be 1:L for L sumstat The second column should be the index of \code{LD} corresponding to the sumstat. +#' @param dict_sumstatLD A L by 2 matrix of dictionary for \code{sumstat} and \code{LD} (or \code{X_ref}) if there exist subsets of outcomes +#' corresponding to the same sumstat. +#' The first column should be 1:L for L sumstat. The second column should be the index of \code{LD} (or \code{X_ref}) corresponding to the sumstat. #' The innovation: do not provide the same matrix in \code{LD} to reduce the computational burden. #' @param outcome_names The names of outcomes, which has the same order for Y. #' @param focal_outcome_idx The index of the focal outcome if perform GWAS-xQTL ColocBoost @@ -86,6 +94,7 @@ #' @param check_null_max_ucos The smallest value of change of profile loglikelihood for each outcome in uCoS. #' @param weaker_effect If \code{weaker_effect = TRUE}, consider the weaker single effect due to coupling effects #' @param LD_free When \code{LD_free = FALSE}, objective function doesn't include LD information. +#' @param use_entropy A logical variable to consider the heterogeneity of traits for a single-effect. #' @param output_level When \code{output_level = 1}, return basic cos details for colocalization results #' When \code{output_level = 2}, return the ucos details for the single specific effects. #' When \code{output_level = 3}, return the entire Colocboost model to diagnostic results (more space). 
@@ -129,9 +138,10 @@ #' #' @family colocboost #' @importFrom stats na.omit +#' @importFrom Rfast correls standardise upper_tri med #' @export colocboost <- function(X = NULL, Y = NULL, # individual data - sumstat = NULL, LD = NULL, # summary statistics: either Z, bhat, sebhat, N, var_Y, + sumstat = NULL, LD = NULL, X_ref = NULL, # summary statistics: either Z, bhat, sebhat, N, var_Y, ###### - index dict for X match multiple Y / LD match multiple sumstat dict_YX = NULL, # Y index for 1st column, X index for 2nd column dict_sumstatLD = NULL, # sumstat index for 1st column, LD index for 2nd column @@ -187,6 +197,7 @@ colocboost <- function(X = NULL, Y = NULL, # individual data check_null_max_ucos = 0.015, # the smallest value of change of profile loglikelihood for each outcome in uCoS. weaker_effect = TRUE, LD_free = FALSE, + use_entropy = FALSE, output_level = 1, ###### - Post filtering parameters cos_npc_cutoff = 0.2, # remove the CoS with cos_npc less than this cutoff @@ -202,11 +213,15 @@ colocboost <- function(X = NULL, Y = NULL, # individual data warning("Error: No individual data (X, Y) or summary statistics (sumstat) or (effect_est, effect_se) are provided! 
Please check!") return(NULL) } + if (!is.null(LD) && !is.null(X_ref)) { + warning("Error: Provide either LD or X_ref, not both.") + return(NULL) + } # - check input data: individual level data and summary-level data validated_data <- colocboost_validate_input_data( X = X, Y = Y, - sumstat = sumstat, LD = LD, + sumstat = sumstat, LD = LD, X_ref = X_ref, dict_YX = dict_YX, dict_sumstatLD = dict_sumstatLD, effect_est = effect_est, effect_se = effect_se, effect_n = effect_n, overlap_variables = overlap_variables, @@ -227,6 +242,8 @@ colocboost <- function(X = NULL, Y = NULL, # individual data keep_variable_individual <- validated_data$keep_variable_individual sumstat <- validated_data$sumstat LD <- validated_data$LD + X_ref <- validated_data$X_ref + ref_label <- validated_data$ref_label sumstatLD_dict <- validated_data$sumstatLD_dict keep_variable_sumstat <- validated_data$keep_variable_sumstat Z <- validated_data$Z @@ -277,15 +294,15 @@ colocboost <- function(X = NULL, Y = NULL, # individual data } cb_data <- colocboost_init_data( X = X, Y = Y, dict_YX = yx_dict, - Z = Z, LD = LD, N_sumstat = N_sumstat, dict_sumstatLD = sumstatLD_dict, + Z = Z, LD = LD, X_ref = X_ref, ref_label = ref_label, + N_sumstat = N_sumstat, dict_sumstatLD = sumstatLD_dict, Var_y = Var_y, SeBhat = SeBhat, keep_variables = keep_variables, focal_outcome_idx = focal_outcome_idx, focal_outcome_variables = focal_outcome_variables, overlap_variables = overlap_variables, intercept = intercept, - standardize = standardize, - residual_correlation = residual_correlation + standardize = standardize ) ################## colocboost updates ################################### @@ -335,6 +352,8 @@ colocboost <- function(X = NULL, Y = NULL, # individual data median_cos_abs_corr = median_cos_abs_corr, weaker_effect = weaker_effect, merge_cos = merge_cos, + residual_correlation = residual_correlation, + use_entropy = use_entropy, tol = tol, output_level = output_level ) @@ -347,6 +366,8 @@ colocboost <- 
function(X = NULL, Y = NULL, # individual data npc_outcome_cutoff = npc_outcome_cutoff, pvalue_cutoff = pvalue_cutoff, weight_fudge_factor = weight_fudge_factor, + use_entropy = use_entropy, + residual_correlation = residual_correlation, coverage = coverage ) # ---- post filtering of the colocboost results (get robust trait-specific events) @@ -377,7 +398,15 @@ colocboost <- function(X = NULL, Y = NULL, # individual data #' @param X A list of genotype matrices for different outcomes, or a single matrix if all outcomes share the same genotypes. #' @param Y A list of vectors of outcomes or an N by L matrix if it is considered for the same X and multiple outcomes. #' @param sumstat A list of data.frames of summary statistics. -#' @param LD A list of correlation matrices indicating the LD matrix for each genotype. +#' @param LD A list of correlation matrix indicating the LD matrix for each genotype. It also could be a single matrix if all sumstats were +#' obtained from the same genotypes. Provide either \code{LD} or \code{X_ref}, not both. +#' If neither is provided, LD-free mode is used. +#' @param X_ref A reference panel genotype matrix (N_ref x P) or a list of matrices, as an alternative to providing a precomputed +#' \code{LD} matrix. Column names must include variant names matching those in \code{sumstat}. +#' When the number of reference panel samples is less than the number of variants (N_ref < P), +#' this avoids storing the full P x P LD matrix and reduces memory usage. +#' When N_ref >= P, LD is precomputed from \code{X_ref} internally. +#' Provide either \code{LD} or \code{X_ref}, not both. If neither is provided, LD-free mode is used. #' @param dict_YX A L by 2 matrix of dictionary for X and Y if there exist subsets of outcomes corresponding to the same X matrix. #' @param dict_sumstatLD A L by 2 matrix of dictionary for sumstat and LD if there exist subsets of outcomes corresponding to the same sumstat. 
#' @param effect_est Matrix of variable regression coefficients (i.e. regression beta values) in the genomic region @@ -394,6 +423,8 @@ colocboost <- function(X = NULL, Y = NULL, # individual data #' \item{keep_variable_individual}{List of variable names for each X matrix} #' \item{sumstat}{Processed list of summary statistics data.frames} #' \item{LD}{Processed list of LD matrices} +#' \item{X_ref}{Processed list of reference genotype matrices} +#' \item{ref_label}{Style of reference matrices} #' \item{sumstatLD_dict}{Dictionary mapping sumstat to LD} #' \item{keep_variable_sumstat}{List of variant names for each sumstat} #' \item{Z}{List of z-scores for each outcome} @@ -408,7 +439,7 @@ colocboost <- function(X = NULL, Y = NULL, # individual data #' #' @keywords internal colocboost_validate_input_data <- function(X = NULL, Y = NULL, - sumstat = NULL, LD = NULL, + sumstat = NULL, LD = NULL, X_ref = NULL, dict_YX = NULL, dict_sumstatLD = NULL, effect_est = NULL, effect_se = NULL, effect_n = NULL, overlap_variables = FALSE, @@ -418,7 +449,7 @@ colocboost_validate_input_data <- function(X = NULL, Y = NULL, cos_npc_cutoff = 0.2, npc_outcome_cutoff = 0.2) { - # - check individual level data + ############### Check individual level data ########################### if (!is.null(X) & !is.null(Y)) { # --- check input if (is.data.frame(X)) X <- as.matrix(X) @@ -557,8 +588,9 @@ colocboost_validate_input_data <- function(X = NULL, Y = NULL, } else { yx_dict <- keep_variable_individual <- NULL } + ############### Done! Check individual level data ########################### - # - check summary-level data + ############### Check summary level data ########################### if ((!is.null(sumstat)) | (!is.null(effect_est) & !is.null(effect_se))) { # --- check input of (effect_est, effect_se) if ((!is.null(effect_est) & !is.null(effect_se))) { @@ -649,8 +681,9 @@ colocboost_validate_input_data <- function(X = NULL, Y = NULL, } return(xx) }) + ############### Done! 
Check summary level data ########################### - # --- check input of LD + ############### Check input of LD or reference X_ref ########################### M_updated <- M min_abs_corr_updated <- min_abs_corr jk_equiv_corr_updated <- jk_equiv_corr @@ -659,10 +692,13 @@ colocboost_validate_input_data <- function(X = NULL, Y = NULL, cos_npc_cutoff_updated <- cos_npc_cutoff npc_outcome_cutoff_updated <- npc_outcome_cutoff - if (is.null(LD)) { + # --- Handle X_ref: convert to LD when N_ref >= P, or keep for on-the-fly computation + + + if (is.null(LD) && is.null(X_ref)) { # if no LD input, set diagonal matrix to LD warning( - "Providing the LD for summary statistics data is highly recommended. ", + "Providing the LD or X_ref for summary statistics data is highly recommended. ", "Without LD, only a single iteration will be performed under the assumption of one causal variable per outcome. ", "Additionally, the purity of CoS cannot be evaluated!" ) @@ -677,61 +713,98 @@ colocboost_validate_input_data <- function(X = NULL, Y = NULL, func_simplex_updated <- "only_z2z" cos_npc_cutoff_updated <- 0 npc_outcome_cutoff_updated <- 0 + ref_label <- "No_ref" } else { - if (is.data.frame(LD)) LD <- as.matrix(LD) - if (is.matrix(LD)) LD <- list(LD) - # - check if NA in LD matrix - num_na <- sapply(LD, sum) - if (any(is.na(num_na))){ - warning("Error: Input LD must not contain missing values (NA).") - return(NULL) + # -- Determine reference list and variant extraction for LD or X_ref + if (!is.null(LD)){ + + if (is.data.frame(LD)) LD <- as.matrix(LD) + if (is.matrix(LD)) LD <- list(LD) + # - check if NA in LD matrix + num_na <- sapply(LD, sum) + if (any(is.na(num_na))){ + warning("Error: Input LD must not contain missing values (NA).") + return(NULL) + } + ref_list <- LD + ref_label <- "LD" + + } else { + if (is.data.frame(X_ref)) X_ref <- as.matrix(X_ref) + if (is.matrix(X_ref)) X_ref <- list(X_ref) + + # When N_ref >= P, precompute LD (avoids repeated O(N_ref*P) in boosting 
loop) + # When N_ref < P, keep X_ref for on-the-fly computation (avoids P*P memory) + all_large <- all(sapply(X_ref, function(xr) nrow(xr) >= ncol(xr))) + if (all_large) { + message("N_ref >= P: precomputing LD from X_ref.") + LD <- lapply(X_ref, function(xr) { + ld <- get_cormat(xr) + rownames(ld) <- colnames(ld) <- colnames(xr) + ld + }) + X_ref <- NULL + ref_list <- LD + ref_label <- "LD" + } else { + # N_ref < P: standardize and keep for on-the-fly crossprod/(N_ref-1) + for (idx in seq_along(X_ref)) { + X_ref[[idx]] <- standardise(X_ref[[idx]], center = TRUE, scale = TRUE) + X_ref[[idx]][which(is.na(X_ref[[idx]]))] <- 0 + } + ref_list <- X_ref + ref_label <- "X_ref" + } } - # Create sumstat-LD mapping === - if (length(LD) == 1) { + + # -- Create sumstat - LD/X_ref mapping === + if (length(ref_list) == 1) { sumstatLD_dict <- rep(1, length(sumstat)) - } else if (length(LD) == length(sumstat)) { + } else if (length(ref_list) == length(sumstat)) { sumstatLD_dict <- seq_along(sumstat) } else { if (is.null(dict_sumstatLD)) { warning('Error: Please provide dict_sumstatLD: you have ', length(sumstat), - ' sumstats but only ', length(LD), ' LD matrices') + ' sumstats but only ', length(ref_list), ' ', ref_label, ' matrices') return(NULL) } else { - # - dict for sumstat to LD mapping + # - dict for sumstat to LD/X_ref mapping sumstatLD_dict <- rep(NA, length(sumstat)) for (i in 1:length(sumstat)) { tmp <- unique(dict_sumstatLD[dict_sumstatLD[, 1] == i, 2]) if (length(tmp) == 0) { - warning(paste("Error: You don't provide matched LD for sumstat", i)) + warning("Error: You don't provide matched", ref_label, "for sumstat", i) return(NULL) } else if (length(tmp) != 1) { - warning(paste("Error: You provide multiple matched LD for sumstat", i)) + warning("Error: You provide multiple matched", ref_label, "for sumstat", i) return(NULL) } else { sumstatLD_dict[i] <- tmp } } - if (max(sumstatLD_dict) > length(LD)) { - warning("Error: You don't provide enough LD matrices!") + if 
(max(sumstatLD_dict) > length(ref_list)) { + warning("Error: You don't provide enough", ref_label, "matrices!") return(NULL) } } } - # === Filter variants for each sumstat === + # -- Filter variants for each sumstat + get_ref_variants <- function(ref_mat) colnames(ref_mat) + ref_ncol <- function(ref_mat) ncol(ref_mat) for (i in seq_along(sumstat)) { # Get sumstat variants (adjust field name based on your data structure) sumstat_variants <- sumstat[[i]]$variant n_total <- length(sumstat_variants) - # Get LD variants + # Get LD/X_ref variants ld_idx <- sumstatLD_dict[i] - current_ld <- LD[[ld_idx]] - ld_variants <- rownames(current_ld) + current_ref <- ref_list[[ld_idx]] + ld_variants <- get_ref_variants(current_ref) if (is.null(ld_variants)) { - if (ncol(current_ld) != n_total){ - warning('Error: LD matrix ', ld_idx, ' has no rownames. Please ensure all LD matrices have variant names as rownames.') + if (ncol(current_ref) != n_total){ + warning('Error: ', ref_label, ' matrix ', ld_idx, ' has no variant names. Please ensure all ', ref_label, ' matrices have variant names.') return(NULL) } } @@ -744,7 +817,7 @@ colocboost_validate_input_data <- function(X = NULL, Y = NULL, ' variants since those variants are not in LD matrix ', ld_idx) keep_idx <- match(common_variants, sumstat_variants) if (length(keep_idx) == 0){ - warning('Error: Sumstat data ', i, ' is empty after filtering. Returning NULL') + warning('Error: Sumstat data ', i, ' is empty after filtering. Please check variant in sumstat data! Returning NULL!') return(NULL) } # Filter all relevant fields - ADJUST THESE FIELD NAMES TO YOUR DATA @@ -755,6 +828,7 @@ colocboost_validate_input_data <- function(X = NULL, Y = NULL, keep_variable_sumstat <- lapply(sumstat, function(xx) { xx$variant }) + ############### Done! 
Check input of LD or reference X_ref ########################### # - checking sample size existency n_exist <- sapply(sumstat, function(ss) { @@ -834,7 +908,7 @@ colocboost_validate_input_data <- function(X = NULL, Y = NULL, Z[[i.summstat]] <- z } } else { - Z <- N_sumstat <- Var_y <- SeBhat <- sumstatLD_dict <- keep_variable_sumstat <- NULL + Z <- N_sumstat <- Var_y <- SeBhat <- sumstatLD_dict <- keep_variable_sumstat <- X_ref <- ref_label <- NULL M_updated <- M min_abs_corr_updated <- min_abs_corr jk_equiv_corr_updated <- jk_equiv_corr @@ -851,6 +925,8 @@ colocboost_validate_input_data <- function(X = NULL, Y = NULL, keep_variable_individual = keep_variable_individual, sumstat = sumstat, LD = LD, + X_ref = X_ref, + ref_label = ref_label, sumstatLD_dict = sumstatLD_dict, keep_variable_sumstat = keep_variable_sumstat, Z = Z, diff --git a/R/colocboost_assemble.R b/R/colocboost_assemble.R index 94cbbe5..96b8ab1 100644 --- a/R/colocboost_assemble.R +++ b/R/colocboost_assemble.R @@ -36,6 +36,8 @@ colocboost_assemble <- function(cb_obj, median_cos_abs_corr = 0.8, weaker_effect = TRUE, merge_cos = TRUE, + residual_correlation = NULL, + use_entropy = FALSE, tol = 1e-9, output_level = 1) { if (!inherits(cb_obj, "colocboost")) { @@ -84,6 +86,7 @@ colocboost_assemble <- function(cb_obj, cb_obj <- get_max_profile(cb_obj, check_null_max = check_null_max, check_null_max_ucos = check_null_max_ucos, check_null_method = check_null_method) + # --------- about colocalized confidence sets --------------------------------- out_cos <- colocboost_assemble_cos(cb_obj, coverage = coverage, @@ -98,6 +101,8 @@ colocboost_assemble <- function(cb_obj, median_abs_corr = median_abs_corr, min_cluster_corr = min_cluster_corr, median_cos_abs_corr = median_cos_abs_corr, + use_entropy = use_entropy, + residual_correlation = residual_correlation, tol = tol ) @@ -132,10 +137,13 @@ colocboost_assemble <- function(cb_obj, } } if (!is.null(cb_obj_single$cb_data$data[[1]][["XtY"]])) { + X_dict <- 
cb_obj$cb_data$dict[i] if (is.null(cb_obj_single$cb_data$data[[1]]$XtX)) { - X_dict <- cb_obj$cb_data$dict[i] cb_obj_single$cb_data$data[[1]]$XtX <- cb_obj$cb_data$data[[X_dict]]$XtX } + if (is.null(cb_obj_single$cb_data$data[[1]]$ref_label)) { + cb_obj_single$cb_data$data[[1]]$ref_label <- cb_obj$cb_data$data[[X_dict]]$ref_label + } } class(cb_obj_single) <- "colocboost" out_ucos_each <- colocboost_assemble_ucos(cb_obj_single, @@ -208,6 +216,8 @@ colocboost_assemble <- function(cb_obj, ############# - extract colocboost output - #################### # - colocalization results + cb_obj$cb_model_para$use_entropy <- use_entropy + cb_obj$cb_model_para$residual_correlation <- residual_correlation cb_obj$cb_model_para$weight_fudge_factor <- weight_fudge_factor cb_obj$cb_model_para$coverage <- coverage cb_obj$cb_model_para$min_abs_corr <- min_abs_corr diff --git a/R/colocboost_assemble_cos.R b/R/colocboost_assemble_cos.R index 6a1243a..5912070 100644 --- a/R/colocboost_assemble_cos.R +++ b/R/colocboost_assemble_cos.R @@ -12,6 +12,8 @@ colocboost_assemble_cos <- function(cb_obj, median_abs_corr = NULL, min_cluster_corr = 0.8, median_cos_abs_corr = 0.8, + use_entropy = FALSE, + residual_correlation = NULL, tol = 1e-9) { if (!inherits(cb_obj, "colocboost")) { stop("Input must from colocboost function!") @@ -43,7 +45,8 @@ colocboost_assemble_cos <- function(cb_obj, X = cb_data$data[[X_dict]]$X, Xcorr = cb_data$data[[X_dict]]$XtX, N = cb_data$data[[coloc_outcomes[iiii]]]$N, n = n_purity, coverage = sec_coverage_thresh, min_abs_corr = min_abs_corr, median_abs_corr = median_abs_corr, - miss_idx = cb_data$data[[coloc_outcomes[iiii]]]$variable_miss + miss_idx = cb_data$data[[coloc_outcomes[iiii]]]$variable_miss, + ref_label = cb_data$data[[X_dict]]$ref_label ) check_purity[iiii] <- length(tmp) == 1 } @@ -53,7 +56,8 @@ colocboost_assemble_cos <- function(cb_obj, pos_purity <- which(check_purity) avWeight <- avWeight[, pos_purity, drop = FALSE] coloc_outcomes <- 
coloc_outcomes[pos_purity] - weights <- get_integrated_weight(avWeight, weight_fudge_factor = weight_fudge_factor) + weights <- get_integrated_weight(avWeight, weight_fudge_factor = weight_fudge_factor, + use_entropy = use_entropy, residual_correlation = residual_correlation) coloc_cos <- get_in_cos(weights, coverage = coverage) evidence_strength <- sum(weights[coloc_cos[[1]]]) @@ -83,7 +87,8 @@ colocboost_assemble_cos <- function(cb_obj, p_tmp <- matrix(get_purity(pos, X = cb_data$data[[X_dict]]$X, Xcorr = cb_data$data[[X_dict]]$XtX, - N = cb_data$data[[i]]$N, n = n_purity + N = cb_data$data[[i]]$N, n = n_purity, + ref_label = cb_data$data[[X_dict]]$ref_label ), 1, 3) purity <- c(purity, list(p_tmp)) } @@ -148,7 +153,8 @@ colocboost_assemble_cos <- function(cb_obj, X = cb_data$data[[X_dict]]$X, Xcorr = cb_data$data[[X_dict]]$XtX, N = cb_data$data[[coloc_outcomes[iiii]]]$N, n = n_purity, coverage = sec_coverage_thresh, min_abs_corr = min_abs_corr, median_abs_corr = median_abs_corr, - miss_idx = cb_data$data[[coloc_outcomes[iiii]]]$variable_miss + miss_idx = cb_data$data[[coloc_outcomes[iiii]]]$variable_miss, + ref_label = cb_data$data[[X_dict]]$ref_label ) check_purity[iiii] <- length(tmp) == 1 } @@ -158,7 +164,8 @@ colocboost_assemble_cos <- function(cb_obj, pos_purity <- which(check_purity) avWeight <- avWeight[, pos_purity, drop = FALSE] coloc_outcomes <- coloc_outcomes[pos_purity] - weights <- get_integrated_weight(avWeight, weight_fudge_factor = weight_fudge_factor) + weights <- get_integrated_weight(avWeight, weight_fudge_factor = weight_fudge_factor, + use_entropy = use_entropy, residual_correlation = residual_correlation) coloc_cos <- get_in_cos(weights, coverage = coverage) evidence_strength <- sum(weights[coloc_cos[[1]]]) @@ -208,7 +215,8 @@ colocboost_assemble_cos <- function(cb_obj, X = cb_data$data[[X_dict]]$X, Xcorr = cb_data$data[[X_dict]]$XtX, N = cb_data$data[[coloc_outcomes[iiii]]]$N, n = n_purity, coverage = sec_coverage_thresh, min_abs_corr = 
min_abs_corr, median_abs_corr = median_abs_corr, - miss_idx = cb_data$data[[coloc_outcomes[iiii]]]$variable_miss + miss_idx = cb_data$data[[coloc_outcomes[iiii]]]$variable_miss, + ref_label = cb_data$data[[X_dict]]$ref_label ) check_purity[[iiii]] <- tmp } @@ -245,7 +253,8 @@ colocboost_assemble_cos <- function(cb_obj, for (i.w in 1:length(avWeight_coloc)) { w <- avWeight_coloc[[i.w]] if (sum(w) != 0) { - weights <- get_integrated_weight(w, weight_fudge_factor = weight_fudge_factor) + weights <- get_integrated_weight(w, weight_fudge_factor = weight_fudge_factor, + use_entropy = use_entropy, residual_correlation = residual_correlation) csets <- get_in_cos(weights, coverage = coverage) coloc_cos[[fl]] <- unlist(csets) evidence_strength[[fl]] <- sum(weights[coloc_cos[[fl]]]) @@ -351,7 +360,8 @@ colocboost_assemble_cos <- function(cb_obj, X = cb_data$data[[X_dict]]$X, Xcorr = cb_data$data[[X_dict]]$XtX, miss_idx = cb_data$data[[i]]$variable_miss, - P = cb_model_para$P + P = cb_model_para$P, + ref_label = cb_data$data[[X_dict]]$ref_label ) } res <- Reduce(pmax, res) @@ -438,7 +448,8 @@ colocboost_assemble_cos <- function(cb_obj, tmp <- matrix(get_purity(pos, X = cb_data$data[[X_dict]]$X, Xcorr = cb_data$data[[X_dict]]$XtX, - N = cb_data$data[[i3]]$N, n = n_purity + N = cb_data$data[[i3]]$N, n = n_purity, + ref_label = cb_data$data[[X_dict]]$ref_label ), 1, 3) p_tmp <- rbind(p_tmp, tmp) } diff --git a/R/colocboost_assemble_ucos.R b/R/colocboost_assemble_ucos.R index 3336f19..2b44f54 100644 --- a/R/colocboost_assemble_ucos.R +++ b/R/colocboost_assemble_ucos.R @@ -64,7 +64,8 @@ colocboost_assemble_ucos <- function(cb_obj_single, } purity <- matrix(get_purity(pos, X = cb_data$data[[1]]$X, Xcorr = cb_data$data[[1]]$XtX, - N = cb_data$data[[1]]$N, n = n_purity + N = cb_data$data[[1]]$N, n = n_purity, + ref_label = cb_data$data[[1]]$ref_label ), 1, 3) purity <- as.data.frame(purity) colnames(purity) <- c("min_abs_corr", "mean_abs_corr", "median_abs_corr") @@ -131,7 +132,8 @@ 
colocboost_assemble_ucos <- function(cb_obj_single, X = cb_data$data[[1]]$X, Xcorr = cb_data$data[[1]]$XtX, N = cb_data$data[[1]]$N, n = n_purity, coverage = coverage, min_abs_corr = min_abs_corr, median_abs_corr = median_abs_corr, - miss_idx = cb_data$data[[1]]$variable_miss + miss_idx = cb_data$data[[1]]$variable_miss, + ref_label = cb_data$data[[1]]$ref_label ) if (length(check_purity) != 0) { w <- w[check_purity] @@ -237,7 +239,8 @@ colocboost_assemble_ucos <- function(cb_obj_single, X = cb_data$data[[1]]$X, Xcorr = cb_data$data[[1]]$XtX, miss_idx = cb_data$data[[1]]$variable_miss, - P = cb_model_para$P + P = cb_model_para$P, + ref_label = cb_data$data[[1]]$ref_label ) min_between[i.between, j.between] <- min_between[j.between, i.between] <- res[1] max_between[i.between, j.between] <- max_between[j.between, i.between] <- res[2] @@ -301,7 +304,8 @@ colocboost_assemble_ucos <- function(cb_obj_single, purity, matrix(get_purity(pos, X = cb_data$data[[1]]$X, Xcorr = cb_data$data[[1]]$XtX, - N = cb_data$data[[1]]$N, n = n_purity + N = cb_data$data[[1]]$N, n = n_purity, + ref_label = cb_data$data[[1]]$ref_label ), 1, 3) ) } diff --git a/R/colocboost_check_update_jk.R b/R/colocboost_check_update_jk.R index d449b03..aef6b0b 100644 --- a/R/colocboost_check_update_jk.R +++ b/R/colocboost_check_update_jk.R @@ -61,13 +61,10 @@ boost_check_update_jk_nofocal <- function(cb_model, cb_model_para, cb_data) { model_update <- cb_model[pos.update] # data_update = cb_data$data[pos.update] X_dict <- cb_data$dict[pos.update] - # adj_dependence - adj_dep <- sapply(pos.update, function(ii) cb_data$data[[ii]]$dependency) # -- define jk and jk_each cor_vals_each <- do.call(cbind, lapply(model_update, function(cc) cc$correlation)) - # abs_cor_vals_each <- sweep(abs(cor_vals_each), 2, adj_dep, `*`) - abs_cor_vals_each <- abs(cor_vals_each) * rep(adj_dep, each = nrow(cor_vals_each)) + abs_cor_vals_each <- abs(cor_vals_each) abs_cor_vals <- rowSums(abs_cor_vals_each) # jk <- 
which(abs_cor_vals == max(abs_cor_vals)) @@ -122,7 +119,9 @@ boost_check_update_jk_nofocal <- function(cb_model, cb_model_para, cb_data) { YtY = data_update[[ii]]$YtY, XtY = data_update[[ii]]$XtY, beta_k = model_update[[ii]]$beta, - miss_idx = data_update[[ii]]$variable_miss + miss_idx = data_update[[ii]]$variable_miss, + ref_label = cb_data$data[[X_dict[ii]]]$ref_label, + XtX_beta_cache = model_update[[ii]]$XtX_beta_cache ) }) change_res_each <- as.numeric(unlist(change_res_each)) @@ -197,7 +196,9 @@ boost_check_update_jk_nofocal <- function(cb_model, cb_model_para, cb_data) { YtY = data_update[[ii]]$YtY, XtY = data_update[[ii]]$XtY, beta_k = model_update[[ii]]$beta, - miss_idx = data_update[[ii]]$variable_miss + miss_idx = data_update[[ii]]$variable_miss, + ref_label = cb_data$data[[X_dict[ii]]]$ref_label, + XtX_beta_cache = model_update[[ii]]$XtX_beta_cache ) }) change_res_each <- as.numeric(unlist(change_res_each)) @@ -241,7 +242,9 @@ boost_check_update_jk_nofocal <- function(cb_model, cb_model_para, cb_data) { YtY = data_update[[ii]]$YtY, XtY = data_update[[ii]]$XtY, beta_k = model_update[[ii]]$beta, - miss_idx = data_update[[ii]]$variable_miss + miss_idx = data_update[[ii]]$variable_miss, + ref_label = cb_data$data[[X_dict[ii]]]$ref_label, + XtX_beta_cache = model_update[[ii]]$XtX_beta_cache ) }) change_res_each <- as.numeric(unlist(change_res_each)) @@ -346,13 +349,10 @@ boost_check_update_jk_focal <- function(cb_model, cb_model_para, cb_data, model_update <- cb_model[pos.update] # data_update = cb_data$data[pos.update] X_dict <- cb_data$dict[pos.update] - # adj_dependence - adj_dep <- sapply(pos.update, function(ii) cb_data$data[[ii]]$dependency) # -- define jk and jk_each cor_vals_each <- do.call(cbind, lapply(model_update, function(cc) as.vector(cc$correlation))) - # abs_cor_vals_each <- sweep(abs(cor_vals_each), 2, adj_dep, `*`) - abs_cor_vals_each <- abs(cor_vals_each) * rep(adj_dep, each = nrow(cor_vals_each)) + abs_cor_vals_each <- abs(cor_vals_each) 
# jk_each <- apply(abs_cor_vals_each, 2, which.max) jk_each <- sapply(1:ncol(abs_cor_vals_each), function(j) which.max(abs_cor_vals_each[, j])) pp_focal <- which(pos.update == focal_outcome_idx) @@ -368,7 +368,8 @@ boost_check_update_jk_focal <- function(cb_model, cb_model_para, cb_data, X = cb_data$data[[X_dict[pp_focal]]]$X, XtX = cb_data$data[[X_dict[pp_focal]]]$XtX, N = cb_data$data[[X_dict[pp_focal]]]$N, - remain_jk = 1:cb_model_para$P + remain_jk = 1:cb_model_para$P, + ref_label = cb_data$data[[X_dict[pp_focal]]]$ref_label ) }) # ----- second, if within the same LD buddies, select the following variants @@ -379,7 +380,8 @@ boost_check_update_jk_focal <- function(cb_model, cb_model_para, cb_data, # ----- third, if picked variant within the same LD buddies ld_tmp <- get_LD_jk1_jk2(jk_focal, jk_focal_tmp, XtX = cb_data$data[[X_dict[pp_focal]]]$XtX, - remain_jk = 1:cb_model_para$P + remain_jk = 1:cb_model_para$P, + ref_label = cb_data$data[[X_dict[pp_focal]]]$ref_label ) if (ld_tmp > jk_equiv_corr) { jk_focal <- jk_focal_tmp @@ -491,7 +493,7 @@ boost_check_update_jk_focal <- function(cb_model, cb_model_para, cb_data, #' @importFrom stats cor get_LD_jk1_jk2 <- function(jk1, jk2, X = NULL, XtX = NULL, N = NULL, - remain_jk = NULL) { + remain_jk = NULL, ref_label = "LD") { if (!is.null(X)) { LD_temp <- suppressWarnings({ get_cormat(X[, c(jk1, jk2)]) @@ -499,12 +501,18 @@ get_LD_jk1_jk2 <- function(jk1, jk2, LD_temp[which(is.na(LD_temp))] <- 0 LD_temp <- LD_temp[1, 2] } else if (!is.null(XtX)) { - if (length(XtX) == 1){ + if (identical(ref_label, "No_ref")) { LD_temp <- 0 } else { jk1.remain <- match(jk1, remain_jk) jk2.remain <- match(jk2, remain_jk) - LD_temp <- XtX[jk1.remain, jk2.remain] + if (identical(ref_label, "X_ref")) { + LD_temp <- suppressWarnings({ get_cormat(XtX[, c(jk1.remain, jk2.remain)]) }) + LD_temp[which(is.na(LD_temp))] <- 0 + LD_temp <- LD_temp[1, 2] + } else { + LD_temp <- XtX[jk1.remain, jk2.remain] + } } } return(LD_temp) @@ -528,7 +536,8 @@ 
check_jk_jkeach <- function(jk, jk_each, X = cb_data$data[[X_dict[i]]]$X, XtX = cb_data$data[[X_dict[i]]]$XtX, N = data_update[[i]]$N, - remain_jk = setdiff(1:length(model_update[[i]]$res), data_update[[i]]$variable_miss) + remain_jk = setdiff(1:length(model_update[[i]]$res), data_update[[i]]$variable_miss), + ref_label = cb_data$data[[X_dict[i]]]$ref_label ) judge[i] <- (change_each <= jk_equiv_loglik) & (abs(LD_temp) >= jk_equiv_corr) } else { @@ -549,7 +558,7 @@ check_pair_jkeach <- function(jk_each, #' @importFrom stats cor get_LD_jk_each <- function(jk_each, X = NULL, XtX = NULL, N = NULL, - remain_jk = NULL) { + remain_jk = NULL, ref_label = "LD") { if (!is.null(X)) { LD_temp <- suppressWarnings({ get_cormat(X[, jk_each]) @@ -557,11 +566,15 @@ check_pair_jkeach <- function(jk_each, LD_temp[which(is.na(LD_temp))] <- 0 # LD_temp <- LD_temp[1, 2] } else if (!is.null(XtX)) { - if (length(XtX) == 1){ + if (identical(ref_label, "No_ref")) { LD_temp <- matrix(0, length(jk_each), length(jk_each)) } else { jk.remain <- match(jk_each, remain_jk) - LD_temp <- XtX[jk.remain, jk.remain] + if (identical(ref_label, "X_ref")) { + LD_temp <- suppressWarnings({ get_cormat(XtX[, jk.remain]) }) + } else { + LD_temp <- XtX[jk.remain, jk.remain] + } LD_temp[which(is.na(LD_temp))] <- 0 } } @@ -582,7 +595,8 @@ check_pair_jkeach <- function(jk_each, X = cb_data$data[[X_dict[idx]]]$X, XtX = cb_data$data[[X_dict[idx]]]$XtX, N = data_update[[idx]]$N, - remain_jk = setdiff(1:length(model_update[[idx]]$res), data_update[[idx]]$variable_miss) + remain_jk = setdiff(1:length(model_update[[idx]]$res), data_update[[idx]]$variable_miss), + ref_label = cb_data$data[[X_dict[idx]]]$ref_label ) }) @@ -612,7 +626,9 @@ estimate_change_profile_res <- function(jk, X = NULL, res = NULL, N = NULL, XtX = NULL, YtY = NULL, XtY = NULL, beta_k = NULL, - miss_idx = NULL) { + miss_idx = NULL, + ref_label = "LD", + XtX_beta_cache = NULL) { if (!is.null(X)) { rtr <- sum(res^2) / (N - 1) xtr <- t(X[, jk]) %*% res 
/ (N - 1) @@ -629,10 +645,13 @@ estimate_change_profile_res <- function(jk, } else { xty <- XtY / scaling_factor } - if (length(xtx) == 1){ + if (identical(ref_label, "No_ref")) { rtr <- yty - 2 * sum(beta_k * xty) + sum(beta_k^2) + } else if (!is.null(XtX_beta_cache)) { + rtr <- yty - 2 * sum(beta_k * xty) + sum(XtX_beta_cache * beta_k) } else { - rtr <- yty - 2 * sum(beta_k * xty) + sum((xtx %*% as.matrix(beta_k)) * beta_k) + XtX_beta <- compute_XtX_product(xtx, beta_k, ref_label) + rtr <- yty - 2 * sum(beta_k * xty) + sum(XtX_beta * beta_k) } } numerator <- xtr^2 / (2 * rtr) diff --git a/R/colocboost_inference.R b/R/colocboost_inference.R index ff9f312..5e9bfd5 100644 --- a/R/colocboost_inference.R +++ b/R/colocboost_inference.R @@ -190,7 +190,7 @@ get_n_cluster <- function(hc, Sigma, m = ncol(Sigma), min_cluster_corr = 0.8) { #' @keywords cb_post_inference #' @noRd w_purity <- function(weights, X = NULL, Xcorr = NULL, N = NULL, n = 100, coverage = 0.95, - min_abs_corr = 0.5, median_abs_corr = NULL, miss_idx = NULL) { + min_abs_corr = 0.5, median_abs_corr = NULL, miss_idx = NULL, ref_label = "LD") { tmp_purity <- apply(weights, 2, function(w) { pos <- w_cs(w, coverage = coverage) @@ -198,7 +198,7 @@ w_purity <- function(weights, X = NULL, Xcorr = NULL, N = NULL, n = 100, coverag if (!is.null(Xcorr)) { pos <- match(pos, setdiff(1:length(w), miss_idx)) } - get_purity(pos, X = X, Xcorr = Xcorr, N = N, n = n) + get_purity(pos, X = X, Xcorr = Xcorr, N = N, n = n, ref_label = ref_label) }) if (is.null(median_abs_corr)) { is_pure <- which(tmp_purity[1, ] >= min_abs_corr) @@ -227,7 +227,8 @@ check_null_post <- function(cb_obj, } get_profile <- function(cs_beta, X = NULL, Y = NULL, N = NULL, - XtX = NULL, YtY = NULL, XtY = NULL, miss_idx, adj_dep = 1) { + XtX = NULL, YtY = NULL, XtY = NULL, miss_idx, + ref_label = "LD") { if (!is.null(X)) { mean((Y - X %*% as.matrix(cs_beta))^2) * N / (N - 1) } else if (!is.null(XtY)) { @@ -242,20 +243,23 @@ check_null_post <- 
function(cb_obj, } else { xty <- XtY / scaling_factor } - if (length(xtx) == 1){ - (yty - 2 * sum(cs_beta * xty) + sum(cs_beta^2)) * adj_dep + if (identical(ref_label, "No_ref")) { + (yty - 2 * sum(cs_beta * xty) + sum(cs_beta^2)) } else { - (yty - 2 * sum(cs_beta * xty) + sum((xtx %*% as.matrix(cs_beta)) * cs_beta)) * adj_dep + XtX_beta <- compute_XtX_product(xtx, cs_beta, ref_label) + (yty - 2 * sum(cs_beta * xty) + sum(XtX_beta * cs_beta)) } } } - get_cs_obj <- function(cs_beta, res, tau, func_simplex, lambda, adj_dep, LD_free, + get_cs_obj <- function(cs_beta, res, tau, func_simplex, lambda, LD_free, X = NULL, Y = NULL, N = NULL, - XtX = NULL, YtY = NULL, XtY = NULL, miss_idx = NULL) { + XtX = NULL, YtY = NULL, XtY = NULL, miss_idx = NULL, + ref_label = "LD") { correlation <- get_correlation( X = X, res = res, XtY = XtY, N = N, YtY = YtY, - XtX = XtX, beta_k = cs_beta, miss_idx = miss_idx + XtX = XtX, beta_k = cs_beta, miss_idx = miss_idx, + ref_label = ref_label ) z <- get_z(correlation, n = N, res) abs_cor <- abs(correlation) @@ -264,12 +268,12 @@ check_null_post <- function(cb_obj, P <- length(z) ld_jk <- get_LD_jk(jk, X = X, XtX = XtX, N = N, - remain_idx = setdiff(1:P, miss_idx), P = P + remain_idx = setdiff(1:P, miss_idx), P = P, ref_label = ref_label ) ld_feature <- sqrt(abs(ld_jk)) # - calculate delta delta <- boost_KL_delta( - z = z, ld_feature = ld_feature, adj_dep = adj_dep, + z = z, ld_feature = ld_feature, func_simplex = func_simplex, lambda = lambda ) scaling_factor <- if (!is.null(N)) (N - 1) else 1 @@ -280,31 +284,32 @@ check_null_post <- function(cb_obj, } obj_ld <- if (LD_free) ld_feature else rep(1, length(ld_feature)) obj_ld[miss_idx] <- 0 - exp_term <- adj_dep * obj_ld * (abs(cov_Xtr)) + exp_term <- obj_ld * (abs(cov_Xtr)) return(tau * matrixStats::logSumExp(exp_term / tau + log(delta))) } - update_res <- function(X = NULL, Y = NULL, XtX = NULL, XtY = NULL, N = NULL, cs_beta, miss_idx) { + update_res <- function(X = NULL, Y = NULL, XtX = 
NULL, XtY = NULL, N = NULL, cs_beta, miss_idx, ref_label = "LD") { if (!is.null(X)) { return(Y - X %*% cs_beta) } else if (!is.null(XtX)) { scaling.factor <- if (!is.null(N)) N - 1 else 1 beta_scaling <- if (!is.null(N)) 1 else 100 - xtx <- XtX / scaling.factor if (length(miss_idx) != 0) { xty <- XtY[-miss_idx] / scaling.factor res.tmp <- rep(0, length(XtY)) - if (length(xtx) == 1){ + if (identical(ref_label, "No_ref")) { res.tmp[-miss_idx] <- xty - cs_beta[-miss_idx] / beta_scaling } else { - res.tmp[-miss_idx] <- xty - xtx %*% (cs_beta[-miss_idx] / beta_scaling) + XtX_beta <- compute_XtX_product(XtX, cs_beta[-miss_idx] / beta_scaling, ref_label) + res.tmp[-miss_idx] <- xty - XtX_beta / scaling.factor } } else { xty <- XtY / scaling.factor - if (length(xtx) == 1){ + if (identical(ref_label, "No_ref")) { res.tmp <- xty - (cs_beta / beta_scaling) } else { - res.tmp <- xty - xtx %*% (cs_beta / beta_scaling) + XtX_beta <- compute_XtX_product(XtX, cs_beta / beta_scaling, ref_label) + res.tmp <- xty - XtX_beta / scaling.factor } } return(res.tmp) @@ -324,13 +329,14 @@ check_null_post <- function(cb_obj, cs_beta <- cb_obj$cb_model[[j]]$beta cs_beta[cs_variants] <- 0 X_dict <- cb_data$dict[j] - adj_dep <- cb_data$data[[j]]$dependency + ref_label_j <- cb_data$data[[X_dict]]$ref_label if (check_null_method == "profile") { cs_profile <- get_profile(cs_beta, X = cb_data$data[[X_dict]]$X, Y = cb_data$data[[j]]$Y, XtX = cb_data$data[[X_dict]]$XtX, XtY = cb_data$data[[j]]$XtY, YtY = cb_data$data[[j]]$YtY, N = cb_data$data[[j]]$N, - miss_idx = cb_data$data[[j]]$variable_miss, adj_dep = adj_dep + miss_idx = cb_data$data[[j]]$variable_miss, + ref_label = ref_label_j ) last_profile <- extract_last(cb_obj$cb_model[[j]]$profile_loglike_each) change <- abs(cs_profile - last_profile) @@ -348,16 +354,17 @@ check_null_post <- function(cb_obj, X = cb_data$data[[X_dict]]$X, Y = cb_data$data[[j]]$Y, XtX = cb_data$data[[X_dict]]$XtX, XtY = cb_data$data[[j]]$XtY, N = cb_data$data[[j]]$N, 
cs_beta, - miss_idx = cb_data$data[[j]]$variable_miss + miss_idx = cb_data$data[[j]]$variable_miss, + ref_label = ref_label_j ) cs_obj <- get_cs_obj(cs_beta, res, cb_obj$cb_model_para$tau, cb_obj$cb_model_para$func_simplex, cb_obj$cb_model_para$lambda, - adj_dep = adj_dep, LD_free = cb_obj$cb_model_para$LD_free, X = cb_data$data[[X_dict]]$X, N = cb_data$data[[j]]$N, XtX = cb_data$data[[X_dict]]$XtX, XtY = cb_data$data[[X_dict]]$XtY, YtY = cb_data$data[[X_dict]]$YtY, - miss_idx = cb_data$data[[j]]$variable_miss + miss_idx = cb_data$data[[j]]$variable_miss, + ref_label = ref_label_j ) last_obj <- min(cb_obj$cb_model[[j]]$obj_path) change <- abs(cs_obj - last_obj) @@ -401,7 +408,7 @@ check_null_post <- function(cb_obj, #' @keywords cb_post_inference #' @noRd #' @importFrom stats na.omit -get_purity <- function(pos, X = NULL, Xcorr = NULL, N = NULL, n = 100) { +get_purity <- function(pos, X = NULL, Xcorr = NULL, N = NULL, n = 100, ref_label = "LD") { get_upper_tri <- Rfast::upper_tri get_median <- Rfast::med @@ -420,16 +427,17 @@ get_purity <- function(pos, X = NULL, Xcorr = NULL, N = NULL, n = 100) { if (is.null(Xcorr)) { X_sub <- X[, pos] X_sub <- as.matrix(X_sub) - corr <- suppressWarnings({ - get_cormat(X_sub) - }) + corr <- suppressWarnings({ get_cormat(X_sub) }) corr[which(is.na(corr))] <- 0 value <- abs(get_upper_tri(corr)) } else { - if (length(Xcorr) == 1){ + if (identical(ref_label, "No_ref") || length(Xcorr) == 1) { value <- 0 + } else if (identical(ref_label, "X_ref")) { + corr <- suppressWarnings({ get_cormat(Xcorr[, pos]) }) + corr[which(is.na(corr))] <- 0 + value <- abs(get_upper_tri(corr)) } else { - Xcorr <- Xcorr # if (!is.null(N)) Xcorr/(N-1) else Xcorr value <- abs(get_upper_tri(Xcorr[pos, pos])) } } @@ -445,7 +453,7 @@ get_purity <- function(pos, X = NULL, Xcorr = NULL, N = NULL, n = 100) { #' @keywords cb_post_inference #' @noRd #' @importFrom stats na.omit -get_between_purity <- function(pos1, pos2, X = NULL, Xcorr = NULL, miss_idx = NULL, P = 
NULL) { +get_between_purity <- function(pos1, pos2, X = NULL, Xcorr = NULL, miss_idx = NULL, P = NULL, ref_label = "LD") { get_matrix_mult <- function(X_sub1, X_sub2) { X_sub1 <- t(X_sub1) X_sub2 <- t(X_sub2) @@ -465,15 +473,21 @@ get_between_purity <- function(pos1, pos2, X = NULL, Xcorr = NULL, miss_idx = NU X_sub2 <- scale(X[, pos2, drop = FALSE], center = T, scale = F) value <- abs(get_matrix_mult(X_sub1, X_sub2)) } else { - if (sum(Xcorr)==1){ + if (identical(ref_label, "No_ref") || sum(Xcorr) == 1) { value <- 0 } else { - if (length(miss_idx)!=0){ + if (length(miss_idx) != 0) { pos1 <- na.omit(match(pos1, setdiff(1:P, miss_idx))) pos2 <- na.omit(match(pos2, setdiff(1:P, miss_idx))) } if (length(pos1) != 0 & length(pos2) != 0) { - value <- abs(Xcorr[pos1, pos2]) + if (identical(ref_label, "X_ref")) { + X_sub1 <- scale(Xcorr[, pos1, drop = FALSE], center = T, scale = F) + X_sub2 <- scale(Xcorr[, pos2, drop = FALSE], center = T, scale = F) + value <- abs(get_matrix_mult(X_sub1, X_sub2)) + } else { + value <- abs(Xcorr[pos1, pos2]) + } } else { value <- 0 } @@ -488,17 +502,20 @@ get_between_purity <- function(pos1, pos2, X = NULL, Xcorr = NULL, miss_idx = NU #' @importFrom stats var #' @importFrom utils tail get_cos_evidence <- function(cb_obj, coloc_out, data_info) { - get_cos_config <- function(w, config_idx, weight_fudge_factor = 1.5, coverage = 0.95) { + get_cos_config <- function(w, config_idx, weight_fudge_factor = 1.5, use_entropy = FALSE, + residual_correlation = NULL, coverage = 0.95) { w_outcome <- colnames(w) config_outcome <- paste0("outcome", config_idx) pos <- which(w_outcome %in% config_outcome) w_config <- w[, pos, drop = FALSE] - int_w <- get_integrated_weight(w_config, weight_fudge_factor = weight_fudge_factor) + int_w <- get_integrated_weight(w_config, weight_fudge_factor = weight_fudge_factor, + use_entropy = use_entropy, residual_correlation = residual_correlation) unlist(get_in_cos(int_w, coverage = coverage)) } get_cos_profile <- 
function(cs_beta, outcome_idx, X = NULL, Y = NULL, N = NULL, - XtX = NULL, YtY = NULL, XtY = NULL, miss_idx = NULL, adj_dep = 1) { + XtX = NULL, YtY = NULL, XtY = NULL, miss_idx = NULL, + ref_label = "LD") { if (!is.null(X)) { cos_profile <- mean((Y - X %*% as.matrix(cs_beta))^2) * N / (N - 1) yty <- var(Y) @@ -514,10 +531,11 @@ get_cos_evidence <- function(cb_obj, coloc_out, data_info) { } else { xty <- XtY / scaling_factor } - if (length(xtx) == 1){ - cos_profile <- (yty - 2 * sum(cs_beta * xty) + sum(cs_beta^2)) * adj_dep + if (identical(ref_label, "No_ref")) { + cos_profile <- (yty - 2 * sum(cs_beta * xty) + sum(cs_beta^2)) } else { - cos_profile <- (yty - 2 * sum(cs_beta * xty) + sum((xtx %*% as.matrix(cs_beta)) * cs_beta)) * adj_dep + XtX_beta <- compute_XtX_product(xtx, cs_beta, ref_label) + cos_profile <- (yty - 2 * sum(cs_beta * xty) + sum(XtX_beta * cs_beta)) } } delta <- yty - cos_profile @@ -543,7 +561,7 @@ get_cos_evidence <- function(cb_obj, coloc_out, data_info) { XtX = cb_data$data[[X_dict]]$XtX, XtY = cb_data$data[[outcome_idx]]$XtY, YtY = cb_data$data[[outcome_idx]]$YtY, N = cb_data$data[[outcome_idx]]$N, miss_idx = cb_data$data[[outcome_idx]]$variable_miss, - adj_dep = cb_data$data[[outcome_idx]]$dependency + ref_label = cb_data$data[[X_dict]]$ref_label ) max_profile <- max(cb_obj$cb_model[[outcome_idx]]$profile_loglike_each) ifelse(max_profile < cos_profile, 0, max_profile - cos_profile) @@ -584,7 +602,10 @@ get_cos_evidence <- function(cb_obj, coloc_out, data_info) { w <- avWeight[[i]] outcomes <- coloc_out$coloc_outcomes[[i]] # most likely cos - cos <- get_cos_config(w, outcomes, weight_fudge_factor = cb_obj$cb_model_para$weight_fudge_factor, coverage = cb_obj$cb_model_para$coverage) + cos <- get_cos_config(w, outcomes, weight_fudge_factor = cb_obj$cb_model_para$weight_fudge_factor, + use_entropy = cb_obj$cb_model_para$use_entropy, + residual_correlation = cb_obj$cb_model_para$residual_correlation, + coverage = cb_obj$cb_model_para$coverage) 
profile_change_outcome <- sapply(outcomes, function(outcome_idx) { get_outcome_profile_change(outcome_idx, cos, cb_obj, check_null_max) }) @@ -612,7 +633,8 @@ get_cos_evidence <- function(cb_obj, coloc_out, data_info) { get_ucos_evidence <- function(cb_obj, ucoloc_info) { get_ucos_profile <- function(cs_beta, outcome_idx, X = NULL, Y = NULL, N = NULL, - XtX = NULL, YtY = NULL, XtY = NULL, miss_idx = NULL, adj_dep = 1) { + XtX = NULL, YtY = NULL, XtY = NULL, miss_idx = NULL, + ref_label = "LD") { if (!is.null(X)) { cos_profile <- mean((Y - X %*% as.matrix(cs_beta))^2) * N / (N - 1) yty <- var(Y) @@ -628,10 +650,11 @@ get_ucos_evidence <- function(cb_obj, ucoloc_info) { } else { xty <- XtY / scaling_factor } - if (length(xtx) == 1){ - cos_profile <- (yty - 2 * sum(cs_beta * xty) + sum(cs_beta^2)) * adj_dep + if (identical(ref_label, "No_ref")) { + cos_profile <- (yty - 2 * sum(cs_beta * xty) + sum(cs_beta^2)) } else { - cos_profile <- (yty - 2 * sum(cs_beta * xty) + sum((xtx %*% as.matrix(cs_beta)) * cs_beta)) * adj_dep + XtX_beta <- compute_XtX_product(xtx, cs_beta, ref_label) + cos_profile <- (yty - 2 * sum(cs_beta * xty) + sum(XtX_beta * cs_beta)) } } delta <- yty - cos_profile @@ -657,7 +680,7 @@ get_ucos_evidence <- function(cb_obj, ucoloc_info) { XtX = cb_data$data[[X_dict]]$XtX, XtY = cb_data$data[[outcome_idx]]$XtY, YtY = cb_data$data[[outcome_idx]]$YtY, N = cb_data$data[[outcome_idx]]$N, miss_idx = cb_data$data[[outcome_idx]]$variable_miss, - adj_dep = cb_data$data[[outcome_idx]]$dependency + ref_label = cb_data$data[[X_dict]]$ref_label ) max_profile <- max(cb_obj$cb_model[[outcome_idx]]$profile_loglike_each) ifelse(max_profile < cos_profile, 0, max_profile - cos_profile) diff --git a/R/colocboost_init.R b/R/colocboost_init.R index 4fa8815..077a217 100644 --- a/R/colocboost_init.R +++ b/R/colocboost_init.R @@ -28,14 +28,14 @@ colocboost_inits <- function() { #' @noRd #' @keywords cb_objects colocboost_init_data <- function(X, Y, dict_YX, - Z, LD, N_sumstat, 
dict_sumstatLD, + Z, LD, X_ref, ref_label, + N_sumstat, dict_sumstatLD, Var_y, SeBhat, keep_variables, focal_outcome_idx = NULL, focal_outcome_variables = TRUE, overlap_variables = FALSE, - intercept = TRUE, standardize = TRUE, - residual_correlation = NULL) { + intercept = TRUE, standardize = TRUE) { ################# initialization ####################################### cb_data <- list("data" = list()) class(cb_data) <- "colocboost" @@ -98,6 +98,7 @@ colocboost_init_data <- function(X, Y, dict_YX, ) for (i in 1:length(Y)) { cb_data$data[[flag]] <- ind_formated$result[[i]] + cb_data$data[[flag]]$ref_label <- "individual" names(cb_data$data)[flag] <- paste0("ind_outcome_", i) flag <- flag + 1 } @@ -105,39 +106,25 @@ colocboost_init_data <- function(X, Y, dict_YX, } n_ind <- flag - 1 # if summary: XtX XtY, YtY - if (!is.null(Z) & !is.null(LD)) { + if (!is.null(Z)) { ####################### need to consider more ######################### # ------ only code up one sumstat variant_lists <- keep_variables[c((n_ind_variable+1):length(keep_variables))] sumstat_formated <- process_sumstat( - Z, N_sumstat, Var_y, SeBhat, LD, + Z, N_sumstat, Var_y, SeBhat, + ld_matrices = if (ref_label == "X_ref") X_ref else LD, variant_lists, dict_sumstatLD, - keep_variable_names + keep_variable_names, + ref_label = ref_label ) for (i in 1:length(Z)) { cb_data$data[[flag]] <- sumstat_formated$results[[i]] + cb_data$data[[flag]]$ref_label <- ref_label names(cb_data$data)[flag] <- paste0("sumstat_outcome_", i) flag <- flag + 1 } cb_data$dict <- c(cb_data$dict, sumstat_formated$unified_dict + n_ind) } - # - if residual correlation matrix is not NULL, we need to adjust the study dependence - if (is.null(residual_correlation)) { - for (i in 1:length(cb_data$data)) { - cb_data$data[[i]]$dependency <- 1 - } - } else { - pseudo_inverse <- function(residual_correlation) { - eigen_Sigma <- eigen(residual_correlation) - L <- which(cumsum(eigen_Sigma$values) / sum(eigen_Sigma$values) > 0.999)[1] - 
return(eigen_Sigma$vectors[, 1:L] %*% diag(1 / eigen_Sigma$values[1:L]) %*% t(eigen_Sigma$vectors[, 1:L])) - } - Theta <- pseudo_inverse(residual_correlation) - for (i in 1:length(cb_data$data)) { - cb_data$data[[i]]$dependency <- Theta[i, i] - } - } - return(cb_data) } @@ -187,7 +174,7 @@ colocboost_init_model <- function(cb_data, YtY = data_each$YtY, XtY = data_each$XtY ) # - initial profile loglikelihood - tmp$profile_loglike_each <- estimate_profile_loglike(Y = data_each[["Y"]], N = data_each$N, YtY = data_each$YtY) * data_each$dependency + tmp$profile_loglike_each <- estimate_profile_loglike(Y = data_each[["Y"]], N = data_each$N, YtY = data_each$YtY) # - initial residual: res for ind; xtr for sumstat tmp$res <- inital_residual(Y = data_each[["Y"]], XtY = data_each$XtY) # - initial correlation between X and residual @@ -196,7 +183,8 @@ colocboost_init_model <- function(cb_data, N = data_each$N, YtY = data_each$YtY, XtX = cb_data$data[[X_dict]]$XtX, beta_k = tmp$beta, - miss_idx = data_each$variable_miss + miss_idx = data_each$variable_miss, + ref_label = cb_data$data[[X_dict]]$ref_label ) # - initial z-score between X and residual based on correlation tmp$z <- get_z(tmp$correlation, n = data_each$N, tmp$res) @@ -378,7 +366,8 @@ inital_residual <- function(Y = NULL, XtY = NULL) { # - Calculate correlation between X and res get_correlation <- function(X = NULL, res = NULL, XtY = NULL, N = NULL, YtY = NULL, XtX = NULL, beta_k = NULL, miss_idx = NULL, - XtX_beta_cache = NULL) { + XtX_beta_cache = NULL, + ref_label = "LD") { if (!is.null(X)) { corr <- suppressWarnings({ Rfast::correls(res, X)[, "correlation"] @@ -400,12 +389,13 @@ get_correlation <- function(X = NULL, res = NULL, XtY = NULL, N = NULL, Xtr <- res / scaling_factor XtY <- XtY / scaling_factor } - if (length(XtX) == 1){ + if (identical(ref_label, "No_ref")) { var_r <- YtY - 2 * sum(beta_k * XtY) + sum(beta_k^2) } else if (!is.null(XtX_beta_cache)) { var_r <- YtY - 2 * sum(beta_k * XtY) + 
sum(XtX_beta_cache * beta_k) } else { - var_r <- YtY - 2 * sum(beta_k * XtY) + sum((XtX %*% as.matrix(beta_k)) * beta_k) + XtX_beta_val <- compute_XtX_product(XtX, beta_k, ref_label) + var_r <- YtY - 2 * sum(beta_k * XtY) + sum(XtX_beta_val * beta_k) } if (var_r > 1e-6) { corr_nomiss <- Xtr / sqrt(var_r) @@ -561,7 +551,10 @@ get_multiple_testing_correction <- function(z, miss_idx = NULL, func_multi_test #' #' @return List containing processed data with optimized LD submatrix storage #' @noRd -process_sumstat <- function(Z, N, Var_y, SeBhat, ld_matrices, variant_lists, dict, target_variants) { +process_sumstat <- function(Z, N, Var_y, SeBhat, + ld_matrices, variant_lists, dict, + target_variants, + ref_label = "LD") { # Step 1: Identify unique combinations of (variant list, LD matrix) @@ -582,7 +575,6 @@ process_sumstat <- function(Z, N, Var_y, SeBhat, ld_matrices, variant_lists, dic break } } - if (!is_duplicate) { # If not a duplicate, assign its exact index unified_dict[i] <- i @@ -606,14 +598,12 @@ process_sumstat <- function(Z, N, Var_y, SeBhat, ld_matrices, variant_lists, dic current_variants <- variant_lists[[i]] current_z <- Z[[i]] current_n <- N[[i]] - # Get corresponding LD matrix from original dictionary mapping ld_index <- dict[i] - current_ld_matrix <- ld_matrices[[ld_index]] + current_ref <- ld_matrices[[ld_index]] # Find common variants between current list and target variants common_variants <- intersect(current_variants, target_variants) - # Find variants in target but not in current list missing_variants <- setdiff(target_variants, current_variants) tmp$variable_miss <- which(target_variants %in% missing_variants) @@ -625,51 +615,76 @@ process_sumstat <- function(Z, N, Var_y, SeBhat, ld_matrices, variant_lists, dic Z_extend[pos_target] <- current_z[pos_z] # Calculate submatrix for each unique entry (not duplicates) - if (length(current_ld_matrix) == 1){ - ld_submatrix <- current_ld_matrix + ref_submatrix <- NULL + + if (ref_label == "No_ref"){ + + 
ref_submatrix <- current_ref + + } else if (ref_label == "X_ref") { + + # match by column names only (rows = samples) + if (length(common_variants) > 0) { + if (i == unified_dict[i]) { + col_idx <- match(common_variants, colnames(current_ref)) + col_idx <- col_idx[!is.na(col_idx)] + if (length(col_idx) > 0) { + ref_submatrix <- current_ref[, col_idx, drop = FALSE] + colnames(ref_submatrix) <- common_variants + } + } + } + } else { - ld_submatrix <- NULL + + # ref_label == "LD": match by both row and column names if (length(common_variants) > 0) { - # Only include the submatrix if this entry is unique or is the first occurrence if (i == unified_dict[i]) { - # Check if common_variants and rownames have identical order - if (identical(common_variants, rownames(current_ld_matrix))) { - # If order is identical, use the matrix directly without reordering - ld_submatrix <- current_ld_matrix + if (identical(common_variants, rownames(current_ref))) { + # Order is identical, use matrix directly without reordering + ref_submatrix <- current_ref } else { - # If order is different, reorder using matched indices - matched_indices <- match(common_variants, rownames(current_ld_matrix)) - ld_submatrix <- current_ld_matrix[matched_indices, matched_indices, drop = FALSE] - rownames(ld_submatrix) <- common_variants - colnames(ld_submatrix) <- common_variants + # Reorder to match common_variants order + matched_indices <- match(common_variants, rownames(current_ref)) + ref_submatrix <- current_ref[matched_indices, matched_indices, drop = FALSE] + rownames(ref_submatrix) <- common_variants + colnames(ref_submatrix) <- common_variants } } } + } # Organize data if (is.null(current_n)) { - tmp$XtX <- ld_submatrix + tmp$XtX <- ref_submatrix tmp$XtY <- Z_extend tmp$YtY <- 1 } else { if (!is.null(SeBhat[[i]]) & !is.null(Var_y[[i]])) { + # var_y, shat (and bhat) are provided, so the effects are on the # *original scale*. 
adj <- 1 / (Z_extend^2 + current_n - 2) - if (!is.null(ld_submatrix)) { + tmp$YtY <- (current_n - 1) * Var_y[[i]] + tmp$XtY <- Z_extend * sqrt(adj) * Var_y[[i]] / SeBhat[[i]] + if (ref_label == "X_ref") { + # Store X_ref slice directly; XtX computed on-the-fly downstream + tmp$XtX <- ref_submatrix + } else { + # LD or No_ref: scale to original scale XtXdiag <- Var_y[[i]] * adj / (SeBhat[[i]]^2) - xtx <- t(ld_submatrix * sqrt(XtXdiag)) * sqrt(XtXdiag) + xtx <- t(ref_submatrix * sqrt(XtXdiag)) * sqrt(XtXdiag) tmp$XtX <- (xtx + t(xtx)) / 2 } - tmp$YtY <- (current_n - 1) * Var_y[[i]] - tmp$XtY <- Z_extend * sqrt(adj) * Var_y[[i]] / SeBhat[[i]] + } else { - if (!is.null(ld_submatrix)) { - tmp$XtX <- ld_submatrix - } + # Standardised scale tmp$YtY <- (current_n - 1) tmp$XtY <- sqrt(current_n - 1) * Z_extend + if (!is.null(ref_submatrix)) { + tmp$XtX <- ref_submatrix # LD, X_ref, or scalar 1 stored as-is + } } } diff --git a/R/colocboost_one_causal.R b/R/colocboost_one_causal.R index 09692d5..938a7cd 100644 --- a/R/colocboost_one_causal.R +++ b/R/colocboost_one_causal.R @@ -121,11 +121,10 @@ boost_check_update_jk_one_causal <- function(cb_model, cb_model_para, cb_data) { # -- extract data and model based on pos.update model_update <- cb_model[pos.update] X_dict <- cb_data$dict[pos.update] - adj_dep <- sapply(pos.update, function(ii) cb_data$data[[ii]]$dependency) # -- define jk and jk_each cor_vals_each <- do.call(cbind, lapply(model_update, function(cc) cc$correlation)) - abs_cor_vals_each <- sweep(abs(cor_vals_each), 2, adj_dep, `*`) + abs_cor_vals_each <- abs(cor_vals_each) jk_each <- apply(abs_cor_vals_each, 2, function(temp) { jk_temp <- which(temp == max(temp)) return(ifelse(length(jk_temp) == 1, jk_temp, sample(jk_temp, 1))) @@ -213,9 +212,8 @@ colocboost_diagLD <- function(cb_model, cb_model_para, cb_data) { pos.update <- which(cb_model_para$update_y == 1) model_update <- cb_model[pos.update] X_dict <- cb_data$dict[pos.update] - adj_dep <- sapply(pos.update, 
function(ii) cb_data$data[[ii]]$dependency) cor_vals_each <- do.call(cbind, lapply(model_update, function(cc) cc$correlation)) - abs_cor_vals_each <- sweep(abs(cor_vals_each), 2, adj_dep, `*`) + abs_cor_vals_each <- abs(cor_vals_each) jk_each <- apply(abs_cor_vals_each, 2, function(temp) { jk_temp <- which(temp == max(temp)) return(ifelse(length(jk_temp) == 1, jk_temp, sample(jk_temp, 1))) diff --git a/R/colocboost_output.R b/R/colocboost_output.R index 9502ade..09b3323 100644 --- a/R/colocboost_output.R +++ b/R/colocboost_output.R @@ -125,6 +125,8 @@ get_colocboost_summary <- function(cb_output, #' @param npc_outcome_cutoff Minimum threshold of normalized probability of colocalized traits in each CoS. #' @param pvalue_cutoff Maximum threshold of marginal p-values of colocalized variants on colocalized traits in each CoS. #' @param weight_fudge_factor The strength to integrate weight from different outcomes, default is 1.5 +#' @param use_entropy A logical variable to consider the heterogeneity of traits for a single-effect. +#' @param residual_correlation The residual correlation based on the sample overlap; it is diagonal if it is NULL. #' @param coverage A number between 0 and 1 specifying the \dQuote{coverage} of the estimated colocalization confidence sets (CoS) (default is 0.95). 
#' #' @return A \code{"colocboost"} object with some or all of the following elements: @@ -170,6 +172,8 @@ get_robust_colocalization <- function(cb_output, npc_outcome_cutoff = 0.2, pvalue_cutoff = NULL, weight_fudge_factor = 1.5, + use_entropy = FALSE, + residual_correlation = NULL, coverage = 0.95) { if (!inherits(cb_output, "colocboost")) { stop("Input must from colocboost object!") @@ -311,11 +315,14 @@ get_robust_colocalization <- function(cb_output, w_outcome <- colnames(w) config_outcome <- paste0("outcome", config_idx) pos <- which(w_outcome %in% config_outcome) - w[, pos, drop = FALSE] + ww = w[, pos, drop = FALSE] + colnames(ww) <- gsub("outcome", "Y", colnames(ww)) + ww }) - cos_details$cos_weights <- cos_weights - int_weight <- lapply(cos_weights, get_integrated_weight, weight_fudge_factor = weight_fudge_factor) + int_weight <- lapply(cos_weights, get_integrated_weight, weight_fudge_factor = weight_fudge_factor, + use_entropy = use_entropy, residual_correlation = residual_correlation) names(int_weight) <- names(cos_weights) <- colocset_names + cos_details$cos_weights <- cos_weights vcp <- as.vector(1 - apply(1 - do.call(cbind, int_weight), 1, prod)) names(vcp) <- cb_output$data_info$variables cb_output$vcp <- vcp @@ -1168,8 +1175,49 @@ get_cos <- function(cb_output, coverage = 0.95, X = NULL, Xcorr = NULL, n_purity #' Get integrated weight from different outcomes #' @keywords cb_get_functions #' @noRd -get_integrated_weight <- function(weights, weight_fudge_factor = 1.5) { - av <- apply(weights, 1, function(w) prod(w^(weight_fudge_factor / ncol(weights)))) +get_integrated_weight <- function(weights, weight_fudge_factor = 1.5, + use_entropy = FALSE, + residual_correlation = NULL) { + + # Collapse duplicate outcome columns by geometric mean (only if RE is given + # and colnames are parseable as "outcomeN" or "YN"). 
+ if (!is.null(residual_correlation) && !is.null(colnames(weights))) { + idx <- as.integer(sub("^(outcome|Y)", "", colnames(weights))) + if (anyDuplicated(idx)) { + uniq <- unique(idx) + weights <- sapply(uniq, function(k) { + cols <- which(idx == k) + if (length(cols) == 1) weights[, cols] + else exp(rowMeans(log(pmax(weights[, cols, drop = FALSE], 1e-300)))) + }) + colnames(weights) <- paste0("outcome", uniq) + idx <- uniq + } + } + + L <- ncol(weights) + + if (is.null(residual_correlation)){ + if (use_entropy){ + h <- apply(weights, 2, function(w) { w <- w[w > 0]; -sum(w * log(w)) }) + h <- h + mean(h) + alpha <- h / sum(h) + } else { + alpha <- rep(1 / L, L) + } + } else { + idx <- as.integer(sub("^(outcome|Y)", "", colnames(weights))) + RE_inv <- solve(residual_correlation[idx, idx, drop = FALSE]) + v <- if (use_entropy) { + h <- apply(weights, 2, function(w) { w <- w[w > 0]; -sum(w * log(w)) }) + h + mean(h) # same soft floor + } else { + rep(1, L) + } + alpha <- as.numeric(RE_inv %*% v) + alpha <- alpha / sum(alpha) + } + av <- apply(weights, 1, function(w) prod(w^(weight_fudge_factor * alpha))) return(av / sum(av)) } @@ -1287,78 +1335,87 @@ get_cos_purity <- function(cos, X = NULL, Xcorr = NULL, n_purity = 100) { #' @keywords cb_get_functions #' @noRd merge_ucos_details <- function(ucos_details, ucos_from_cos) { - - get_cos_ucos_purity <- function(from_ucos, from_cos){ - if (is.null(from_cos)){ - return(from_ucos) - } else { - cos <- intersect(rownames(from_ucos), rownames(from_cos)) - tmp_from_ucos <- from_ucos[match(cos, rownames(from_ucos)), , drop = FALSE] - tmp_from_cos <- from_cos[match(cos, rownames(from_cos)), , drop = FALSE] - cbind(tmp_from_ucos, tmp_from_cos) - } - } - - get_ucos_purity <- function(from_ucos, from_cos, cross_from_ucos, cross_from_cos) { - - from_ucos = ucos_details$ucos_purity$min_abs_cor - from_cos = ucos_from_cos$ucos_purity$min_abs_cor - cross_from_ucos = ucos_details$cos_ucos_purity$min_abs_cor - cross_from_cos = 
ucos_from_cos$cos_ucos_purity$min_abs_cor - - - for (id in unique(sub(":.*", "", rownames(from_cos)))) { - old <- grep(paste0("^", id, ":"), rownames(cross_from_ucos), value = TRUE)[1] - new <- grep(paste0("^", id, ":"), rownames(from_cos), value = TRUE)[1] - if (!is.na(old) && !is.na(new)) { - rownames(cross_from_ucos) <- sub(old, new, rownames(cross_from_ucos), fixed = TRUE) - colnames(cross_from_ucos) <- sub(old, new, colnames(cross_from_ucos), fixed = TRUE) - if (!is.null(cross_from_cos)){ - rownames(cross_from_cos) <- sub(old, new, rownames(cross_from_cos), fixed = TRUE) - colnames(cross_from_cos) <- sub(old, new, colnames(cross_from_cos), fixed = TRUE) + + # Build a merged purity matrix from up to 5 sources, dimensioned by the + # union of uCoS names (NOT by rownames of the inputs). + build_merged <- function(stat) { + a <- ucos_details$ucos_purity[[stat]] + b <- ucos_from_cos$ucos_purity[[stat]] + + nm_a <- names(ucos_details$ucos$ucos_index) + nm_b <- names(ucos_from_cos$ucos$ucos_index) + all_ucos <- c(nm_a, nm_b) + n <- length(all_ucos) + out <- matrix(NA_real_, n, n, dimnames = list(all_ucos, all_ucos)) + + # within-original block + if (!is.null(a) && length(nm_a)) { + ra <- intersect(nm_a, rownames(a)) + if (length(ra)) out[ra, ra] <- a[ra, ra, drop = FALSE] } - } + # within-demoted block + if (!is.null(b) && length(nm_b)) { + rb <- intersect(nm_b, rownames(b)) + if (length(rb)) out[rb, rb] <- b[rb, rb, drop = FALSE] + } + # cross block: 0 (no LD info available at this stage) + if (length(nm_a) && length(nm_b)) { + out[nm_a, nm_b] <- 0 + out[nm_b, nm_a] <- 0 + } + out } - all_ucos <- c(rownames(from_ucos), rownames(from_cos)) - n <- length(all_ucos) - result <- matrix(NA_real_, n, n, dimnames = list(all_ucos, all_ucos)) - mats <- list(from_ucos, from_cos, cross_from_ucos, cross_from_cos) - for (i in 1:n) { - for (j in 1:n) { - for (m in mats) { - if (all_ucos[i] %in% rownames(m) && all_ucos[j] %in% colnames(m)) { - result[i, j] <- result[j, i] <- 
m[all_ucos[i], all_ucos[j]] - break - } + + # cos_ucos_purity for the merged object: stack remaining-CoS rows against + # both old and new uCoS columns. + build_cos_ucos <- function(stat) { + old <- ucos_details$cos_ucos_purity[[stat]] + new <- ucos_from_cos$cos_ucos_purity[[stat]] + if (is.null(old) && is.null(new)) return(NULL) + if (is.null(old)) return(new) + if (is.null(new)) return(old) + + # Both are CoS-rows x uCoS-cols. Align by union of CoS rownames, pad with NA. + rn_old <- rownames(old); rn_new <- rownames(new) + if (is.null(rn_old)) rn_old <- paste0("cos_old_", seq_len(nrow(old))) + if (is.null(rn_new)) rn_new <- paste0("cos_new_", seq_len(nrow(new))) + rownames(old) <- rn_old + rownames(new) <- rn_new + + all_rows <- union(rn_old, rn_new) + + pad <- function(m, ncol_target, colnames_target) { + out <- matrix(NA_real_, nrow = length(all_rows), ncol = ncol_target, + dimnames = list(all_rows, colnames_target)) + out[rownames(m), ] <- m + out } - } + + old_p <- pad(old, ncol(old), colnames(old)) + new_p <- pad(new, ncol(new), colnames(new)) + cbind(old_p, new_p) } - result - } - + list( "ucos" = list( - "ucos_index" = c(ucos_details$ucos$ucos_index, ucos_from_cos$ucos$ucos_index), + "ucos_index" = c(ucos_details$ucos$ucos_index, ucos_from_cos$ucos$ucos_index), "ucos_variables" = c(ucos_details$ucos$ucos_variables, ucos_from_cos$ucos$ucos_variables) ), "ucos_outcomes" = list( "outcome_index" = c(ucos_details$ucos_outcomes$outcome_index, ucos_from_cos$ucos_outcomes$outcome_index), - "outcome_name" = c(ucos_details$ucos_outcomes$outcome_name, ucos_from_cos$ucos_outcomes$outcome_name) + "outcome_name" = c(ucos_details$ucos_outcomes$outcome_name, ucos_from_cos$ucos_outcomes$outcome_name) ), - "ucos_weight" = c(ucos_details$ucos_weight, ucos_from_cos$ucos_weight), + "ucos_weight" = c(ucos_details$ucos_weight, ucos_from_cos$ucos_weight), "ucos_top_variables" = rbind(ucos_details$ucos_top_variables, ucos_from_cos$ucos_top_variables), "ucos_purity" = list( - 
"min_abs_cor" = get_ucos_purity(ucos_details$ucos_purity$min_abs_cor, ucos_from_cos$ucos_purity$min_abs_cor, - ucos_details$cos_ucos_purity$min_abs_cor, ucos_from_cos$cos_ucos_purity$min_abs_cor), - "median_abs_cor" = get_ucos_purity(ucos_details$ucos_purity$median_abs_cor, ucos_from_cos$ucos_purity$median_abs_cor, - ucos_details$cos_ucos_purity$median_abs_cor, ucos_from_cos$cos_ucos_purity$median_abs_cor), - "max_abs_cor" = get_ucos_purity(ucos_details$ucos_purity$max_abs_cor, ucos_from_cos$ucos_purity$max_abs_cor, - ucos_details$cos_ucos_purity$max_abs_cor, ucos_from_cos$cos_ucos_purity$max_abs_cor) + "min_abs_cor" = build_merged("min_abs_cor"), + "median_abs_cor" = build_merged("median_abs_cor"), + "max_abs_cor" = build_merged("max_abs_cor") ), "cos_ucos_purity" = list( - "min_abs_cor" = get_cos_ucos_purity(ucos_from_cos$cos_ucos_purity$min_abs_cor, ucos_details$cos_ucos_purity$min_abs_cor), - "median_abs_cor" = get_cos_ucos_purity(ucos_from_cos$cos_ucos_purity$median_abs_cor, ucos_details$cos_ucos_purity$median_abs_cor), - "max_abs_cor" = get_cos_ucos_purity(ucos_from_cos$cos_ucos_purity$max_abs_cor, ucos_details$cos_ucos_purity$max_abs_cor) + "min_abs_cor" = build_cos_ucos("min_abs_cor"), + "median_abs_cor" = build_cos_ucos("median_abs_cor"), + "max_abs_cor" = build_cos_ucos("max_abs_cor") ), "ucos_outcomes_npc" = rbind(ucos_details$ucos_outcomes_npc, ucos_from_cos$ucos_outcomes_npc) ) diff --git a/R/colocboost_update.R b/R/colocboost_update.R index 0e50aba..7d51bce 100644 --- a/R/colocboost_update.R +++ b/R/colocboost_update.R @@ -16,8 +16,6 @@ colocboost_update <- function(cb_model, cb_model_para, cb_data) { for (i in pos.update) { update_jk <- cb_model_para$update_temp$real_update_jk[i] X_dict <- cb_data$dict[i] - # adj_dependence - adj_dep <- cb_data$data[[i]]$dependency ########## BEGIN: MAIN CALCULATION ################### @@ -32,7 +30,8 @@ colocboost_update <- function(cb_model, cb_model_para, cb_data) { XtX = cb_data$data[[X_dict]]$XtX, N = 
cb_data$data[[i]]$N, remain_idx = setdiff(1:cb_model_para$P, cb_data$data[[i]]$variable_miss), - P = cb_model_para$P + P = cb_model_para$P, + ref_label = cb_data$data[[X_dict]]$ref_label ) cb_model[[i]]$ld_jk <- rbind(cb_model[[i]]$ld_jk, ld_jk) } @@ -48,7 +47,7 @@ colocboost_update <- function(cb_model, cb_model_para, cb_data) { } delta <- boost_KL_delta( z = cb_model[[i]]$z, - ld_feature = ld_feature, adj_dep = adj_dep, + ld_feature = ld_feature, func_simplex = cb_model_para$func_simplex, lambda = lambda_outcome ) @@ -64,15 +63,14 @@ colocboost_update <- function(cb_model, cb_model_para, cb_data) { if (length(cb_data$data[[i]]$variable_miss) != 0) { obj_ld[cb_data$data[[i]]$variable_miss] <- 0 } - exp_term <- adj_dep * obj_ld * (abs(cov_Xtr)) + exp_term <- obj_ld * (abs(cov_Xtr)) # - calculate individual objective function cb_model[[i]]$obj_path <- c(cb_model[[i]]$obj_path, tau * matrixStats::logSumExp(exp_term / tau + log(delta))) cb_model[[i]]$obj_single <- c(cb_model[[i]]$obj_single, abs(cov_Xtr[update_jk])) exp_term <- exp_term - max(exp_term) exp_abs_cor <- delta * exp(exp_term / tau) - # weights <- adj_dep * ld_feature / scaling_factor * exp_abs_cor / sum(exp_abs_cor) - weights <- adj_dep * obj_ld * exp_abs_cor / sum(exp_abs_cor) + weights <- obj_ld * exp_abs_cor / sum(exp_abs_cor) weights <- weights / sum(weights) # cb_model[[i]]$weights_path <- rbind(cb_model[[i]]$weights_path, as.vector(weights)) cb_model[[i]]$weights_path <- c(cb_model[[i]]$weights_path, list(as.vector(weights))) @@ -102,44 +100,32 @@ colocboost_update <- function(cb_model, cb_model_para, cb_data) { x <- cb_data$data[[X_dict]]$X y <- cb_data$data[[i]]$Y beta <- cb_model[[i]]$beta - profile_log <- mean((y - x %*% beta)^2) * adj_dep + profile_log <- mean((y - x %*% beta)^2) } else if (!is.null(cb_data$data[[X_dict]]$XtX)) { beta_scaling <- cb_model[[i]]$beta_scaling # - summary statistics xtx <- cb_data$data[[X_dict]]$XtX + ref_label_i <- cb_data$data[[X_dict]]$ref_label cb_model[[i]]$res 
<- rep(0, cb_model_para$P) if (length(cb_data$data[[i]]$variable_miss) != 0) { beta <- cb_model[[i]]$beta[-cb_data$data[[i]]$variable_miss] / beta_scaling xty <- cb_data$data[[i]]$XtY[-cb_data$data[[i]]$variable_miss] - if (length(xtx) == 1){ - XtX_beta <- beta - cb_model[[i]]$res[-cb_data$data[[i]]$variable_miss] <- xty - scaling_factor * beta - } else { - XtX_beta <- xtx %*% beta - cb_model[[i]]$res[-cb_data$data[[i]]$variable_miss] <- xty - scaling_factor * XtX_beta - } + XtX_beta <- compute_XtX_product(xtx, beta, ref_label_i) + cb_model[[i]]$res[-cb_data$data[[i]]$variable_miss] <- xty - scaling_factor * XtX_beta } else { beta <- cb_model[[i]]$beta / beta_scaling xty <- cb_data$data[[i]]$XtY - if (length(xtx) == 1){ - XtX_beta <- beta - cb_model[[i]]$res <- xty - scaling_factor * beta - } else { - XtX_beta <- xtx %*% beta - cb_model[[i]]$res <- xty - scaling_factor * XtX_beta - } + XtX_beta <- compute_XtX_product(xtx, beta, ref_label_i) + cb_model[[i]]$res <- xty - scaling_factor * XtX_beta } # - cache XtX %*% beta for reuse in get_correlation (avoids redundant O(P^2) computation) cb_model[[i]]$XtX_beta_cache <- XtX_beta # - profile-loglikelihood (reuses cached XtX_beta) yty <- cb_data$data[[i]]$YtY / scaling_factor xty <- xty / scaling_factor - if (length(xtx) == 1){ - profile_log <- (yty - 2 * sum(beta * xty) + sum(beta^2)) * adj_dep - } else { - profile_log <- (yty - 2 * sum(beta * xty) + sum(XtX_beta * beta)) * adj_dep - } + profile_log <- (yty - 2 * sum(beta * xty) + sum(XtX_beta * beta)) + } cb_model[[i]]$profile_loglike_each <- c(cb_model[[i]]$profile_loglike_each, profile_log) } @@ -149,7 +135,7 @@ colocboost_update <- function(cb_model, cb_model_para, cb_data) { # - calculate LD for update_jk -get_LD_jk <- function(jk1, X = NULL, XtX = NULL, N = NULL, remain_idx = NULL, P = NULL) { +get_LD_jk <- function(jk1, X = NULL, XtX = NULL, N = NULL, remain_idx = NULL, P = NULL, ref_label = "LD") { if (!is.null(X)) { corr <- suppressWarnings({ Rfast::correls(X[, 
jk1], X)[, "correlation"] @@ -158,8 +144,14 @@ get_LD_jk <- function(jk1, X = NULL, XtX = NULL, N = NULL, remain_idx = NULL, P } else if (!is.null(XtX)) { jk1.remain <- which(remain_idx == jk1) corr <- rep(0, P) - if (length(XtX) == 1 | length(jk1.remain)==0){ + if (identical(ref_label, "No_ref") | length(jk1.remain) == 0) { corr[remain_idx] <- 1 + } else if (identical(ref_label, "X_ref")) { + corr_common <- suppressWarnings({ + correls(XtX[, jk1.remain], XtX)[, "correlation"] + }) + corr_common[which(is.na(corr_common))] <- 0 + corr[remain_idx] <- corr_common } else { corr[remain_idx] <- XtX[, jk1.remain] } @@ -168,7 +160,7 @@ get_LD_jk <- function(jk1, X = NULL, XtX = NULL, N = NULL, remain_idx = NULL, P } -boost_KL_delta <- function(z, ld_feature, adj_dep, +boost_KL_delta <- function(z, ld_feature, func_simplex = "LD_z2z", lambda = 0.5) { # if (!is.null(n)){ z <- z * sqrt( (n-1)/(z^2+n-2) ) } @@ -177,15 +169,15 @@ boost_KL_delta <- function(z, ld_feature, adj_dep, if (func_simplex == "Range_Z") { z2z <- lambda * 0.5 * z^2 + (1 - lambda) * abs(z) z2z <- (z2z - min(z2z)) / (max(z2z) - min(z2z)) * 5 - z2z <- adj_dep * ld_feature * z2z + z2z <- ld_feature * z2z delta <- exp(z2z - max(z2z)) } else if (func_simplex == "LD_z2z") { z2z <- lambda * 0.5 * z^2 + (1 - lambda) * abs(z) - z2z <- adj_dep * ld_feature * z2z + z2z <- ld_feature * z2z delta <- exp(z2z - max(z2z)) } else if (func_simplex == "only_z2z") { z2z <- lambda * 0.5 * z^2 + (1 - lambda) * abs(z) - z2z <- adj_dep * z2z + z2z <- z2z delta <- exp(z2z - max(z2z)) } else if (func_simplex == "entropy") { delta <- rep(1, length(z)) @@ -252,8 +244,6 @@ boost_obj_last <- function(cb_data, cb_model, cb_model_para) { abs_cor <- abs(correlation) jk <- which(abs_cor == max(abs_cor)) jk <- ifelse(length(jk) == 1, jk, sample(jk, 1)) - # adj_dependence - adj_dep <- cb_data$data[[i]]$dependency ########## MAIN CALCULATION ################### X_dict <- cb_data$dict[i] @@ -262,7 +252,8 @@ boost_obj_last <- function(cb_data, 
cb_model, cb_model_para) { XtX = cb_data$data[[X_dict]]$XtX, N = cb_data$data[[i]]$N, remain_idx = setdiff(1:cb_model_para$P, cb_data$data[[i]]$variable_miss), - P = cb_model_para$P + P = cb_model_para$P, + ref_label = cb_data$data[[X_dict]]$ref_label ) ld_feature <- sqrt(abs(ld_jk)) @@ -276,7 +267,7 @@ boost_obj_last <- function(cb_data, cb_model, cb_model_para) { } delta <- boost_KL_delta( z = cb_model[[i]]$z, - ld_feature = ld_feature, adj_dep = adj_dep, + ld_feature = ld_feature, func_simplex = cb_model_para$func_simplex, lambda = lambda_outcome ) @@ -293,7 +284,7 @@ boost_obj_last <- function(cb_data, cb_model, cb_model_para) { if (length(cb_data$data[[i]]$variable_miss) != 0) { obj_ld[cb_data$data[[i]]$variable_miss] <- 0 } - exp_term <- adj_dep * obj_ld * (abs(cov_Xtr)) + exp_term <- obj_ld * (abs(cov_Xtr)) # - calculate individual objective function cb_model[[i]]$obj_path <- c(cb_model[[i]]$obj_path, tau * matrixStats::logSumExp(exp_term / tau + log(delta))) cb_model[[i]]$obj_single <- c(cb_model[[i]]$obj_single, abs(cov_Xtr[jk])) diff --git a/R/colocboost_utils.R b/R/colocboost_utils.R index b486bf8..7e956bc 100644 --- a/R/colocboost_utils.R +++ b/R/colocboost_utils.R @@ -43,7 +43,8 @@ merge_cos_ucos <- function(cb_obj, out_cos, out_ucos, coverage = 0.95, X = cb_obj$cb_data$data[[X_dict]]$X, Xcorr = cb_obj$cb_data$data[[X_dict]]$XtX, miss_idx = cb_obj$cb_data$data[[fine_outcome]]$variable_miss, - P = cb_obj$cb_model_para$P + P = cb_obj$cb_model_para$P, + ref_label = cb_obj$cb_data$data[[X_dict]]$ref_label ) # is.between <- length(intersect(cset1, cset2)) != 0 is.between <- (abs(res[2] - 1) < tol) @@ -66,7 +67,8 @@ merge_cos_ucos <- function(cb_obj, out_cos, out_ucos, coverage = 0.95, X = cb_obj$cb_data$data[[X_dict]]$X, Xcorr = cb_obj$cb_data$data[[X_dict]]$XtX, miss_idx = cb_obj$cb_data$data[[ii]]$variable_miss, - P = cb_obj$cb_model_para$P + P = cb_obj$cb_model_para$P, + ref_label = cb_obj$cb_data$data[[X_dict]]$ref_label ) } res <- Reduce(pmax, res) @@ 
-151,7 +153,8 @@ merge_ucos <- function(cb_obj, past_out, X = cb_obj$cb_data$data[[X_dict]]$X, Xcorr = cb_obj$cb_data$data[[X_dict]]$XtX, miss_idx = cb_obj$cb_data$data[[ii]]$variable_miss, - P = cb_obj$cb_model_para$P + P = cb_obj$cb_model_para$P, + ref_label = cb_obj$cb_data$data[[X_dict]]$ref_label ) flag <- flag + 1 } @@ -221,7 +224,8 @@ merge_ucos <- function(cb_obj, past_out, tmp <- matrix(get_purity(pos, X = cb_obj$cb_data$data[[X_dict]]$X, Xcorr = cb_obj$cb_data$data[[X_dict]]$XtX, - N = cb_obj$cb_data$data[[i3]]$N, n = n_purity + N = cb_obj$cb_data$data[[i3]]$N, n = n_purity, + ref_label = cb_obj$cb_data$data[[X_dict]]$ref_label ), 1, 3) p_tmp <- rbind(p_tmp, tmp) } @@ -286,71 +290,6 @@ merge_ucos <- function(cb_obj, past_out, return(ll) } -#' @title Set of internal utils functions -#' -#' @description -#' The `colocboost_utils` functions serves as a summary for the following two refine functions. -#' -#' @details -#' The following functions are included in this set: -#' `merge_cos_ucos` merge a trait-specific confidence set CS into a colocalization set CoS. -#' `merge_ucos` merge two trait-specific confidence sets. -#' -#' These functions are not exported individually and are accessed via `colocboost_utils`. 
-#' -#' @rdname colocboost_utils -#' @keywords cb_utils -#' @noRd -get_vcp <- function(past_out, P) { - if (!is.null(past_out$cos$cos$cos)) { - avW_coloc_vcp <- sapply(past_out$cos$cos$avWeight, get_integrated_weight) - } else { - avW_coloc_vcp <- NULL - } - all_weight <- avW_coloc_vcp - if (length(all_weight) == P) { - all_weight <- as.matrix(unlist(all_weight)) - } - if (!is.null(all_weight)) { - all_weight <- apply(all_weight, 2, as.numeric) - all_weight <- as.matrix(all_weight) - vcp <- as.vector(1 - apply(1 - all_weight, 1, prod)) - } else { - vcp <- rep(0, P) - } - return(vcp) -} - - -get_pip <- function(past_out, R, P) { - if (length(past_out$cos$cos$cos) != 0) { - av_coloc <- do.call(cbind, past_out$cos$cos$avWeight) - } else { - av_coloc <- NULL - } - if (length(past_out$ucos$ucos_each) != 0) { - av_noncoloc <- past_out$ucos$avW_ucos_each - tmp <- do.call(rbind, strsplit(colnames(av_noncoloc), ":")) - colnames(av_noncoloc) <- paste0("outcome", gsub("[^0-9.]+", "", tmp[, 2])) - } else { - av_noncoloc <- NULL - } - av_all <- cbind(av_coloc, av_noncoloc) - pip <- vector(mode = "list", length = R) - if (!is.null(av_all)) { - av_name <- colnames(av_all) - for (i in 1:R) { - pos <- grep(i, av_name) - if (length(pos) != 0) { - av_i <- as.matrix(av_all[, pos]) - pip[[i]] <- as.vector(1 - apply(1 - av_i, 1, prod)) - } else { - pip[[i]] <- rep(0, P) - } - } - } - return(pip) -} check_two_overlap_sets <- function(total, i, j) { t1 <- total[[i]] @@ -663,7 +602,10 @@ get_cos_details <- function(cb_obj, coloc_out, data_info = NULL) { pos <- match(data_info$variables, cb_obj$cb_model_para$variables) return(w[pos, , drop = FALSE]) }) - int_weight <- lapply(cos_weights, get_integrated_weight, weight_fudge_factor = cb_obj$cb_model_para$weight_fudge_factor) + int_weight <- lapply(cos_weights, get_integrated_weight, + weight_fudge_factor = cb_obj$cb_model_para$weight_fudge_factor, + use_entropy = cb_obj$cb_model_para$use_entropy, + residual_correlation = 
cb_obj$cb_model_para$residual_correlation) names(int_weight) <- names(cos_weights) <- colocset_names # - resummary results @@ -689,7 +631,8 @@ get_cos_details <- function(cb_obj, coloc_out, data_info = NULL) { tmp <- matrix(get_purity(pos, X = cb_obj$cb_data$data[[X_dict]]$X, Xcorr = cb_obj$cb_data$data[[X_dict]]$XtX, - N = cb_obj$cb_data$data[[i3]]$N, n = cb_obj$cb_model_para$n_purity + N = cb_obj$cb_data$data[[i3]]$N, n = cb_obj$cb_model_para$n_purity, + ref_label = cb_obj$cb_data$data[[X_dict]]$ref_label ), 1, 3) p_tmp <- rbind(p_tmp, tmp) } @@ -763,7 +706,8 @@ get_cos_details <- function(cb_obj, coloc_out, data_info = NULL) { X = cb_obj$cb_data$data[[X_dict]]$X, Xcorr = cb_obj$cb_data$data[[X_dict]]$XtX, miss_idx = cb_obj$cb_data$data[[ii]]$variable_miss, - P = cb_obj$cb_model_para$P + P = cb_obj$cb_model_para$P, + ref_label = cb_obj$cb_data$data[[X_dict]]$ref_label ) flag <- flag + 1 } @@ -1025,7 +969,8 @@ get_full_output <- function(cb_obj, past_out = NULL, variables = NULL, cb_output X = cb_obj$cb_data$data[[X_dict]]$X, Xcorr = cb_obj$cb_data$data[[X_dict]]$XtX, miss_idx = cb_obj$cb_data$data[[ii]]$variable_miss, - P = cb_obj$cb_model_para$P + P = cb_obj$cb_model_para$P, + ref_label = cb_obj$cb_data$data[[X_dict]]$ref_label ) flag <- flag + 1 } @@ -1069,7 +1014,8 @@ get_full_output <- function(cb_obj, past_out = NULL, variables = NULL, cb_output X = cb_obj$cb_data$data[[X_dict]]$X, Xcorr = cb_obj$cb_data$data[[X_dict]]$XtX, miss_idx = cb_obj$cb_data$data[[ii]]$variable_miss, - P = cb_obj$cb_model_para$P + P = cb_obj$cb_model_para$P, + ref_label = cb_obj$cb_data$data[[X_dict]]$ref_label ) flag <- flag + 1 } @@ -1124,3 +1070,28 @@ get_full_output <- function(cb_obj, past_out = NULL, variables = NULL, cb_output return(ll) } + +#' Compute XtX %*% beta, dispatching on ref_label +#' @noRd +compute_XtX_product <- function(XtX, beta, ref_label = "LD") { + if (identical(ref_label, "No_ref")) return(beta) + if (identical(ref_label, "X_ref")) { + N_ref <- nrow(XtX) + 
temp <- XtX %*% as.matrix(beta) + return(as.vector(crossprod(XtX, temp)) / (N_ref - 1)) + } + as.vector(XtX %*% as.matrix(beta)) +} + +#' Pseudo-inverse via truncated eigendecomposition (99.9% variance). +#' Used to invert residual_correlation, which may be rank-deficient when +#' the submatrix contains duplicated traits. +#' @noRd +pseudo_inverse <- function(mat) { + eig <- eigen(mat, symmetric = TRUE) + keep <- which(cumsum(eig$values) / sum(eig$values) > 0.999)[1] + eig$vectors[, 1:keep, drop = FALSE] %*% + diag(1 / eig$values[1:keep], keep, keep) %*% + t(eig$vectors[, 1:keep, drop = FALSE]) +} + diff --git a/R/colocboost_workhorse.R b/R/colocboost_workhorse.R index 2635e00..2f0a4f7 100644 --- a/R/colocboost_workhorse.R +++ b/R/colocboost_workhorse.R @@ -284,7 +284,8 @@ cb_model_update <- function(cb_data, cb_model, cb_model_para) { XtX = cb_data$data[[X_dict]]$XtX, beta_k = model_each$beta, miss_idx = data_each$variable_miss, - XtX_beta_cache = model_each$XtX_beta_cache + XtX_beta_cache = model_each$XtX_beta_cache, + ref_label = cb_data$data[[X_dict]]$ref_label ) cb_model[[i]]$correlation <- tmp cb_model[[i]]$z <- get_z(tmp, n = data_each$N, model_each$res) diff --git a/man/colocboost.Rd b/man/colocboost.Rd index 4f6c3df..ebbb8bb 100644 --- a/man/colocboost.Rd +++ b/man/colocboost.Rd @@ -12,6 +12,7 @@ colocboost( Y = NULL, sumstat = NULL, LD = NULL, + X_ref = NULL, dict_YX = NULL, dict_sumstatLD = NULL, outcome_names = NULL, @@ -62,6 +63,7 @@ colocboost( check_null_max_ucos = 0.015, weaker_effect = TRUE, LD_free = FALSE, + use_entropy = FALSE, output_level = 1, cos_npc_cutoff = 0.2, npc_outcome_cutoff = 0.2, @@ -81,14 +83,23 @@ The columns of data.frame should include either \code{z} or \code{beta}/\code{se \code{var_y} is the variance of phenotype (default is 1 meaning that the Y is in the \dQuote{standardized} scale).} \item{LD}{A list of correlation matrix indicating the LD matrix for each genotype. 
It also could be a single matrix if all sumstats were -obtained from the same genotypes.} +obtained from the same genotypes. Provide either \code{LD} or \code{X_ref}, not both. +If neither is provided, LD-free mode is used.} + +\item{X_ref}{A reference panel genotype matrix (N_ref x P) or a list of matrices, as an alternative to providing a precomputed +\code{LD} matrix. Column names must include variant names matching those in \code{sumstat}. +When the number of reference panel samples is less than the number of variants (N_ref < P), +this avoids storing the full P x P LD matrix and reduces memory usage. +When N_ref >= P, LD is precomputed from \code{X_ref} internally. +Provide either \code{LD} or \code{X_ref}, not both. If neither is provided, LD-free mode is used.} \item{dict_YX}{A L by 2 matrix of dictionary for \code{X} and \code{Y} if there exist subsets of outcomes corresponding to the same X matrix. The first column should be 1:L for L outcomes. The second column should be the index of \code{X} corresponding to the outcome. The innovation: do not provide the same matrix in \code{X} to reduce the computational burden.} -\item{dict_sumstatLD}{A L by 2 matrix of dictionary for \code{sumstat} and \code{LD} if there exist subsets of outcomes corresponding to the same sumstat. -The first column should be 1:L for L sumstat The second column should be the index of \code{LD} corresponding to the sumstat. +\item{dict_sumstatLD}{A L by 2 matrix of dictionary for \code{sumstat} and \code{LD} (or \code{X_ref}) if there exist subsets of outcomes +corresponding to the same sumstat. +The first column should be 1:L for L sumstat The second column should be the index of \code{LD} (or \code{X_ref}) corresponding to the sumstat. 
The innovation: do not provide the same matrix in \code{LD} to reduce the computational burden.} \item{outcome_names}{The names of outcomes, which has the same order for Y.} @@ -194,6 +205,8 @@ to merge colocalized sets, which may resulting in a huge set.} \item{LD_free}{When \code{LD_free = FALSE}, objective function doesn't include LD information.} +\item{use_entropy}{A logical variable to consider the heterogeneity of traits for a single-effect.} + \item{output_level}{When \code{output_level = 1}, return basic cos details for colocalization results When \code{output_level = 2}, return the ucos details for the single specific effects. When \code{output_level = 3}, return the entire Colocboost model to diagnostic results (more space).} diff --git a/man/colocboost_validate_input_data.Rd b/man/colocboost_validate_input_data.Rd index d9c45f4..ec7e40a 100644 --- a/man/colocboost_validate_input_data.Rd +++ b/man/colocboost_validate_input_data.Rd @@ -9,6 +9,7 @@ colocboost_validate_input_data( Y = NULL, sumstat = NULL, LD = NULL, + X_ref = NULL, dict_YX = NULL, dict_sumstatLD = NULL, effect_est = NULL, @@ -31,7 +32,16 @@ colocboost_validate_input_data( \item{sumstat}{A list of data.frames of summary statistics.} -\item{LD}{A list of correlation matrices indicating the LD matrix for each genotype.} +\item{LD}{A list of correlation matrix indicating the LD matrix for each genotype. It also could be a single matrix if all sumstats were +obtained from the same genotypes. Provide either \code{LD} or \code{X_ref}, not both. +If neither is provided, LD-free mode is used.} + +\item{X_ref}{A reference panel genotype matrix (N_ref x P) or a list of matrices, as an alternative to providing a precomputed +\code{LD} matrix. Column names must include variant names matching those in \code{sumstat}. +When the number of reference panel samples is less than the number of variants (N_ref < P), +this avoids storing the full P x P LD matrix and reduces memory usage.
+When N_ref >= P, LD is precomputed from \code{X_ref} internally. +Provide either \code{LD} or \code{X_ref}, not both. If neither is provided, LD-free mode is used.} \item{dict_YX}{A L by 2 matrix of dictionary for X and Y if there exist subsets of outcomes corresponding to the same X matrix.} @@ -57,6 +67,8 @@ A list containing: \item{keep_variable_individual}{List of variable names for each X matrix} \item{sumstat}{Processed list of summary statistics data.frames} \item{LD}{Processed list of LD matrices} +\item{X_ref}{Processed list of reference genotype matrices} +\item{ref_label}{Style of reference matrices} \item{sumstatLD_dict}{Dictionary mapping sumstat to LD} \item{keep_variable_sumstat}{List of variant names for each sumstat} \item{Z}{List of z-scores for each outcome} diff --git a/man/get_robust_colocalization.Rd b/man/get_robust_colocalization.Rd index 4128c64..87d20f9 100644 --- a/man/get_robust_colocalization.Rd +++ b/man/get_robust_colocalization.Rd @@ -14,6 +14,8 @@ get_robust_colocalization( npc_outcome_cutoff = 0.2, pvalue_cutoff = NULL, weight_fudge_factor = 1.5, + use_entropy = FALSE, + residual_correlation = NULL, coverage = 0.95 ) } @@ -28,6 +30,10 @@ get_robust_colocalization( \item{weight_fudge_factor}{The strength to integrate weight from different outcomes, default is 1.5} +\item{use_entropy}{A logical variable to consider the heterogeneity of traits for a single-effect.} + +\item{residual_correlation}{The residual correlation based on the sample overlap; it is diagonal if it is NULL.} + \item{coverage}{A number between 0 and 1 specifying the \dQuote{coverage} of the estimated colocalization confidence sets (CoS) (default is 0.95).} } \value{ diff --git a/tests/testthat/test_RE.R b/tests/testthat/test_RE.R new file mode 100644 index 0000000..f6949c4 --- /dev/null +++ b/tests/testthat/test_RE.R @@ -0,0 +1,563 @@ +library(testthat) + +# ============================================================================ +# Tests for the integration
changes across commits c0a2e15 and b61d7cb: +# (1) `use_entropy` parameter: heterogeneity-aware alpha for integrating +# per-trait weights (vs uniform 1/L). +# (2) `residual_correlation` refactor: removed from the core boosting +# loop (no more `dependency` field on cb_data), now consumed at +# assembly time in `get_integrated_weight` as a GLS-style alpha +# adjustment, with: +# - duplicate outcome columns collapsed by geometric mean +# - colname parsing of both "outcomeN" and "YN" +# - sub-selection of residual_correlation[idx, idx] +# ============================================================================ + + +# ---- Shared test data generator (L-adaptive so L=2 works) ---- +generate_entropy_test_data <- function(n = 300, p = 40, L = 3, seed = 2026) { + set.seed(seed) + sigma <- 0.9^abs(outer(1:p, 1:p, "-")) + X <- MASS::mvrnorm(n, rep(0, p), sigma) + colnames(X) <- paste0("SNP", 1:p) + + # Shared causal at SNP10, descending heterogeneous per-trait effects. + het_effects <- c(0.9, 0.3, 0.15, 0.1, 0.08) + true_beta <- matrix(0, p, L) + true_beta[10, ] <- het_effects[seq_len(L)] + if (L >= 2) true_beta[25, 2] <- 0.5 # trait-2-specific + + Y <- matrix(0, n, L) + for (l in seq_len(L)) { + Y[, l] <- X %*% true_beta[, l] + rnorm(n, 0, 1) + } + + list( + X = X, Y = Y, + Y_list = lapply(seq_len(L), function(l) Y[, l]), + X_list = replicate(L, X, simplify = FALSE), + LD = cor(X), + true_beta = true_beta + ) +} + + +# ============================================================================ +# (1a) Unit tests for get_integrated_weight with use_entropy +# ============================================================================ + +test_that("get_integrated_weight: use_entropy=FALSE returns a valid distribution", { + w1 <- c(0.6, 0.3, 0.05, 0.05) + w2 <- c(0.1, 0.1, 0.4, 0.4) + weights <- cbind(w1, w2) + + result <- get_integrated_weight(weights, weight_fudge_factor = 1.5, use_entropy = FALSE) + + expect_equal(length(result), nrow(weights)) + 
expect_equal(sum(result), 1, tolerance = 1e-10) + expect_true(all(result >= 0)) +}) + +test_that("get_integrated_weight: use_entropy=TRUE returns a valid distribution", { + w1 <- c(0.6, 0.3, 0.05, 0.05) # low entropy + w2 <- c(0.25, 0.25, 0.25, 0.25) # max entropy + weights <- cbind(w1, w2) + + result_ent <- get_integrated_weight(weights, use_entropy = TRUE) + + expect_equal(length(result_ent), nrow(weights)) + expect_equal(sum(result_ent), 1, tolerance = 1e-10) + expect_true(all(result_ent >= 0)) +}) + +test_that("get_integrated_weight: use_entropy=TRUE and FALSE differ for heterogeneous-entropy traits", { + w_sharp <- c(0.85, 0.10, 0.03, 0.02) + w_flat <- c(0.26, 0.25, 0.25, 0.24) + weights <- cbind(w_sharp, w_flat) + + r_uniform <- get_integrated_weight(weights, use_entropy = FALSE) + r_entropy <- get_integrated_weight(weights, use_entropy = TRUE) + + expect_false(isTRUE(all.equal(r_uniform, r_entropy))) + expect_equal(sum(r_uniform), 1, tolerance = 1e-10) + expect_equal(sum(r_entropy), 1, tolerance = 1e-10) +}) + +test_that("get_integrated_weight: identical trait weights give identical results under both modes", { + w <- c(0.5, 0.25, 0.15, 0.10) + weights <- cbind(w, w, w) + + r_uniform <- get_integrated_weight(weights, use_entropy = FALSE) + r_entropy <- get_integrated_weight(weights, use_entropy = TRUE) + + expect_equal(r_uniform, r_entropy, tolerance = 1e-10) +}) + +test_that("get_integrated_weight: use_entropy handles zero-weight entries without NaN", { + # Both columns have mass on the same three rows; column 1 is zero on row 4. + # The entropy branch filters w > 0 when computing H, so zeros must not + # produce NaN in alpha. + w1 <- c(0.5, 0.3, 0.2, 0.0) + w2 <- c(0.4, 0.3, 0.2, 0.1) + weights <- cbind(w1, w2) + + expect_error( + result <- get_integrated_weight(weights, use_entropy = TRUE), + NA + ) + expect_true(all(is.finite(result))) + expect_equal(sum(result), 1, tolerance = 1e-10) + # Row 4 has w1 = 0, so 0^(positive alpha) = 0 on that row. 
+ expect_equal(result[4], 0) +}) + + +# ============================================================================ +# (1b) get_integrated_weight with residual_correlation (new GLS path) +# ============================================================================ + +test_that("get_integrated_weight: identity residual_correlation matches NULL residual_correlation", { + # With RC = I, alpha = I %*% 1 / sum(I %*% 1) = 1/L, identical to the + # legacy uniform-alpha path. + set.seed(42) + w1 <- c(0.5, 0.2, 0.15, 0.15) + w2 <- c(0.1, 0.4, 0.3, 0.2) + w3 <- c(0.25, 0.25, 0.3, 0.2) + weights <- cbind(w1, w2, w3) + colnames(weights) <- paste0("outcome", 1:3) + + r_null <- get_integrated_weight(weights, residual_correlation = NULL) + r_I <- get_integrated_weight(weights, residual_correlation = diag(3)) + + expect_equal(as.numeric(r_null), as.numeric(r_I), tolerance = 1e-10) +}) + +test_that("get_integrated_weight: identity RC also matches NULL RC under use_entropy=TRUE", { + set.seed(43) + w1 <- c(0.7, 0.1, 0.1, 0.1) + w2 <- c(0.1, 0.4, 0.3, 0.2) + w3 <- c(0.25, 0.25, 0.25, 0.25) + weights <- cbind(w1, w2, w3) + colnames(weights) <- paste0("outcome", 1:3) + + r_null <- get_integrated_weight(weights, use_entropy = TRUE, residual_correlation = NULL) + r_I <- get_integrated_weight(weights, use_entropy = TRUE, residual_correlation = diag(3)) + + expect_equal(as.numeric(r_null), as.numeric(r_I), tolerance = 1e-10) +}) + +test_that("get_integrated_weight: asymmetric RC differs from NULL RC", { + # Equal-correlation RC (e.g. 0.8 everywhere off-diagonal) is actually a + # no-op because its inverse times a vector of 1s is still a multiple of 1. + # We need an asymmetric correlation structure for alpha to shift. + w1 <- c(0.6, 0.2, 0.1, 0.1) + w2 <- c(0.55, 0.25, 0.1, 0.1) + w3 <- c(0.1, 0.2, 0.35, 0.35) + weights <- cbind(w1, w2, w3) + colnames(weights) <- paste0("outcome", 1:3) + + # Traits 1 and 2 heavily correlated, trait 3 nearly independent. 
+ RC <- matrix(c(1, 0.9, 0.1, + 0.9, 1, 0.1, + 0.1, 0.1, 1), nrow = 3, byrow = TRUE) + + r_null <- get_integrated_weight(weights, residual_correlation = NULL) + r_rc <- get_integrated_weight(weights, residual_correlation = RC) + + expect_false(isTRUE(all.equal(as.numeric(r_null), as.numeric(r_rc)))) + expect_equal(sum(r_rc), 1, tolerance = 1e-10) + expect_true(all(is.finite(r_rc))) +}) + +test_that("get_integrated_weight: 'outcomeN' and 'YN' colnames parse equivalently", { + # `get_robust_colocalization` relabels cos_weights columns from "outcomeN" + # to "YN" before calling `get_integrated_weight`, so both prefixes must + # produce the same output for the same numerical inputs. + w1 <- c(0.5, 0.3, 0.1, 0.1) + w2 <- c(0.2, 0.2, 0.3, 0.3) + weights_o <- cbind(w1, w2); colnames(weights_o) <- c("outcome1", "outcome2") + weights_y <- cbind(w1, w2); colnames(weights_y) <- c("Y1", "Y2") + + RC <- matrix(c(1, 0.3, 0.3, 1), nrow = 2) + + r_o <- get_integrated_weight(weights_o, residual_correlation = RC) + r_y <- get_integrated_weight(weights_y, residual_correlation = RC) + + expect_equal(as.numeric(r_o), as.numeric(r_y), tolerance = 1e-10) +}) + +test_that("get_integrated_weight: subset of outcomes picks the right RC sub-block", { + # 4x4 RC, but weights only carry traits 2 and 4. The function must subset + # RC[c(2,4), c(2,4)] automatically from the colnames. + w1 <- c(0.7, 0.1, 0.1, 0.1) + w2 <- c(0.1, 0.4, 0.3, 0.2) + weights <- cbind(w1, w2) + colnames(weights) <- c("outcome2", "outcome4") + + RC_full <- matrix(c(1, 0.2, 0.1, 0.05, + 0.2, 1, 0.3, 0.4, + 0.1, 0.3, 1, 0.15, + 0.05, 0.4, 0.15, 1), nrow = 4, byrow = TRUE) + RC_sub <- RC_full[c(2, 4), c(2, 4)] + + # Running with full RC (letting the function subset by index) should match + # running with the pre-subsetted 2x2 block with colnames relabeled to 1/2. 
+ weights_sub <- weights + colnames(weights_sub) <- c("outcome1", "outcome2") + + r_full <- get_integrated_weight(weights, residual_correlation = RC_full) + r_sub <- get_integrated_weight(weights_sub, residual_correlation = RC_sub) + + expect_equal(as.numeric(r_full), as.numeric(r_sub), tolerance = 1e-10) +}) + +test_that("get_integrated_weight: duplicate outcome columns are collapsed by geometric mean", { + # Two columns for the same trait (outcome1 appearing twice) should produce + # the same integrated weight as a single column whose values are the + # geometric mean of the two duplicates. + set.seed(77) + w1a <- c(0.5, 0.3, 0.1, 0.1) + w1b <- c(0.4, 0.4, 0.1, 0.1) + w2 <- c(0.2, 0.2, 0.3, 0.3) + + gm_w1 <- exp((log(w1a) + log(w1b)) / 2) # geometric mean + + weights_dup <- cbind(w1a, w1b, w2) + colnames(weights_dup) <- c("outcome1", "outcome1", "outcome2") + + weights_collapsed <- cbind(gm_w1, w2) + colnames(weights_collapsed) <- c("outcome1", "outcome2") + + RC <- matrix(c(1, 0.4, 0.4, 1), nrow = 2) + + r_dup <- get_integrated_weight(weights_dup, residual_correlation = RC) + r_col <- get_integrated_weight(weights_collapsed, residual_correlation = RC) + + expect_equal(as.numeric(r_dup), as.numeric(r_col), tolerance = 1e-10) + expect_equal(sum(r_dup), 1, tolerance = 1e-10) +}) + +test_that("get_integrated_weight: duplicate collapse only triggers when RC is provided", { + # Without residual_correlation the collapse branch is skipped — two + # identical-index columns should NOT be collapsed (legacy behavior). 
+ w1a <- c(0.5, 0.3, 0.1, 0.1) + w1b <- c(0.4, 0.4, 0.1, 0.1) + w2 <- c(0.2, 0.2, 0.3, 0.3) + + weights_dup <- cbind(w1a, w1b, w2) + colnames(weights_dup) <- c("outcome1", "outcome1", "outcome2") + + r_null <- get_integrated_weight(weights_dup, residual_correlation = NULL) + expect_equal(sum(r_null), 1, tolerance = 1e-10) + expect_true(all(is.finite(r_null))) +}) + +test_that("get_integrated_weight: duplicate collapse + use_entropy + RC composes", { + set.seed(91) + w1a <- c(0.5, 0.3, 0.1, 0.1) + w1b <- c(0.45, 0.35, 0.1, 0.1) + w2 <- c(0.25, 0.25, 0.25, 0.25) + + weights_dup <- cbind(w1a, w1b, w2) + colnames(weights_dup) <- c("outcome1", "outcome1", "outcome2") + + RC <- matrix(c(1, 0.3, 0.3, 1), nrow = 2) + + expect_error( + res <- get_integrated_weight( + weights_dup, + use_entropy = TRUE, + residual_correlation = RC + ), + NA + ) + expect_equal(sum(res), 1, tolerance = 1e-10) + expect_true(all(is.finite(res))) +}) + + +# ============================================================================ +# (2) End-to-end: use_entropy wired through colocboost() +# ============================================================================ + +test_that("colocboost runs with use_entropy = TRUE and returns a valid object", { + td <- generate_entropy_test_data() + + suppressWarnings(suppressMessages({ + res <- colocboost( + X = td$X_list, Y = td$Y_list, + use_entropy = TRUE, M = 20, output_level = 2 + ) + })) + + expect_s3_class(res, "colocboost") + expect_equal(res$data_info$n_outcomes, 3) + expect_equal(length(res$data_info$variables), ncol(td$X)) + if (!is.null(res$vcp)) { + expect_true(all(res$vcp >= 0 & res$vcp <= 1 + 1e-10)) + } +}) + +test_that("colocboost use_entropy default is FALSE and preserves legacy behavior", { + td <- generate_entropy_test_data() + + suppressWarnings(suppressMessages({ + res_default <- colocboost( + X = td$X_list, Y = td$Y_list, M = 20, output_level = 2 + ) + res_explicit_false <- colocboost( + X = td$X_list, Y = td$Y_list, + use_entropy = 
FALSE, M = 20, output_level = 2 + ) + })) + + expect_equal(res_default$vcp, res_explicit_false$vcp) + expect_equal( + names(res_default$cos_details$cos$cos_index), + names(res_explicit_false$cos_details$cos$cos_index) + ) +}) + +test_that("colocboost use_entropy=TRUE can yield different VCP than FALSE under heterogeneous effects", { + td <- generate_entropy_test_data() + + suppressWarnings(suppressMessages({ + res_F <- colocboost(X = td$X_list, Y = td$Y_list, + use_entropy = FALSE, M = 30, output_level = 2) + res_T <- colocboost(X = td$X_list, Y = td$Y_list, + use_entropy = TRUE, M = 30, output_level = 2) + })) + + if (!is.null(res_F$vcp) && !is.null(res_T$vcp) && + length(res_F$vcp) == length(res_T$vcp)) { + expect_false(isTRUE(all.equal(as.numeric(res_F$vcp), + as.numeric(res_T$vcp), + tolerance = 1e-12))) + } else { + succeed("One run returned no colocalization; entropy-vs-uniform comparison skipped") + } +}) + +test_that("colocboost use_entropy flag is propagated into cb_model_para", { + td <- generate_entropy_test_data() + + suppressWarnings(suppressMessages({ + res <- colocboost(X = td$X_list, Y = td$Y_list, + use_entropy = TRUE, M = 10, output_level = 3) + })) + + expect_true("diagnostic_details" %in% names(res)) + expect_equal(res$diagnostic_details$cb_model_para$use_entropy, TRUE) +}) + +test_that("get_robust_colocalization accepts and propagates use_entropy", { + td <- generate_entropy_test_data() + + suppressWarnings(suppressMessages({ + res <- colocboost(X = td$X_list, Y = td$Y_list, + use_entropy = TRUE, M = 20, output_level = 2) + })) + + expect_error( + suppressMessages( + filtered <- get_robust_colocalization( + res, + cos_npc_cutoff = 0.2, + npc_outcome_cutoff = 0.1, + use_entropy = TRUE + ) + ), + NA + ) + expect_s3_class(filtered, "colocboost") +}) + + +# ============================================================================ +# (3) End-to-end: residual_correlation wired through colocboost() +# 
============================================================================ + +test_that("colocboost accepts residual_correlation without error", { + td <- generate_entropy_test_data(L = 2, seed = 7) + resid_cor <- matrix(c(1, 0.4, 0.4, 1), nrow = 2) + + expect_error( + suppressWarnings(suppressMessages({ + res <- colocboost( + X = td$X_list, Y = td$Y_list, + residual_correlation = resid_cor, + M = 15, output_level = 2 + ) + })), + NA + ) + expect_s3_class(res, "colocboost") +}) + +test_that("colocboost_init_data no longer creates a 'dependency' field on cb_data entries", { + # Structural guard: the per-outcome `dependency` scalar was the mechanism + # that used to leak residual_correlation into the fitting loop. It must + # stay deleted even though RC is now consumed at assembly time. + td <- generate_entropy_test_data(L = 2, seed = 11) + Y_list_mat <- lapply(td$Y_list, as.matrix) + + cb_data <- colocboost_init_data( + X = td$X_list, Y = Y_list_mat, + dict_YX = c(1, 2), + Z = NULL, LD = NULL, X_ref = NULL, ref_label = NULL, + N_sumstat = NULL, dict_sumstatLD = NULL, + Var_y = NULL, SeBhat = NULL, + keep_variables = lapply(td$X_list, colnames) + ) + + expect_s3_class(cb_data, "colocboost") + for (i in seq_along(cb_data$data)) { + expect_false("dependency" %in% names(cb_data$data[[i]]), + info = paste0("data element ", i, " still has a 'dependency' field")) + } +}) + +test_that("residual_correlation does NOT affect the core boosting loop (only assembly)", { + # The refactor keeps residual_correlation out of the fitting loop, so the + # fitted per-trait model objects (beta, profile_loglike_each, z) must be + # IDENTICAL with and without RC. Only the downstream integrated VCP is + # allowed to differ. 
+ td <- generate_entropy_test_data(L = 2, seed = 13) + resid_cor <- matrix(c(1, 0.6, 0.6, 1), nrow = 2) + + suppressWarnings(suppressMessages({ + res_no_rc <- colocboost( + X = td$X_list, Y = td$Y_list, + M = 20, output_level = 3 + ) + res_rc <- colocboost( + X = td$X_list, Y = td$Y_list, + residual_correlation = resid_cor, + M = 20, output_level = 3 + ) + })) + + cb_no <- res_no_rc$diagnostic_details$cb_model + cb_rc <- res_rc$diagnostic_details$cb_model + expect_equal(length(cb_no), length(cb_rc)) + + for (i in seq_along(cb_no)) { + expect_equal(cb_no[[i]]$beta, cb_rc[[i]]$beta, tolerance = 1e-12, + info = paste0("beta differs at outcome ", i)) + expect_equal(cb_no[[i]]$profile_loglike_each, + cb_rc[[i]]$profile_loglike_each, + tolerance = 1e-12, + info = paste0("profile_loglike_each differs at outcome ", i)) + expect_equal(cb_no[[i]]$z, cb_rc[[i]]$z, tolerance = 1e-12, + info = paste0("z differs at outcome ", i)) + } +}) + +test_that("identity residual_correlation matches NULL residual_correlation end-to-end", { + # RC = I should produce VCP identical to RC = NULL because the GLS alpha + # collapses to uniform 1/L for an identity matrix. 
+ td <- generate_entropy_test_data(L = 3, seed = 29) + + suppressWarnings(suppressMessages({ + res_null <- colocboost( + X = td$X_list, Y = td$Y_list, + M = 20, output_level = 2 + ) + res_I <- colocboost( + X = td$X_list, Y = td$Y_list, + residual_correlation = diag(3), + M = 20, output_level = 2 + ) + })) + + if (!is.null(res_null$vcp) && !is.null(res_I$vcp)) { + expect_equal(as.numeric(res_null$vcp), as.numeric(res_I$vcp), + tolerance = 1e-10) + } + expect_equal( + names(res_null$cos_details$cos$cos_index), + names(res_I$cos_details$cos$cos_index) + ) +}) + +test_that("residual_correlation is stored on cb_model_para via diagnostic_details", { + td <- generate_entropy_test_data(L = 2, seed = 31) + RC <- matrix(c(1, 0.35, 0.35, 1), nrow = 2) + + suppressWarnings(suppressMessages({ + res <- colocboost( + X = td$X_list, Y = td$Y_list, + residual_correlation = RC, + M = 10, output_level = 3 + ) + })) + + expect_true("diagnostic_details" %in% names(res)) + stored <- res$diagnostic_details$cb_model_para$residual_correlation + expect_false(is.null(stored)) + expect_equal(stored, RC) +}) + +test_that("residual_correlation composes with use_entropy = TRUE", { + td <- generate_entropy_test_data(L = 3, seed = 19) + resid_cor <- diag(3) + resid_cor[1, 2] <- resid_cor[2, 1] <- 0.3 + + expect_error( + suppressWarnings(suppressMessages({ + res <- colocboost( + X = td$X_list, Y = td$Y_list, + residual_correlation = resid_cor, + use_entropy = TRUE, + M = 15, output_level = 2 + ) + })), + NA + ) + expect_s3_class(res, "colocboost") + expect_equal(res$data_info$n_outcomes, 3) +}) + +test_that("get_robust_colocalization accepts and propagates residual_correlation", { + td <- generate_entropy_test_data(L = 3, seed = 37) + RC <- matrix(0.2, 3, 3); diag(RC) <- 1 + + suppressWarnings(suppressMessages({ + res <- colocboost( + X = td$X_list, Y = td$Y_list, + residual_correlation = RC, + M = 20, output_level = 2 + ) + })) + + expect_error( + suppressMessages( + filtered <- 
get_robust_colocalization( + res, + cos_npc_cutoff = 0.2, + npc_outcome_cutoff = 0.1, + residual_correlation = RC + ) + ), + NA + ) + expect_s3_class(filtered, "colocboost") +}) + +test_that("residual_correlation end-to-end runs with non-identity RC and L=4", { + td <- generate_entropy_test_data(L = 4, seed = 41) + RC <- matrix(0.3, 4, 4); diag(RC) <- 1 + + suppressWarnings(suppressMessages({ + res <- colocboost( + X = td$X_list, Y = td$Y_list, + residual_correlation = RC, + use_entropy = TRUE, + M = 20, output_level = 3 + ) + })) + + expect_s3_class(res, "colocboost") + expect_equal(res$data_info$n_outcomes, 4) + expect_equal(res$diagnostic_details$cb_model_para$residual_correlation, RC) +}) \ No newline at end of file diff --git a/tests/testthat/test_Xref.R b/tests/testthat/test_Xref.R new file mode 100644 index 0000000..5d30abc --- /dev/null +++ b/tests/testthat/test_Xref.R @@ -0,0 +1,758 @@ +library(testthat) + +# ============================================================================ +# Shared test data generators for X_ref tests +# ============================================================================ + +generate_xref_test_data <- function(n = 200, n_ref = 50, p = 30, L = 2, seed = 42) { + set.seed(seed) + + # Generate X with LD structure (for individual-level truth) + sigma <- matrix(0, p, p) + for (i in 1:p) { + for (j in 1:p) { + sigma[i, j] <- 0.9^abs(i - j) + } + } + X <- MASS::mvrnorm(n, rep(0, p), sigma) + colnames(X) <- paste0("SNP", 1:p) + + # Generate X_ref (reference panel, separate samples) + X_ref <- MASS::mvrnorm(n_ref, rep(0, p), sigma) + colnames(X_ref) <- paste0("SNP", 1:p) + + # Generate true effects + true_beta <- matrix(0, p, L) + true_beta[5, 1] <- 0.7 # SNP5 affects trait 1 + true_beta[5, 2] <- 0.6 # SNP5 also affects trait 2 (colocalized) + true_beta[20, 2] <- 0.5 # SNP20 only affects trait 2 + + # Generate Y with some noise + Y <- matrix(0, n, L) + for (l in 1:L) { + Y[, l] <- X %*% true_beta[, l] + rnorm(n, 0, 1) + } + + # 
Calculate summary statistics + beta <- matrix(0, p, L) + se <- matrix(0, p, L) + for (i in 1:L) { + for (j in 1:p) { + fit <- summary(lm(Y[, i] ~ X[, j]))$coef + if (nrow(fit) == 2) { + beta[j, i] <- fit[2, 1] + se[j, i] <- fit[2, 2] + } + } + } + + sumstat_list <- lapply(1:L, function(i) { + data.frame( + beta = beta[, i], + sebeta = se[, i], + n = n, + variant = colnames(X) + ) + }) + + LD <- cor(X) + + list( + X = X, + Y = Y, + X_ref = X_ref, + LD = LD, + sumstat = sumstat_list, + true_beta = true_beta + ) +} + + +# ============================================================================ +# Test 1: colocboost rejects LD + X_ref together +# ============================================================================ + +test_that("colocboost rejects when both LD and X_ref are provided", { + test_data <- generate_xref_test_data() + + expect_warning( + result <- colocboost( + sumstat = test_data$sumstat, + LD = test_data$LD, + X_ref = test_data$X_ref, + M = 1 + ), + "Provide either LD or X_ref, not both" + ) + expect_null(result) +}) + + +# ============================================================================ +# Test 2: X_ref with N_ref >= P precomputes LD +# ============================================================================ + +test_that("X_ref with N_ref >= P precomputes LD and produces valid results", { + # N_ref = 50, P = 30 => N_ref >= P, should precompute LD + test_data <- generate_xref_test_data(n_ref = 50, p = 30) + + expect_message( + suppressWarnings({ + result_xref <- colocboost( + sumstat = test_data$sumstat, + X_ref = test_data$X_ref, + M = 10, + output_level = 2 + ) + }), + "N_ref >= P: precomputing LD from X_ref" + ) + + expect_s3_class(result_xref, "colocboost") + expect_equal(result_xref$data_info$n_outcomes, 2) + expect_equal(length(result_xref$data_info$variables), 30) +}) + + +# ============================================================================ +# Test 3: X_ref with N_ref < P keeps X_ref for on-the-fly computation +# 
============================================================================
+
+test_that("X_ref with N_ref < P runs correctly without precomputing LD", {
+  # N_ref = 20, P = 30 => N_ref < P, should keep X_ref
+  test_data <- generate_xref_test_data(n_ref = 20, p = 30)
+
+  suppressWarnings(suppressMessages({
+    result_xref <- colocboost(
+      sumstat = test_data$sumstat,
+      X_ref = test_data$X_ref,
+      M = 10,
+      output_level = 2
+    )
+  }))
+
+  expect_s3_class(result_xref, "colocboost")
+  expect_equal(result_xref$data_info$n_outcomes, 2)
+  expect_equal(length(result_xref$data_info$variables), 30)
+})
+
+
+# ============================================================================
+# Test 4: X_ref results are consistent with LD results (N_ref >= P case)
+# ============================================================================
+
+test_that("X_ref (N_ref >= P) produces results consistent with precomputed LD", {
+  test_data <- generate_xref_test_data(n = 200, n_ref = 200, p = 20, seed = 99)
+
+  # Run with precomputed LD
+  suppressWarnings(suppressMessages({
+    result_ld <- colocboost(
+      sumstat = test_data$sumstat,
+      LD = test_data$LD,
+      M = 50,
+      output_level = 2
+    )
+  }))
+
+  # Run with X_ref (same samples as LD source, so N_ref >= P => precomputes LD)
+  suppressWarnings(suppressMessages({
+    result_xref <- colocboost(
+      sumstat = test_data$sumstat,
+      X_ref = test_data$X_ref,
+      M = 50,
+      output_level = 2
+    )
+  }))
+
+  # Both should produce valid colocboost objects
+  expect_s3_class(result_ld, "colocboost")
+  expect_s3_class(result_xref, "colocboost")
+
+  # Same number of outcomes and variables
+  expect_equal(result_ld$data_info$n_outcomes, result_xref$data_info$n_outcomes)
+  expect_equal(length(result_ld$data_info$variables), length(result_xref$data_info$variables))
+})
+
+
+# ============================================================================
+# Test 5: X_ref with N_ref < P and one iteration (one_causal mode)
+# 
============================================================================ + +test_that("X_ref with N_ref < P works in one-causal mode (M=1 with LD)", { + test_data <- generate_xref_test_data(n_ref = 15, p = 30) + + suppressWarnings(suppressMessages({ + result <- colocboost( + sumstat = test_data$sumstat, + X_ref = test_data$X_ref, + M = 1, + output_level = 2 + ) + })) + + expect_s3_class(result, "colocboost") + expect_equal(result$model_info$model_coveraged, "one_causal") + expect_equal(result$model_info$n_updates, 1) +}) + + +# ============================================================================ +# Test 6: X_ref with 3 outcomes +# ============================================================================ + +test_that("X_ref works with 3 outcomes", { + set.seed(123) + n <- 200 + n_ref <- 25 + p <- 25 + L <- 3 + + sigma <- 0.9^abs(outer(1:p, 1:p, "-")) + X <- MASS::mvrnorm(n, rep(0, p), sigma) + colnames(X) <- paste0("SNP", 1:p) + X_ref <- MASS::mvrnorm(n_ref, rep(0, p), sigma) + colnames(X_ref) <- paste0("SNP", 1:p) + + true_beta <- matrix(0, p, L) + true_beta[5, 1] <- 0.6 + true_beta[5, 2] <- 0.5 + true_beta[15, 3] <- 0.7 + + Y <- matrix(0, n, L) + for (l in 1:L) { + Y[, l] <- X %*% true_beta[, l] + rnorm(n, 0, 1) + } + + beta <- se <- matrix(0, p, L) + for (i in 1:L) { + for (j in 1:p) { + fit <- summary(lm(Y[, i] ~ X[, j]))$coef + if (nrow(fit) == 2) { beta[j, i] <- fit[2, 1]; se[j, i] <- fit[2, 2] } + } + } + sumstat <- lapply(1:L, function(i) data.frame(beta = beta[, i], sebeta = se[, i], n = n, variant = colnames(X))) + + suppressWarnings(suppressMessages({ + result <- colocboost( + sumstat = sumstat, + X_ref = X_ref, + M = 10, + output_level = 2 + ) + })) + + expect_s3_class(result, "colocboost") + expect_equal(result$data_info$n_outcomes, 3) +}) + + +# ============================================================================ +# Test 7: X_ref with focal outcome +# ============================================================================ + 
+test_that("X_ref works with focal outcome", {
+  test_data <- generate_xref_test_data(n_ref = 20, p = 30)
+
+  suppressWarnings(suppressMessages({
+    result <- colocboost(
+      sumstat = test_data$sumstat,
+      X_ref = test_data$X_ref,
+      focal_outcome_idx = 1,
+      M = 10,
+      output_level = 2
+    )
+  }))
+
+  expect_s3_class(result, "colocboost")
+  expect_equal(result$data_info$outcome_info$is_focal[1], TRUE)
+  expect_equal(result$data_info$outcome_info$is_focal[2], FALSE)
+})
+
+
+# ============================================================================
+# Test 8: X_ref with dict_sumstatLD mapping
+# ============================================================================
+
+test_that("X_ref works with dict_sumstatLD mapping for shared reference panel", {
+  test_data <- generate_xref_test_data(n_ref = 20, p = 30)
+
+  # Both sumstats share the same X_ref
+  # byrow = TRUE so rows are (sumstat, panel) pairs: first column is 1:L,
+  # second column maps both sumstats to reference panel 1 (per the
+  # dict_sumstatLD contract documented in ?colocboost).
+  suppressWarnings(suppressMessages({
+    result <- colocboost(
+      sumstat = test_data$sumstat,
+      X_ref = list(test_data$X_ref),
+      dict_sumstatLD = matrix(c(1, 1, 2, 1), ncol = 2, byrow = TRUE),
+      M = 10,
+      output_level = 2
+    )
+  }))
+
+  expect_s3_class(result, "colocboost")
+  expect_equal(result$data_info$n_outcomes, 2)
+})
+
+
+# ============================================================================
+# Test 9: X_ref with multiple reference panels
+# ============================================================================
+
+test_that("X_ref works with multiple reference panels", {
+  test_data <- generate_xref_test_data(n_ref = 20, p = 30)
+
+  # Two separate X_ref panels (identical for testing)
+  suppressWarnings(suppressMessages({
+    result <- colocboost(
+      sumstat = test_data$sumstat,
+      X_ref = list(test_data$X_ref, test_data$X_ref),
+      M = 10,
+      output_level = 2
+    )
+  }))
+
+  expect_s3_class(result, "colocboost")
+  expect_equal(result$data_info$n_outcomes, 2)
+})
+
+
+# ============================================================================
+# Test 10: X_ref with missing variants (partial overlap)
+# 
============================================================================ + +test_that("X_ref handles partial variant overlap with sumstat", { + test_data <- generate_xref_test_data(n_ref = 20, p = 30) + + # X_ref has fewer variants than sumstat + X_ref_partial <- test_data$X_ref[, 1:25] # Only first 25 of 30 SNPs + + suppressWarnings(suppressMessages({ + result <- colocboost( + sumstat = test_data$sumstat, + X_ref = X_ref_partial, + M = 10, + output_level = 2 + ) + })) + + expect_s3_class(result, "colocboost") + # Should have 25 variables (intersection) + expect_equal(length(result$data_info$variables), 25) +}) + + +# ============================================================================ +# Test 11: compute_XtX_product correctness +# ============================================================================ + +test_that("compute_XtX_product produces correct results for all ref_label types", { + # Access internal function + compute_XtX_product <- get("compute_XtX_product", envir = asNamespace("colocboost")) + + set.seed(42) + n_ref <- 50 + p <- 20 + + sigma <- 0.9^abs(outer(1:p, 1:p, "-")) + X_ref_raw <- MASS::mvrnorm(n_ref, rep(0, p), sigma) + + # Standardize X_ref (as done in colocboost_validate_input_data) + X_ref <- Rfast::standardise(X_ref_raw, center = TRUE, scale = TRUE) + X_ref[is.na(X_ref)] <- 0 + + # Precompute LD from X_ref + LD <- get_cormat(X_ref_raw) + + beta <- rnorm(p) + + # Test LD case + result_ld <- compute_XtX_product(LD, beta, ref_label = "LD") + expected_ld <- as.vector(LD %*% beta) + expect_equal(result_ld, expected_ld) + + # Test X_ref case + result_xref <- compute_XtX_product(X_ref, beta, ref_label = "X_ref") + # Should approximate LD %*% beta + # X_ref' * X_ref / (N_ref - 1) ≈ LD + expected_xref <- as.vector(crossprod(X_ref, X_ref %*% beta) / (n_ref - 1)) + expect_equal(result_xref, expected_xref) + + # Test No_ref case + result_noref <- compute_XtX_product(1, beta, ref_label = "No_ref") + expect_equal(result_noref, beta) + + # 
X_ref and LD results should be similar (not exact due to finite N_ref) + expect_equal(result_xref, expected_ld, tolerance = 0.3) +}) + + +# ============================================================================ +# Test 12: X_ref validation in colocboost_validate_input_data +# ============================================================================ + +test_that("colocboost_validate_input_data correctly handles X_ref", { + test_data <- generate_xref_test_data(n_ref = 20, p = 30) + + # Test N_ref < P case + validated <- colocboost_validate_input_data( + sumstat = test_data$sumstat, + X_ref = test_data$X_ref + ) + + expect_equal(validated$ref_label, "X_ref") + expect_true(!is.null(validated$X_ref)) + expect_null(validated$LD) + + # Test N_ref >= P case + test_data_large <- generate_xref_test_data(n_ref = 50, p = 30) + + validated_large <- suppressMessages( + colocboost_validate_input_data( + sumstat = test_data_large$sumstat, + X_ref = test_data_large$X_ref + ) + ) + + expect_equal(validated_large$ref_label, "LD") + expect_null(validated_large$X_ref) + expect_true(!is.null(validated_large$LD)) +}) + + +# ============================================================================ +# Test 13: X_ref with output_level 3 (diagnostic details) +# ============================================================================ + +test_that("X_ref works with output_level 3 and has correct ref_label in cb_data", { + test_data <- generate_xref_test_data(n_ref = 20, p = 30) + + suppressWarnings(suppressMessages({ + result <- colocboost( + sumstat = test_data$sumstat, + X_ref = test_data$X_ref, + M = 10, + output_level = 3 + ) + })) + + expect_s3_class(result, "colocboost") + expect_true("diagnostic_details" %in% names(result)) + + # Check that ref_label is correctly stored in the model's cb_data + cb_data <- result$diagnostic_details$cb_data + for (i in seq_along(cb_data$data)) { + ref_label_i <- cb_data$data[[i]]$ref_label + expect_true(!is.null(ref_label_i), + info = 
paste("ref_label missing for data element", i)) + expect_true(ref_label_i %in% c("X_ref", "LD", "No_ref", "individual"), + info = paste("Invalid ref_label for data element", i)) + } +}) + + +# ============================================================================ +# Test 14: X_ref with XtX_beta_cache in diagnostic output +# ============================================================================ + +test_that("X_ref model has XtX_beta_cache in diagnostic output", { + test_data <- generate_xref_test_data(n_ref = 20, p = 30) + + suppressWarnings(suppressMessages({ + result <- colocboost( + sumstat = test_data$sumstat, + X_ref = test_data$X_ref, + M = 10, + output_level = 3 + ) + })) + + cb_model <- result$diagnostic_details$cb_model + for (i in seq_along(cb_model)) { + expect_true("XtX_beta_cache" %in% names(cb_model[[i]]), + info = paste("XtX_beta_cache missing for outcome", i)) + } +}) + + +# ============================================================================ +# Test 15: X_ref purity functions work correctly +# ============================================================================ + +test_that("purity functions dispatch correctly for X_ref", { + get_purity <- get("get_purity", envir = asNamespace("colocboost")) + get_between_purity <- get("get_between_purity", envir = asNamespace("colocboost")) + + set.seed(42) + n_ref <- 50 + p <- 20 + + sigma <- 0.9^abs(outer(1:p, 1:p, "-")) + X_ref <- MASS::mvrnorm(n_ref, rep(0, p), sigma) + X_ref <- Rfast::standardise(X_ref, center = TRUE, scale = TRUE) + X_ref[is.na(X_ref)] <- 0 + + LD <- cor(MASS::mvrnorm(200, rep(0, p), sigma)) + + # Test get_purity with X_ref + purity_xref <- get_purity(c(1, 2, 3), Xcorr = X_ref, ref_label = "X_ref") + expect_length(purity_xref, 3) + expect_true(all(purity_xref >= 0 & purity_xref <= 1)) + + # Test get_purity with LD + purity_ld <- get_purity(c(1, 2, 3), Xcorr = LD, ref_label = "LD") + expect_length(purity_ld, 3) + expect_true(all(purity_ld >= 0 & purity_ld <= 1)) + + 
# Test get_purity with No_ref + purity_noref <- get_purity(c(1, 2, 3), Xcorr = 1, ref_label = "No_ref") + expect_equal(purity_noref, c(0, 0, 0)) + + # Results should be similar between X_ref and LD (not exact due to different N) + expect_equal(purity_xref[1], purity_ld[1], tolerance = 0.3) + + # Test get_between_purity with X_ref + between_xref <- get_between_purity(c(1, 2), c(3, 4), Xcorr = X_ref, + miss_idx = NULL, P = p, ref_label = "X_ref") + expect_length(between_xref, 3) + expect_true(all(between_xref >= 0 & between_xref <= 1)) + + # Test get_between_purity with LD + between_ld <- get_between_purity(c(1, 2), c(3, 4), Xcorr = LD, + miss_idx = NULL, P = p, ref_label = "LD") + expect_length(between_ld, 3) + + # Test get_between_purity with No_ref + between_noref <- get_between_purity(c(1, 2), c(3, 4), Xcorr = 1, + miss_idx = NULL, P = p, ref_label = "No_ref") + expect_equal(between_noref, c(0, 0, 0)) +}) + + +# ============================================================================ +# Test 16: X_ref with get_robust_colocalization post-processing +# ============================================================================ + +test_that("get_robust_colocalization works with X_ref results", { + test_data <- generate_xref_test_data(n_ref = 20, p = 30) + + suppressWarnings(suppressMessages({ + result <- colocboost( + sumstat = test_data$sumstat, + X_ref = test_data$X_ref, + M = 10, + output_level = 2 + ) + })) + + # Should not error + expect_error( + suppressMessages( + filtered <- get_robust_colocalization(result, cos_npc_cutoff = 0.2, npc_outcome_cutoff = 0.1) + ), + NA + ) + expect_s3_class(filtered, "colocboost") +}) + + +# ============================================================================ +# Test 17: X_ref with get_robust_ucos post-processing +# ============================================================================ + +test_that("get_robust_ucos works with X_ref results", { + test_data <- generate_xref_test_data(n_ref = 500, p = 30) + + 
suppressWarnings(suppressMessages({ + result <- colocboost( + sumstat = test_data$sumstat, + X_ref = test_data$X_ref, + M = 10, + output_level = 2 + ) + })) + + skip_if(is.null(result$ucos_details), "No ucos detected") + + expect_error( + suppressMessages( + filtered <- get_robust_ucos(result, npc_outcome_cutoff = 0.1) + ), + NA + ) + expect_s3_class(filtered, "colocboost") +}) + + +# ============================================================================ +# Test 18: X_ref plotting works +# ============================================================================ + +test_that("colocboost_plot works with X_ref results", { + test_data <- generate_xref_test_data(n_ref = 20, p = 30) + + suppressWarnings(suppressMessages({ + result <- colocboost( + sumstat = test_data$sumstat, + X_ref = test_data$X_ref, + M = 10, + output_level = 2 + ) + })) + + expect_error(suppressWarnings(colocboost_plot(result)), NA) + expect_error(suppressWarnings(colocboost_plot(result, y = "vcp")), NA) +}) + + +# ============================================================================ +# Test 19: X_ref with get_colocboost_summary +# ============================================================================ + +test_that("get_colocboost_summary works with X_ref results", { + test_data <- generate_xref_test_data(n_ref = 20, p = 30) + + suppressWarnings(suppressMessages({ + result <- colocboost( + sumstat = test_data$sumstat, + X_ref = test_data$X_ref, + M = 10, + output_level = 2 + ) + })) + + summary1 <- get_colocboost_summary(result, summary_level = 1) + expect_type(summary1, "list") + + summary2 <- get_colocboost_summary(result, summary_level = 2) + expect_type(summary2, "list") +}) + + +# ============================================================================ +# Test 20: X_ref with HyPrColoc format input +# ============================================================================ + +test_that("X_ref works with effect_est and effect_se input", { + test_data <- 
generate_xref_test_data(n_ref = 20, p = 30) + + beta <- se <- matrix(0, 30, 2) + for (i in 1:2) { + for (j in 1:30) { + fit <- summary(lm(test_data$Y[, i] ~ test_data$X[, j]))$coef + if (nrow(fit) == 2) { beta[j, i] <- fit[2, 1]; se[j, i] <- fit[2, 2] } + } + } + rownames(beta) <- rownames(se) <- colnames(test_data$X) + + suppressWarnings(suppressMessages({ + result <- colocboost( + effect_est = beta, + effect_se = se, + effect_n = rep(200, 2), + X_ref = test_data$X_ref, + M = 10, + output_level = 2 + ) + })) + + expect_s3_class(result, "colocboost") + expect_equal(result$data_info$n_outcomes, 2) +}) + + +# ============================================================================ +# Test 21: LD_jk functions dispatch correctly for X_ref +# ============================================================================ + +test_that("get_LD_jk and get_LD_jk1_jk2 dispatch correctly for X_ref", { + get_LD_jk <- get("get_LD_jk", envir = asNamespace("colocboost")) + get_LD_jk1_jk2 <- get("get_LD_jk1_jk2", envir = asNamespace("colocboost")) + + set.seed(42) + n_ref <- 50 + p <- 20 + + sigma <- 0.9^abs(outer(1:p, 1:p, "-")) + X_ref <- MASS::mvrnorm(n_ref, rep(0, p), sigma) + X_ref <- Rfast::standardise(X_ref, center = TRUE, scale = TRUE) + X_ref[is.na(X_ref)] <- 0 + + LD <- cor(MASS::mvrnorm(200, rep(0, p), sigma)) + + remain_idx <- 1:p + + # get_LD_jk with X_ref + ld_jk_xref <- get_LD_jk(5, XtX = X_ref, remain_idx = remain_idx, P = p, ref_label = "X_ref") + expect_length(ld_jk_xref, p) + expect_equal(ld_jk_xref[5], 1.0, tolerance = 0.01) # Self-correlation ~ 1 + + # get_LD_jk with LD + ld_jk_ld <- get_LD_jk(5, XtX = LD, remain_idx = remain_idx, P = p, ref_label = "LD") + expect_length(ld_jk_ld, p) + expect_equal(ld_jk_ld[5], 1.0, tolerance = 0.01) + + # get_LD_jk with No_ref + ld_jk_noref <- get_LD_jk(5, XtX = 1, remain_idx = remain_idx, P = p, ref_label = "No_ref") + expect_true(all(ld_jk_noref[remain_idx] == 1)) + + # get_LD_jk1_jk2 with X_ref + ld_pair_xref <- 
get_LD_jk1_jk2(1, 2, XtX = X_ref, remain_jk = remain_idx, ref_label = "X_ref") + expect_true(abs(ld_pair_xref) <= 1) + expect_true(abs(ld_pair_xref) > 0.5) # Adjacent SNPs with 0.9 LD should be high + + # get_LD_jk1_jk2 with LD + ld_pair_ld <- get_LD_jk1_jk2(1, 2, XtX = LD, remain_jk = remain_idx, ref_label = "LD") + expect_true(abs(ld_pair_ld) <= 1) + + # get_LD_jk1_jk2 with No_ref + ld_pair_noref <- get_LD_jk1_jk2(1, 2, XtX = 1, remain_jk = remain_idx, ref_label = "No_ref") + expect_equal(ld_pair_noref, 0) +}) + + +# ============================================================================ +# Test 22: ref_label is never NULL in internal cb_data after processing +# ============================================================================ + +test_that("ref_label is always explicitly set in cb_data, never NULL", { + test_data <- generate_xref_test_data(n_ref = 20, p = 30) + + # Build cb_data directly to inspect ref_label + sumstat <- test_data$sumstat + Z_list <- lapply(sumstat, function(s) s$beta / s$sebeta) + + # X_ref case + cb_data_xref <- colocboost_init_data( + X = NULL, Y = NULL, dict_YX = NULL, + Z = Z_list, LD = NULL, X_ref = list(test_data$X_ref), ref_label = "X_ref", + N_sumstat = lapply(sumstat, function(s) s$n[1]), + dict_sumstatLD = c(1, 1), Var_y = NULL, SeBhat = NULL, + keep_variables = lapply(sumstat, function(s) s$variant) + ) + for (i in seq_along(cb_data_xref$data)) { + expect_equal(cb_data_xref$data[[i]]$ref_label, "X_ref") + } + + # LD case + cb_data_ld <- colocboost_init_data( + X = NULL, Y = NULL, dict_YX = NULL, + Z = Z_list, LD = list(test_data$LD), X_ref = NULL, ref_label = "LD", + N_sumstat = lapply(sumstat, function(s) s$n[1]), + dict_sumstatLD = c(1, 1), Var_y = NULL, SeBhat = NULL, + keep_variables = lapply(sumstat, function(s) s$variant) + ) + for (i in seq_along(cb_data_ld$data)) { + expect_equal(cb_data_ld$data[[i]]$ref_label, "LD") + } + + # Individual data case + Y_list <- lapply(1:2, function(l) as.matrix(test_data$Y[, l])) 
+ X_list <- list(test_data$X, test_data$X) + cb_data_ind <- colocboost_init_data( + X = X_list, Y = Y_list, dict_YX = c(1, 2), + Z = NULL, LD = NULL, X_ref = NULL, ref_label = NULL, + N_sumstat = NULL, dict_sumstatLD = NULL, Var_y = NULL, SeBhat = NULL, + keep_variables = lapply(X_list, colnames) + ) + for (i in seq_along(cb_data_ind$data)) { + expect_equal(cb_data_ind$data[[i]]$ref_label, "individual") + } +}) \ No newline at end of file diff --git a/tests/testthat/test_inference.R b/tests/testthat/test_inference.R index 9c2c9d0..1b23c0f 100644 --- a/tests/testthat/test_inference.R +++ b/tests/testthat/test_inference.R @@ -790,6 +790,8 @@ generate_test_cb_obj_with_ucos <- function(n = 100, p = 20, L = 2, seed = 42) { dict_YX = c(1, 2), Z = NULL, LD = NULL, + X_ref = NULL, + ref_label = NULL, N_sumstat = NULL, dict_sumstatLD = NULL, Var_y = NULL, @@ -935,6 +937,8 @@ test_that("get_ucos_evidence handles individual-level data", { dict_YX = c(1, 2), Z = NULL, LD = NULL, + X_ref = NULL, + ref_label = NULL, N_sumstat = NULL, dict_sumstatLD = NULL, Var_y = NULL, @@ -1040,6 +1044,8 @@ test_that("get_ucos_evidence handles summary statistics data", { dict_YX = NULL, Z = Z_list, LD = list(LD_matrix), + X_ref = NULL, + ref_label = "LD", N_sumstat = lapply(sumstat_list, function(s) s$n[1]), dict_sumstatLD = c(1, 1), Var_y = NULL, diff --git a/tests/testthat/test_model.R b/tests/testthat/test_model.R index df998e8..fa28c1d 100644 --- a/tests/testthat/test_model.R +++ b/tests/testthat/test_model.R @@ -237,6 +237,7 @@ test_that("colocboost correctly maps focal outcome to keep_variables with dict_k dict_YX = dict_YX, Z = lapply(sumstat_list, function(s) s$beta / s$sebeta), # z-scores LD = list(LD_superset), + ref_label = "LD", N_sumstat = lapply(sumstat_list, function(s) s$n[1]), dict_sumstatLD = dict_sumstatLD, Var_y = NULL, diff --git a/tests/testthat/test_utils.R b/tests/testthat/test_utils.R index fb1b999..e62fcf9 100644 --- a/tests/testthat/test_utils.R +++ 
b/tests/testthat/test_utils.R @@ -571,6 +571,7 @@ test_that("colocboost_init_data handles complex dictionary mappings", { dict_YX = dict_YX, Z = Z_list, LD = LD_list, + ref_label = "LD", N_sumstat = N_sumstat, dict_sumstatLD = dict_sumstatLD, Var_y = NULL, diff --git a/vignettes/Input_Data_Format.Rmd b/vignettes/Input_Data_Format.Rmd index d1deba3..9a7aca5 100644 --- a/vignettes/Input_Data_Format.Rmd +++ b/vignettes/Input_Data_Format.Rmd @@ -59,6 +59,8 @@ head(Sumstat_5traits$sumstat[[1]]) - `LD` is a matrix of LD. This matrix does not need to contain the exact same variants as in `sumstat`, but the `colnames` and `rownames` of `LD` should include the `variant` names for proper alignment. +- `X_ref` (alternative to `LD`) is a reference panel genotype matrix (N_ref x P). When the number of variants is large, passing `X_ref` directly avoids storing the full P x P LD matrix. See [Summary Statistics Colocalization](https://statfungen.github.io/colocboost/articles/Summary_Statistics_Colocalization.html) for details. + The input format for multiple traits is similar, but `sumstat` should be a list of data frames `sumstat = list(sumstat1, sumstat2, sumstat3)`. 
The flexibility of input format for multiple traits is as follows (see detailed usage with different input formats, refer to [Summary Statistics Colocalization](https://statfungen.github.io/colocboost/articles/Summary_Level_Colocalization.html)): diff --git a/vignettes/Partial_Overlap_Variants.Rmd b/vignettes/Partial_Overlap_Variants.Rmd index e179fa4..b74d7f3 100644 --- a/vignettes/Partial_Overlap_Variants.Rmd +++ b/vignettes/Partial_Overlap_Variants.Rmd @@ -22,7 +22,7 @@ This vignette demonstrates how ColocBoost handles partial overlapping variants a library(colocboost) ``` -![](../man/figures/missing_representation.png) +![Illustration of partial overlapping variants across traits](../man/figures/missing_representation.png) diff --git a/vignettes/Summary_Statistics_Colocalization.Rmd b/vignettes/Summary_Statistics_Colocalization.Rmd index 3177a68..2dc127a 100644 --- a/vignettes/Summary_Statistics_Colocalization.Rmd +++ b/vignettes/Summary_Statistics_Colocalization.Rmd @@ -177,7 +177,31 @@ res$cos_details$cos$cos_index ``` -## 3.4. HyPrColoc compatible format: effect size and standard error matrices +## 3.4. Using a reference panel genotype matrix (X_ref) instead of LD + +When the number of variants P is very large, storing the full P x P LD matrix may be infeasible. +If you have the reference panel genotype matrix from which LD would be computed, you can pass it directly via `X_ref`. +ColocBoost will compute LD products on the fly, avoiding the P x P memory cost. + +This is beneficial when the reference panel sample size (N_ref) is less than the number of variants (P). +When N_ref >= P, ColocBoost automatically precomputes the LD matrix internally for efficiency. + +Provide either `LD` or `X_ref`, not both. The `dict_sumstatLD` dictionary works with `X_ref` the same way as with `LD`. 
+ +```{r x-ref-example} +# Use genotype matrix directly as reference panel +data("Ind_5traits") +X_ref <- Ind_5traits$X[[1]] + +# Run colocboost with X_ref instead of LD +res <- colocboost(sumstat = Sumstat_5traits$sumstat, X_ref = X_ref) + +# Identified CoS +res$cos_details$cos$cos_index +``` + + +## 3.5. HyPrColoc compatible format: effect size and standard error matrices ColocBoost also provides a flexibility to use HyPrColoc compatible format for summary statistics with and without LD matrix.