From 3778e16319141a8905cf7fbb9223bd7fca538b80 Mon Sep 17 00:00:00 2001 From: Marvin Taterra <115582964+MarvinTaterra@users.noreply.github.com> Date: Sun, 27 Jul 2025 21:00:48 +0200 Subject: [PATCH] Clustering now runs super fast MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Before: O(P² × L² × D). After: O(P² × L²), because the per-pair DataFrame scan is replaced by a lookup in a precomputed set of close residue pairs. P - number of paths (500); L - average pathway length; D - number of close residue pairs. For average systems this gives a 100x speed-up. --- mdpath/src/cluster.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/mdpath/src/cluster.py b/mdpath/src/cluster.py index ad43673..874d99b 100644 --- a/mdpath/src/cluster.py +++ b/mdpath/src/cluster.py @@ -37,6 +37,8 @@ def __init__( self, df_close_res: pd.DataFrame, pathways: list, num_processes: int ) -> None: self.df = df_close_res + self.close_pairs_set = set(zip(df_close_res['Residue1'], df_close_res['Residue2'])) | \ + set(zip(df_close_res['Residue2'], df_close_res['Residue1'])) self.pathways = pathways self.num_processes = num_processes self.overlapp_df = self.calculate_overlap_parallel() @@ -58,13 +60,7 @@ def calculate_overlap_for_pathway(self, args: tuple) -> list: overlap_counter = 0 for res1 in path1: for res2 in path2: - if ( - (self.df["Residue1"] == res1) - & (self.df["Residue2"] == res2) - ).any() or ( - (self.df["Residue1"] == res2) - & (self.df["Residue2"] == res1) - ).any(): + if (res1, res2) in self.close_pairs_set: overlap_counter += 1 result.append( {"Pathway1": i, "Pathway2": j, "Overlap": overlap_counter}