From 33f1fb10b5cc2c6b7f0aaa87ef7225feb3e4f09c Mon Sep 17 00:00:00 2001 From: Mitchell Anthony Wood Date: Mon, 3 Nov 2025 14:08:17 -0700 Subject: [PATCH 1/6] adding wasserstein metric as a loss function --- GRSlib/io/sections/scoring.py | 3 + GRSlib/motion/create.py | 14 ++-- GRSlib/motion/lossfunc/moments.py | 4 +- GRSlib/motion/lossfunc/wasserstein.py | 104 ++++++++++++++++++++++++++ GRSlib/motion/motion.py | 6 +- GRSlib/motion/scoring_factory.py | 1 + 6 files changed, 120 insertions(+), 12 deletions(-) create mode 100644 GRSlib/motion/lossfunc/wasserstein.py diff --git a/GRSlib/io/sections/scoring.py b/GRSlib/io/sections/scoring.py index fc46a5f..bbba5bb 100644 --- a/GRSlib/io/sections/scoring.py +++ b/GRSlib/io/sections/scoring.py @@ -22,6 +22,9 @@ def __init__(self, name, config, pt,infile, args): #Score reduction if exact moment value is matched self.smartmask = self.get_value("SCORING", "smartmask", 0, interpreter="int") #Use a subset of descriptors of given count + elif self.score_type == "wasserstein": + self.smartmask = self.get_value("SCORING", "smartmask", 0, interpreter="int") + #Use a subset of descriptors of given count elif self.score_type == "entropy": self.internal_entropy = self.get_value("SCORING", "internal_entropy", 1.0) diff --git a/GRSlib/motion/create.py b/GRSlib/motion/create.py index ee9b027..6b643a5 100644 --- a/GRSlib/motion/create.py +++ b/GRSlib/motion/create.py @@ -92,13 +92,13 @@ def from_template(self,*args): # print("Starting population using provided template") population = [] duplicate = self.convert.lammps_to_ase(args[0][0]) - for remaining in range(self.config.sections["GENETIC"].population_size): - new_candidate = ASETools.get_random_pos(duplicate, self.config.sections["GENETIC"].min_atoms, - self.config.sections["GENETIC"].max_atoms, self.config.sections["BASIS"].elements[0]) - population.append(new_candidate) + for candidate in range(self.config.sections["GENETIC"].population_size): + population.append(duplicate) +# for 
remaining in range(self.config.sections["GENETIC"].population_size-int(self.config.sections["GENETIC"].population_size/2)): +# new_candidate = ASETools.get_random_pos(duplicate, self.config.sections["GENETIC"].min_atoms, +# self.config.sections["GENETIC"].max_atoms, self.config.sections["BASIS"].elements[0]) +# population.append(new_candidate) -# for candidate in range(self.config.sections["GENETIC"].population_size): -# population.append(duplicate) return population def from_lattice(self,*args): @@ -109,7 +109,7 @@ def from_lattice(self,*args): def from_random(self,*args): #More of a super function that will call a bunch of the ones below - #print("Starting population of random low energy structures of provided elements") +# print("Starting population of random low energy structures of provided elements") population = [] # From types, find cell num_ele = len(self.config.sections["BASIS"].elements) diff --git a/GRSlib/motion/lossfunc/moments.py b/GRSlib/motion/lossfunc/moments.py index ceb5a87..149315d 100644 --- a/GRSlib/motion/lossfunc/moments.py +++ b/GRSlib/motion/lossfunc/moments.py @@ -34,14 +34,14 @@ def __call__(self, *args): energy[0] = self.config.sections["SCORING"].strength_target*score #Scaled score (energy) between current and target forces = self.grad_loss(current_desc, self.target_desc) #Forces between current and target beta[:,:]= 0 - beta[:,self.mask] = self.config.sections["SCORING"].strength_target*forces #Scaled forces between current and target + beta[:,:] = self.config.sections["SCORING"].strength_target*forces #Scaled forces between current and target #TODO Explain score = self.construct_loss(self.prior_desc, self.target_desc) # energy[0] += self.config.sections["SCORING"].strength_prior*score #Scaled score (energy) between current and prior # print(" Target, Prior Scores: ", energy[0], score) forces = self.grad_loss(current_desc, self.prior_desc) #Forces between current and prior structures - beta[:,self.mask] += 
self.config.sections["SCORING"].strength_prior*forces #Scaled forces between current and prior + beta[:,:] += self.config.sections["SCORING"].strength_prior*forces #Scaled forces between current and prior elif self.mode=="update": self.update(args) diff --git a/GRSlib/motion/lossfunc/wasserstein.py b/GRSlib/motion/lossfunc/wasserstein.py new file mode 100644 index 0000000..fe3509e --- /dev/null +++ b/GRSlib/motion/lossfunc/wasserstein.py @@ -0,0 +1,104 @@ +from GRSlib.motion.scoring import Scoring +import jax.numpy as jnp +import numpy as np +import scipy as sp +from scipy.stats import wasserstein_distance +from jax import grad, jit +from functools import partial + +class Wasserstein(Scoring): + def __init__(self, *args): #pt, config, target_desc, prior_desc): + self.pt, self.config, descriptors = args + self.target_desc = descriptors.get('target',None).copy() + self.prior_desc = descriptors.get('prior',None).copy() + self.n_descriptors = np.shape(self.target_desc)[1] + self.mask = list(range(self.n_descriptors)) + self.n_params = 1 #Variables LAMMPS needs to know about + self.n_elements = self.config.sections['BASIS'].numtypes #Variables LAMMPS needs to know about + self.loss_ff_grad = grad(self.construct_loss) + self.grad_loss = grad(self.construct_loss) + self.mode = "update" + + def __call__(self, *args): + #The arguments that this function brings in are super improtant and are expected by LAMMPS MLIAP package. + #They are (elems, current_desc, beta, energy) + #Integer values of LAMMPS atom types are in elems + #Descriptors as a per-atom array into current_desc. + #Per-atom forces are expected for beta + #Per-atom energy is expected for energy, need to do some testing if per-atom values can be reported back. 
+ if self.mode=="score": + elems, current_desc, beta, energy = args + self.n_atoms = np.shape(current_desc)[0] + + #TODO Explain + score = self.construct_loss(current_desc, self.target_desc) + energy[:] = 0 + energy[0] = self.config.sections["SCORING"].strength_target*score #Scaled score (energy) between current and target + forces = self.grad_loss(current_desc, self.target_desc) #Forces between current and target + beta[:,:]= 0 + beta[:,:] = self.config.sections["SCORING"].strength_target*forces #Scaled forces between current and target + + #TODO Explain + score = self.construct_loss(self.prior_desc, self.target_desc) +# energy[0] += self.config.sections["SCORING"].strength_prior*score #Scaled score (energy) between current and prior +# print(" Target, Prior Scores: ", energy[0], score) + forces = self.grad_loss(current_desc, self.prior_desc) #Forces between current and prior structures + beta[:,:] += self.config.sections["SCORING"].strength_prior*forces #Scaled forces between current and prior + + elif self.mode=="update": + self.update(args) + beta = self.grad_loss(self.target_desc, self.target_desc) + + + def set_mode_update(self): + self.mode="update" + + def set_mode_score(self): + self.mode="score" + + def update(self,*args): + pt, config, descriptors = args[0] + self.target_desc = descriptors.get('target',None).copy() + self.prior_desc = descriptors.get('prior',None).copy() + self.n_descriptors = np.shape(self.target_desc)[1] + if self.config.sections["SCORING"].smartmask > 0: + indices = [] + target_std = np.std(self.target_desc, axis=0) + nmax_var = len(target_std) - int(np.ceil(self.config.sections["SCORING"].smartmask/2)) + nmin_var = int(np.floor(self.config.sections["SCORING"].smartmask/2)) + list_max_var = sorted(target_std,key=lambda x: x)[nmax_var:] + list_min_var = sorted(target_std,key=lambda x: x)[:nmin_var] + for val in list_max_var: + indices.append(np.where(target_std==val)[0][0]) + for val in list_min_var: + 
indices.append(np.where(target_std==val)[0][0]) + + self.mask = np.zeros(len(target_std), dtype=int) + self.mask[indices] = 1 + + else: + self.mask = np.zeros(self.n_descriptors, dtype=int) + 1 + + + if self.n_elements > 1: + self.current_desc = self.current_desc.flatten() + self.target_desc = self.target_desc.flatten() + self.prior_desc = self.prior_desc.flatten() + self.mode = "score" + + @partial(jit, static_argnums=(0,)) + def construct_loss(self, current_desc, target_desc): + #This needs to be a dynamic call like is done in descriptor calcs + num_descriptors = jnp.shape(self.target_desc)[1] + earthmover = 0.0 + for dimension in range(num_descriptors): + curr_max, curr_min = jnp.amax(current_desc[:,dimension], axis=0),jnp.amin(current_desc[:,dimension], axis=0) + target_max, target_min = jnp.amax(target_desc[:,dimension], axis=0),jnp.amin(target_desc[:,dimension], axis=0) + low_limit = jnp.min(curr_min, target_min) + high_limit = jnp.max(curr_max, target_max) + histo_target, edges = jnp.histogram(target_desc[:,dimension], bins=100, range=(low_limit,high_limit), density=True) + histo_curr, edges = jnp.histogram(current_desc[:,dimension], bins=100, range=(low_limit,high_limit), density=True) + earthmover += wasserstein_distance(edges[:-1], edges[:-1], histo_target, histo_curr) + return earthmover + + diff --git a/GRSlib/motion/motion.py b/GRSlib/motion/motion.py index 0bcc842..88ef318 100644 --- a/GRSlib/motion/motion.py +++ b/GRSlib/motion/motion.py @@ -32,7 +32,7 @@ def fire_min(self,data): min_style fire min_modify integrator eulerexplicit tmax 10.0 tmin 0.0 delaystep 5 dtgrow 1.1 dtshrink 0.5 alpha0 0.1 alphashrink 0.99 vdfmax 100000 halfstepback no initialdelay no dump 1 all custom 1 minimize_fire.dump id type x y z fx fy fz - displace_atoms all random 0.01 0.01 0.01 %s units box + displace_atoms all random 0.1 0.1 0.1 %s units box minimize 1e-6 1e-6 %s %s write_data %s_last.data""" % (np.random.randint(low=1, 
high=99999),self.config.sections['GRADIENT'].nsteps, self.config.sections['GRADIENT'].nsteps, self.config.sections['TARGET'].job_prefix) before_score, after_score = self.scoring.add_cmds_before_score(add_cmds,data) @@ -50,7 +50,7 @@ def line_min(self,data): min_style cg min_modify dmax 0.05 line quadratic dump 1 all custom 1 minimize_line.dump id type x y z fx fy fz - displace_atoms all random 0.01 0.01 0.01 %s units box + displace_atoms all random 0.1 0.1 0.1 %s units box minimize 1e-6 1e-6 %s %s write_data %s_last.data """ % (np.random.randint(low=1, high=99999),self.config.sections['GRADIENT'].nsteps, self.config.sections['GRADIENT'].nsteps, self.config.sections['TARGET'].job_prefix) @@ -70,7 +70,7 @@ def box_min(self,data): min_modify dmax 0.05 line quadratic dump 1 all custom 1 minimize_box.dump id type x y z fx fy fz fix box all box/relax iso 0.0 vmax 0.001 - displace_atoms all random 0.01 0.01 0.01 %s units box + displace_atoms all random 0.1 0.1 0.1 %s units box minimize 1e-6 1e-6 %s %s write_data %s_last.data""" % (np.random.randint(low=1, high=99999),self.config.sections['GRADIENT'].nsteps, self.config.sections['GRADIENT'].nsteps, self.config.sections['TARGET'].job_prefix) before_score, after_score = self.scoring.add_cmds_before_score(add_cmds,data) diff --git a/GRSlib/motion/scoring_factory.py b/GRSlib/motion/scoring_factory.py index f9276c0..41195a4 100644 --- a/GRSlib/motion/scoring_factory.py +++ b/GRSlib/motion/scoring_factory.py @@ -2,6 +2,7 @@ from GRSlib.motion.scoring import Scoring from GRSlib.motion.lossfunc.moments import Moments from GRSlib.motion.lossfunc.entropy import Entropy +from GRSlib.motion.lossfunc.wasserstein import Wasserstein # from GRSlib.motion.lossfunc.moments import * # Need to direct the scoring class to the appropiate loss function generator, this will connect From d9201db1edd74ce2ce72e6d8d370dfb40ac25873 Mon Sep 17 00:00:00 2001 From: Mitchell Anthony Wood Date: Tue, 2 Dec 2025 14:19:46 -0700 Subject: [PATCH 2/6] Stashing 
changes 2Dec25 --- GRSlib/io/sections/genetic.py | 12 ++++----- GRSlib/motion/create.py | 3 +-- GRSlib/motion/lossfunc/moments.py | 45 +++++++++++++++---------------- GRSlib/motion/motion.py | 3 ++- 4 files changed, 30 insertions(+), 33 deletions(-) diff --git a/GRSlib/io/sections/genetic.py b/GRSlib/io/sections/genetic.py index 613a0c0..19c39d3 100644 --- a/GRSlib/io/sections/genetic.py +++ b/GRSlib/io/sections/genetic.py @@ -11,15 +11,13 @@ def __init__(self, name, config, pt,infile, args): 'composition_constraint', 'composition', 'start_type', 'lattice_type', 'structure_template', 'frac_pop_per_ref', 'dev_per_ref','reference_phases'] self._check_section() self.mutation_rate = self.get_value("GENETIC", "mutation_rate", 0.5, interpreter="float") - self.frac_pop_per_ref = [float(k) for k in self.get_value("GENETIC", "frac_pop_per_ref", "1.0").split()] - self.dev_per_ref =[float(kk) for kk in self.get_value("GENETIC", "dev_per_ref", "0.2").split()] - self.reference_phases = self.get_value("GENETIC", "reference_phases", "bcc").split() + #self.reference_phases = self.get_value("GENETIC", "reference_phases", "bcc").split() self.mutation_types = self.get_value("GENETIC", "mutation_types", {"perturb": 0.5, "change_ele": 0.0, "atom_count" : 0.1, "volume" : 0.2, "minimize" : 0.2}, interpreter="dict") self.population_size = self.get_value("GENETIC", "population_size", 20, interpreter="int") self.ngenerations = self.get_value("GENETIC", "ngenerations", 10, interpreter="int") self.max_atoms = self.get_value("GENETIC", "max_atoms", 100, interpreter="int") self.min_atoms = self.get_value("GENETIC", "min_atoms", 10, interpreter="int") - self.reference_phases = self.get_value("GENETIC","reference_phases", "bcc").split() + #self.reference_phases = self.get_value("GENETIC","reference_phases", "bcc").split() self.max_length_aspect = self.get_value("GENETIC", "max_length_aspec", 3.0, interpreter="float") self.max_angle_aspect = self.get_value("GENETIC", "max_angle_aspec", 3.0, 
interpreter="float") self.density_ratio = self.get_value("GENETIC", "density_ratio", 1.3, interpreter="float") #This will allow for 30% changes in either direction of density @@ -34,7 +32,9 @@ def __init__(self, name, config, pt,infile, args): self.lattice_type = self.get_value("GENETIC", "lattice_type", None) elif self.start_type == "template": self.structure_template = self.get_value("GENETIC", "structure_template", None) -# elif self.start_type == "phases": - # self.reference_phases = self.get_value("GENETIC", "reference_phases", None, interpreter="list") + elif self.start_type == "phases": + self.reference_phases = self.get_value("GENETIC","reference_phases", "bcc").split() + self.frac_pop_per_ref = [float(k) for k in self.get_value("GENETIC", "frac_pop_per_ref", "1.0").split()] + self.dev_per_ref =[float(kk) for kk in self.get_value("GENETIC", "dev_per_ref", "0.2").split()] self.delete() diff --git a/GRSlib/motion/create.py b/GRSlib/motion/create.py index cd76b2e..945bdb4 100644 --- a/GRSlib/motion/create.py +++ b/GRSlib/motion/create.py @@ -111,8 +111,7 @@ def from_lattice(self,*args): def from_phases(self,*args): # similar to template but start from a known lattice verbose = False - #value to compress/expand cells by (see TODO below) - compex = 0.0 + compex = 0.0 #value to compress/expand cells by (see TODO below) population = [] references_to_try = self.config.sections["GENETIC"].reference_phases #= ['hpc','fcc','bcc'] pop_size = self.config.sections["GENETIC"].population_size diff --git a/GRSlib/motion/lossfunc/moments.py b/GRSlib/motion/lossfunc/moments.py index 86a4d28..2f30bd7 100644 --- a/GRSlib/motion/lossfunc/moments.py +++ b/GRSlib/motion/lossfunc/moments.py @@ -32,18 +32,27 @@ def __call__(self, *args): #TODO Explain score = self.construct_loss(current_desc, self.target_desc) + energy[:] = 0 - energy[0] = self.config.sections["SCORING"].strength_target*score #Scaled score (energy) between current and target forces = self.grad_loss(current_desc, 
self.target_desc) #Forces between current and target beta[:,:]= 0 - beta[:,:] = self.config.sections["SCORING"].strength_target*forces #Scaled forces between current and target + if self.config.sections['SCORING'].norm_by_numdesc == True: + energy[0] = self.config.sections["SCORING"].strength_target*score/self.n_descriptors #Scaled score (energy) between current and target + beta[:,:] = self.config.sections["SCORING"].strength_target*forces/self.n_descriptors #Scaled forces between current and target + else: + energy[0] = self.config.sections["SCORING"].strength_target*score + beta[:,:] = self.config.sections["SCORING"].strength_target*forces + #TODO Explain score = self.construct_loss(self.prior_desc, self.target_desc) # energy[0] += self.config.sections["SCORING"].strength_prior*score #Scaled score (energy) between current and prior # print(" Target, Prior Scores: ", energy[0], score) forces = self.grad_loss(current_desc, self.prior_desc) #Forces between current and prior structures - beta[:,:] += self.config.sections["SCORING"].strength_prior*forces #Scaled forces between current and prior + if self.divide_by_numdesc: + beta[:,:] += self.config.sections["SCORING"].strength_prior*forces/self.n_descriptors #Scaled forces between current and prior + else: + beta[:,:] += self.config.sections["SCORING"].strength_prior*forces elif self.mode=="update": self.update(args) @@ -125,13 +134,10 @@ def first_moment(self, current_desc, target_desc): current_avg = jnp.average(current_desc, axis=0)*self.mask target_avg = jnp.average(target_desc, axis=0)*self.mask tst_residual = jnp.sum(jnp.nan_to_num(jnp.abs(current_avg-target_avg))) - tst_residual_av = jnp.average(jnp.nan_to_num(jnp.abs(current_avg-target_avg))) +# tst_residual_av = jnp.average(jnp.nan_to_num(jnp.abs(current_avg-target_avg))) is_zero = jnp.array(jnp.isclose(tst_residual,jnp.zeros(tst_residual.shape)),dtype=int) bonus = -jnp.sum(is_zero*(float(self.config.sections['SCORING'].moments_bonus[0]))) - if 
self.divide_by_numdesc: - tst_residual_final = tst_residual_av*float(self.config.sections['SCORING'].moments_coeff[0]) + bonus #MAE + bonus - else: - tst_residual_final = tst_residual*float(self.config.sections['SCORING'].moments_coeff[0]) + bonus #MAE + bonus + tst_residual_final = tst_residual*float(self.config.sections['SCORING'].moments_coeff[0]) + bonus #MAE + bonus return tst_residual_final @partial(jit, static_argnums=(0,)) @@ -139,13 +145,10 @@ def second_moment(self, current_desc, target_desc): current_std = jnp.std(current_desc, axis=0)*self.mask target_std = jnp.std(target_desc, axis=0)*self.mask tst_residual = jnp.sum(jnp.nan_to_num(jnp.abs(current_std-target_std))) - tst_residual_av = jnp.average(jnp.nan_to_num(jnp.abs(current_std-target_std))) +# tst_residual_av = jnp.average(jnp.nan_to_num(jnp.abs(current_std-target_std))) is_zero = jnp.array(jnp.isclose(tst_residual,jnp.zeros(tst_residual.shape)),dtype=int) bonus = -jnp.sum(is_zero*float(self.config.sections['SCORING'].moments_bonus[1])) - if self.divide_by_numdesc: - tst_residual_final = tst_residual_av*float(self.config.sections['SCORING'].moments_coeff[1]) + bonus #MAE + bonus - else: - tst_residual_final = tst_residual*float(self.config.sections['SCORING'].moments_coeff[1]) + bonus #MAE + bonus + tst_residual_final = tst_residual*float(self.config.sections['SCORING'].moments_coeff[1]) + bonus #MAE + bonus return tst_residual_final @partial(jit, static_argnums=(0,)) @@ -162,13 +165,10 @@ def third_moment(self, current_desc, target_desc): target_skew = 3.0*(target_avg-target_med)/target_std tst_residual = jnp.sum(jnp.nan_to_num(jnp.abs(current_skew-target_skew))) - tst_residual_av = jnp.average(jnp.nan_to_num(jnp.abs(current_skew-target_skew))) +# tst_residual_av = jnp.average(jnp.nan_to_num(jnp.abs(current_skew-target_skew))) is_zero = jnp.array(jnp.isclose(tst_residual,jnp.zeros(tst_residual.shape)),dtype=int) - bonus = -jnp.sum(is_zero*float(self.config.sections['SCORING'].moments_bonus[2])) - 
if self.divide_by_numdesc: - tst_residual_final = tst_residual_av*float(self.config.sections['SCORING'].moments_coeff[2]) + bonus #MAE + bonus - else: - tst_residual_final = tst_residual*float(self.config.sections['SCORING'].moments_coeff[2]) + bonus #MAE + bonus + bonus = -jnp.sum(is_zero*float(self.config.sections['SCORING'].moments_bonus[2])) + tst_residual_final = tst_residual*float(self.config.sections['SCORING'].moments_coeff[2]) + bonus #MAE + bonus return tst_residual_final @partial(jit, static_argnums=(0,)) @@ -183,11 +183,8 @@ def fourth_moment(self, current_desc, target_desc): target_kurt = jnp.average(((target_desc-target_avg)/target_std)**4.0)-3.0 tst_residual = jnp.sum(jnp.nan_to_num(jnp.abs(current_kurt-target_kurt))) - tst_residual_av = jnp.average(jnp.nan_to_num(jnp.abs(current_kurt-target_kurt))) +# tst_residual_av = jnp.average(jnp.nan_to_num(jnp.abs(current_kurt-target_kurt))) is_zero = jnp.array(jnp.isclose(tst_residual,jnp.zeros(tst_residual.shape)),dtype=int) bonus = -jnp.sum(is_zero*float(self.config.sections['SCORING'].moments_bonus[3])) - if self.divide_by_numdesc: - tst_residual_final = tst_residual_av*float(self.config.sections['SCORING'].moments_coeff[3]) + bonus #MAE + bonus - else: - tst_residual_final = tst_residual*float(self.config.sections['SCORING'].moments_coeff[3]) + bonus #MAE + bonus + tst_residual_final = tst_residual*float(self.config.sections['SCORING'].moments_coeff[3]) + bonus #MAE + bonus return tst_residual_final diff --git a/GRSlib/motion/motion.py b/GRSlib/motion/motion.py index 092684d..23a8dfb 100644 --- a/GRSlib/motion/motion.py +++ b/GRSlib/motion/motion.py @@ -49,6 +49,7 @@ def line_min(self,data,tole=0.0,tolf=1.e-10,dmax=0.5): min_style cg min_modify dmax %2.2f line quadratic dump 1 all custom 1 minimize_line.dump id type x y z fx fy fz + displace_atoms all random 0.01 0.01 0.01 12345 units box minimize %1.1E %1.1E %s %s write_data %s_last.data """ % (dmax,tole,tolf, self.config.sections['GRADIENT'].nsteps, 
self.config.sections['GRADIENT'].nsteps, self.config.sections['TARGET'].job_prefix) @@ -261,7 +262,7 @@ def unique_selection(self, data): #scores.append([iteration, candidate, file_name, self.scoring.get_score(lammps_data)]) score_win = self.scoring.get_score(self.convert.ase_to_lammps(atoms_winner,file_name +'-win')) score_conv = self.scoring.get_score(lammps_data) - print('score comp', candidate, len(batch), score_conv, score_win) + #print('score comp', candidate, len(batch), score_conv, score_win) #TODO help james understand why the 'winner' score not the lowest candidate score in line below # winner is now lowest score #print('score comp', candidate, len(batch), score_conv, score_win) From 0df220f4c9c002c62f06626962e6cb3dc33b8410 Mon Sep 17 00:00:00 2001 From: Mitchell Anthony Wood Date: Tue, 2 Dec 2025 14:21:08 -0700 Subject: [PATCH 3/6] stashing more changes 2Dec25 --- testing/BCCtoFCC.py | 84 +++++++++++++++++--------------------- testing/EnsembleScoring.py | 13 ++++-- 2 files changed, 46 insertions(+), 51 deletions(-) diff --git a/testing/BCCtoFCC.py b/testing/BCCtoFCC.py index 0098775..eab7aa5 100644 --- a/testing/BCCtoFCC.py +++ b/testing/BCCtoFCC.py @@ -14,12 +14,14 @@ "descriptor": "ACE", "numTypes": 1, "elements": "W", #Needs to be a dict? 
- "rcutfac": 5.5, - "lambda": 1.4, - "ranks": "1 2 3", - "lmax": "0 0 0", - "lmin": "0 0 0", - "nmax": "8 1 1", + "rcutfac": 4.276275664, + "lambda": 0.7075106628, + "rcinner": 0.00, + "drcinner": 0.01, + "ranks": "1 2 3 4", + "lmax": "0 3 3 0", + "lmin": "0 0 0 0", + "nmax": "8 6 4 2", "nmaxbase": 8, "bzeroflag": 0 }, @@ -29,13 +31,14 @@ "strength_target": 1.0, "strength_prior": 0.0, "moments": "mean stdev" , - "moments_coeff": "1.0 0.01", + "moments_coeff": "1.0 0.0", "moments_bonus": "0 0" , + "norm_by_numdesc": True, }, "TARGET": { "target_fname": "fcc.data", - "target_fdesc": "fcc.npy", +# "target_fdesc": "fcc.npy", "start_fname": "bcc.data", "prior_fdesc": "prior.npy", "job_prefix": "BCCtoFCC" @@ -46,15 +49,18 @@ "ml_strength": 1.0, "nsteps": 1000, "temperature": 100.0, - "min_type": "temp" + "min_type": "box" }, "GENETIC": { - "start_type": "random", #Can be random or template right now. If template, starting generation is ["TARGET"].start_fname + "start_type": "phases", #Can be random or template right now. 
If template, starting generation is ["TARGET"].start_fname + "reference_phases": "bcc", + "frac_pop_per_ref": "1.0", + "dev_per_ref": "0.2", "mutation_rate": 0.5, - "mutation_types": {"perturb": 0.3, "change_ele": 0.0, "atom_count" : 0.30, "volume" : 0.30, "minimize" : 0.05, "ortho_cell" : 0.05}, + "mutation_types": {"perturb": 0.0, "change_ele": 0.0, "atom_count" : 0.00, "volume" : 1.00, "minimize" : 0.00, "ortho_cell" : 0.00}, "population_size": 100, - "ngenerations": 10, + "ngenerations": 2, "max_atoms": 50, "min_atoms": 10, "max_length_aspect": 2.0, @@ -67,8 +73,23 @@ grs = GRS(settings,comm=comm) tourny_winners = [] + score = grs.get_score(settings["TARGET"]["start_fname"]) -print(" Starting Score:",score) +grs.config.sections['GRADIENT'].min_type = "line" +line_relaxed_struct = grs.gradient_move(settings["TARGET"]["start_fname"]) +line_score = grs.get_score(line_relaxed_struct) +grs.config.sections['GRADIENT'].min_type = "temp" +temp_relaxed_struct = grs.gradient_move(settings["TARGET"]["start_fname"]) +temp_score = grs.get_score(temp_relaxed_struct) +grs.config.sections['GRADIENT'].min_type = "box" +box_relaxed_struct = grs.gradient_move(settings["TARGET"]["start_fname"]) +box_score = grs.get_score(box_relaxed_struct) + +a=np.load('BCCtoFCC_last.npy') +print("Desc_Count:",np.shape(a)[1]) +print("Scores (Starting, box, line, temp):",score,box_score,line_score,temp_score) + +exit() scores, best_struct = grs.genetic_move(settings["TARGET"]["target_fname"]) @@ -78,38 +99,7 @@ shutil.move(best_struct, renamed_best) tourny_winners.append(renamed_best) -print("Setting priors with:",tourny_winners) -grs.set_prior(tourny_winners) -grs.config.sections['SCORING'].strength_target = 0.0 -grs.config.sections['SCORING'].strength_prior = 1.0 -#grs.config.view_state('SCORING') -score = grs.get_score(renamed_best) -print(" Score wrt Priors (best candidates of each gen):",score) - -grs.config.sections['GENETIC'].start_type = "template" - -for i in range(10): - for file in 
glob.glob(grs.config.sections['TARGET'].job_prefix + "_Cand*Gen*"): - if file not in [row[2] for row in tourny_winners]: - os.remove(file) - grs.config.sections['SCORING'].strength_target = 1.0 - grs.config.sections['SCORING'].strength_prior = 0.0 - - scores, best_struct = grs.genetic_move(renamed_best) - - score = grs.get_score(best_struct) - print(" Best Score:",score, "from",best_struct) - renamed_best = "Winner-%s_"%i+best_struct - shutil.move(best_struct, renamed_best) - tourny_winners.append(renamed_best) - - print(" Ending Score:",score, "from",renamed_best) - - print("Setting priors with:",tourny_winners) - grs.set_prior(tourny_winners) - grs.config.sections['SCORING'].strength_target = 0.0 - grs.config.sections['SCORING'].strength_prior = 1.0 - #grs.config.view_state('SCORING') - score = grs.get_score(renamed_best) - print(" Score wrt Priors (best candidates of each gen):",score) +for file in glob.glob(grs.config.sections['TARGET'].job_prefix + "_Cand*Gen*"): + if file not in [row[2] for row in tourny_winners]: + os.remove(file) diff --git a/testing/EnsembleScoring.py b/testing/EnsembleScoring.py index 6c3a45f..ce7b130 100644 --- a/testing/EnsembleScoring.py +++ b/testing/EnsembleScoring.py @@ -25,12 +25,9 @@ }, "SCORING": { - "score_type": "moments", + "score_type": "wasserstein", "strength_target": 1.0, "strength_prior": 0.0, - "moments": "mean stdev" , - "moments_coeff": "1.0 0.01", - "moments_bonus": "0 0" , "smartmask": 0 }, "TARGET": @@ -73,3 +70,11 @@ #grs.set_prior([settings["TARGET"]["start_fname"]]) ensemble = grs.get_ensemble_score('native') print("Ensemble Score of Prior wrt Target:",ensemble) +updated_struct = settings["TARGET"]["start_fname"] +updated_struct = grs.gradient_move(updated_struct) +updated_struct = grs.update_start(updated_struct,"MinScore") +grs.set_prior(glob.glob(settings['TARGET']["job_prefix"]+"*.data")) +score = grs.get_score(updated_struct) +print(" Ending Score:",score) +ensemble = grs.get_ensemble_score('native') 
+print("Ensemble Score of Prior wrt Target:",ensemble) From e49850a5ac8738f3644741688b91d463daaa1c79 Mon Sep 17 00:00:00 2001 From: Mitchell Anthony Wood Date: Tue, 9 Dec 2025 16:02:53 -0700 Subject: [PATCH 4/6] Fixed Tourny selection and scoring issue with multielements --- GRSlib/GRS.py | 4 +- GRSlib/basis/construct.py | 4 +- GRSlib/converters/convert.py | 2 +- GRSlib/converters/sections/lammps_ace.py | 26 +++++++--- GRSlib/converters/sections/lammps_base.py | 3 +- GRSlib/io/sections/basis.py | 12 ++--- GRSlib/motion/create.py | 4 +- GRSlib/motion/genetic_moves/moves.py | 2 +- GRSlib/motion/lossfunc/moments.py | 18 +++---- GRSlib/motion/motion.py | 58 +++++++++++++++-------- multi_element/gen_SQS_reproduce.py | 15 +++--- 11 files changed, 84 insertions(+), 64 deletions(-) diff --git a/GRSlib/GRS.py b/GRSlib/GRS.py index a553e96..6a27bb6 100644 --- a/GRSlib/GRS.py +++ b/GRSlib/GRS.py @@ -149,14 +149,12 @@ def get_score(self,data): between file types (xyz=lammps-data, ase.Atoms, etc) """ #Pass data to, and do something with the functs of scoring - #if self.config.sections['TARGET'].target_fname is None: - #print("Provided target descriptors superceed target data file") try: self.descriptors['target'] = np.load(self.config.sections['TARGET'].target_fdesc) + print("Provided target descriptors superceed target data file") except: self.descriptors['target'] = self.convert_to_desc(self.config.sections['TARGET'].target_fname) self.descriptors['current'] = self.convert_to_desc(data) - try: # If prior is empty when getting a score, set to starting structure. 
if self.descriptors.get('prior',None)==None: self.set_prior([self.config.sections['TARGET'].start_fname]) diff --git a/GRSlib/basis/construct.py b/GRSlib/basis/construct.py index 5c75f3a..260de77 100644 --- a/GRSlib/basis/construct.py +++ b/GRSlib/basis/construct.py @@ -24,7 +24,7 @@ def make_ACE_functions(elements,ranks,lmax,lmin,nmax): # radial function hyperparameters are defined per bond type: bonds = [p for p in itertools.product(elements,elements)] - print('bonds',bonds) + #print('bonds',bonds) rc_range,rc_default,lmb_default,rcin_default = get_default_settings(elements,nshell=2,return_range=True,apply_shift=False) rcutfac = [float(k) for k in rc_default.split()[2:]] lmbda = [float(k) for k in lmb_default.split()[2:]] @@ -66,6 +66,6 @@ def make_ACE_functions(elements,ranks,lmax,lmin,nmax): store_generalized(ccs, coupling_type='wig',L_R=L_R) Apot = AcePot(elements,reference_ens,ranks,nmax,lmax,nradbase,rcutfac,lmbda,rcinner,drcinner,lmin=lmin, **{'ccs':ccs[M_R]}) Apot.write_pot('coupling_coefficients') - print(Apot.nus) + #print(Apot.nus) except ModuleNotFoundError: raise ModuleNotFoundError("Need to have fitsnap3 module in your python path to automatically generate ACE bases") diff --git a/GRSlib/converters/convert.py b/GRSlib/converters/convert.py index c39d9bd..992e918 100644 --- a/GRSlib/converters/convert.py +++ b/GRSlib/converters/convert.py @@ -35,7 +35,7 @@ def lammps_to_ase(self,data): ase_data = read(data,format='lammps-data',Z_of_type=Z_of_type) except: ase_data = read(data+".lammps-data",format='lammps-data',Z_of_type=Z_of_type) - print('in GRSlib/converters/convert.py lammps_to_ase ',ase_data,types) + #print('in GRSlib/converters/convert.py lammps_to_ase ',ase_data,types) return ase_data def lammps_ace(self,data): diff --git a/GRSlib/converters/sections/lammps_ace.py b/GRSlib/converters/sections/lammps_ace.py index bcb3dd9..35fd299 100644 --- a/GRSlib/converters/sections/lammps_ace.py +++ b/GRSlib/converters/sections/lammps_ace.py @@ -40,7 +40,10 
@@ def run_lammps_single(self,data): def _collect_lammps_single(self): num_atoms = self._lmp.extract_global("natoms") - num_types = self.config.sections['BASIS'].numtypes + try: + types = self._lmp.numpy.extract_atom(name="type", nelem=num_atoms).ravel() + except: + types = self._lmp.numpy.extract_atom_iarray(name="type", nelem=num_atoms).ravel() with open('coupling_coefficients.yace','r') as readcoeff: lines = readcoeff.readlines() elemline = [line for line in lines if 'elements' in line][0] @@ -51,18 +54,27 @@ def _collect_lammps_single(self): elems = elemstr4.split() nelements = len(elems) desclines = [line for line in lines if 'mu0' in line] + +# if nelements != int(len(set(types))): +# print("Target structure doesnt have the full number of desired element types") - ncols_pace = int(len(desclines)/nelements) +1 - #ncols_pace = int(len(desclines)/nelements) + nelements #with bzero? +# ncols_pace = int(len(desclines)/nelements) +1 + ncols_pace = int(len(desclines)) + 1 #need to bring in full columns and reassign based on element type nrows_pace = num_atoms lmp_pace = _extract_compute_np(self._lmp, "pace", 0, 2, (nrows_pace, ncols_pace)) - if (np.isinf(lmp_pace)).any() or (np.isnan(lmp_pace)).any(): self.pt.single_print('WARNING! 
Applying np.nan_to_num()') lmp_pace = np.nan_to_num(lmp_pace) if (np.isinf(lmp_pace)).any() or (np.isnan(lmp_pace)).any(): raise ValueError('Nan in computed data of file') -# print("Got descriptors, returning") -# print(np.shape(lmp_pace)) - return lmp_pace + ndesc = int(len(desclines)/nelements) + if nelements > 1: + for atoms in range(num_atoms): + try: + perelem_desc= np.r_[[lmp_pace[atoms,(types[atoms]-1)*ndesc:(types[atoms])*ndesc]], perelem_desc] + except: + perelem_desc = [lmp_pace[atoms,(types[atoms]-1)*ndesc:(types[atoms])*ndesc]] + return perelem_desc + else: + return lmp_pace diff --git a/GRSlib/converters/sections/lammps_base.py b/GRSlib/converters/sections/lammps_base.py index 4e39eb3..5110db3 100644 --- a/GRSlib/converters/sections/lammps_base.py +++ b/GRSlib/converters/sections/lammps_base.py @@ -114,6 +114,7 @@ def _extract_compute_np(lmp, name, compute_style, result_type, array_shape): if array_shape is None: array_np = lmp.numpy.extract_compute(name,compute_style, result_type) + return array_np else: ptr = lmp.extract_compute(name, compute_style, result_type) if result_type == 0: @@ -130,7 +131,7 @@ def _extract_compute_np(lmp, name, compute_style, result_type, array_shape): array_np.shape = array_shape reshaped_array_np = np.delete(array_np, np.s_[-1:], axis=1) #LAMMPS is returning an extra descriptor at the end of the list, deleting by reshaping - return reshaped_array_np + return reshaped_array_np def _extract_commands(string): return [x for x in string.splitlines() if x.strip() != ''] diff --git a/GRSlib/io/sections/basis.py b/GRSlib/io/sections/basis.py index 4c64953..0a1cc4b 100644 --- a/GRSlib/io/sections/basis.py +++ b/GRSlib/io/sections/basis.py @@ -34,8 +34,6 @@ def __init__(self, name, config, pt, infile, args): self.rcinner = self.get_value("BASIS","rcinner",'0.0').split() self.drcinner = self.get_value("BASIS","drcinner",'0.01').split() self.elements = self.get_value("BASIS", "elements", "H").split() - print('elements',self.elements) 
- print('rcinner',self.rcinner) self.mumax = len(self.elements) self.numtypes = len(self.elements) #self.erefs = self.get_value("ACE", "erefs", "0.0").split() @@ -115,10 +113,10 @@ def _generate_b_list(self): nus.sort(key = lambda x : mu0s[nus_unsort.index(x)],reverse = False) byattyp = srt_by_attyp(nus) #config.nus = [item for sublist in list(byattyp.values()) for item in sublist] - print('byatttyp',byattyp) - print('num type',self.numtypes) +# print('byatttyp',byattyp) +# print('num type',self.numtypes) for atype in range(self.numtypes): - print('atype',atype) +# print('atype',atype) nus = byattyp[str(atype)] for nu in nus: i += 1 @@ -146,7 +144,7 @@ def decorated_write_couple(): bondinds=range(len(self.elements)) bonds = [b for b in itertools.product(bondinds,bondinds)] bondstrs = ['[%d, %d]' % b for b in bonds] - print('bond strs basis.py',bondstrs) +# print('bond strs basis.py',bondstrs) assert len(self.lmbda) == len(bondstrs), "must provide rc, lambda, for each BOND type" assert len(self.rcutfac) == len(bondstrs), "must provide rc, lambda, for each BOND type" if len(self.lmbda) == 1: @@ -186,7 +184,7 @@ def decorated_write_couple(): #store them for later so they don't need to be recalculated store_generalized(ccs, coupling_type='wig',L_R=L_R) - print('rcinner vals basis.py', rcinnervals) +# print('rcinner vals basis.py', rcinnervals) apot = AcePot(self.elements, reference_ens, [int(k) for k in self.ranks], [int(k) for k in self.nmax], [int(k) for k in self.lmax], self.nmaxbase, rcvals, lmbdavals, rcinnervals, drcinnervals, [int(k) for k in self.lmin], self.b_basis, **{'ccs':ccs[M_R]}) apot.write_pot('coupling_coefficients') diff --git a/GRSlib/motion/create.py b/GRSlib/motion/create.py index 4fcc86a..6878c74 100644 --- a/GRSlib/motion/create.py +++ b/GRSlib/motion/create.py @@ -94,9 +94,9 @@ def from_template(self,*args): random_shuffle = True #More of a super function that will call a bunch of the ones below # print("Starting population using provided 
template") - population = [] duplicate = self.convert.lammps_to_ase(args[0][0]) - for candidate in range(self.config.sections["GENETIC"].population_size): + population = [duplicate] + for candidate in range(self.config.sections["GENETIC"].population_size-1): tmp_atoms = duplicate.copy() if random_shuffle: duplicate_syms = [atom.symbol for atom in tmp_atoms] diff --git a/GRSlib/motion/genetic_moves/moves.py b/GRSlib/motion/genetic_moves/moves.py index 3038189..ca6841e 100644 --- a/GRSlib/motion/genetic_moves/moves.py +++ b/GRSlib/motion/genetic_moves/moves.py @@ -142,7 +142,7 @@ def change_ele(atoms,config): ele_counts = list(ele_counts_dct.values()) le_cond = any([icomp < target_comp[ii] - pm_frac for ii,icomp in enumerate(ele_counts)]) ge_cond = any([icomp > target_comp[ii] + pm_frac for ii,icomp in enumerate(ele_counts)]) - print('final',itr,target_comp,ele_counts, le_cond,ge_cond) + #print('final',itr,target_comp,ele_counts, le_cond,ge_cond) return new_atoms def minimize(atoms,config): diff --git a/GRSlib/motion/lossfunc/moments.py b/GRSlib/motion/lossfunc/moments.py index 1f56822..4dc46b0 100644 --- a/GRSlib/motion/lossfunc/moments.py +++ b/GRSlib/motion/lossfunc/moments.py @@ -12,7 +12,6 @@ def __init__(self, *args): #pt, config, target_desc, prior_desc): self.prior_desc = descriptors.get('prior',None).copy() self.current_desc = descriptors self.n_descriptors = np.shape(self.target_desc)[1] - print('moments num desc',self.n_descriptors) self.mask = list(range(self.n_descriptors)) self.n_params = 1 #Variables LAMMPS needs to know about self.n_elements = self.config.sections['BASIS'].numtypes #Variables LAMMPS needs to know about @@ -32,21 +31,20 @@ def __call__(self, *args): if self.mode=="score": elems, current_desc, beta, energy = args self.n_atoms = np.shape(current_desc)[0] - #TODO Explain score = self.construct_loss(current_desc, self.target_desc) energy[:] = 0 forces = self.grad_loss(current_desc, self.target_desc) #Forces between current and target 
beta[:,:]= 0 - if self.config.sections['SCORING'].norm_by_numdesc == True: + if self.config.sections['SCORING'].norm_by_numdesc: energy[0] = self.config.sections["SCORING"].strength_target*score/self.n_descriptors #Scaled score (energy) between current and target beta[:,:] = self.config.sections["SCORING"].strength_target*forces/self.n_descriptors #Scaled forces between current and target else: energy[0] = self.config.sections["SCORING"].strength_target*score beta[:,:] = self.config.sections["SCORING"].strength_target*forces - + """ #TODO Explain score = self.construct_loss(self.prior_desc, self.target_desc) # energy[0] += self.config.sections["SCORING"].strength_prior*score #Scaled score (energy) between current and prior @@ -56,12 +54,11 @@ def __call__(self, *args): beta[:,:] += self.config.sections["SCORING"].strength_prior*forces/self.n_descriptors #Scaled forces between current and prior else: beta[:,:] += self.config.sections["SCORING"].strength_prior*forces - + """ elif self.mode=="update": self.update(args) beta = self.grad_loss(self.target_desc, self.target_desc) - def set_mode_update(self): self.mode="update" @@ -138,14 +135,11 @@ def first_moment(self, current_desc, target_desc): current_avg = jnp.average(current_desc, axis=0)*self.mask target_avg = jnp.average(target_desc, axis=0)*self.mask tst_residual = jnp.sum(jnp.nan_to_num(jnp.abs(current_avg-target_avg))) -# tst_residual_av = jnp.average(jnp.nan_to_num(jnp.abs(current_avg-target_avg))) + tst_residual_av = jnp.average(jnp.nan_to_num(jnp.abs(current_avg-target_avg))) is_zero = jnp.array(jnp.isclose(tst_residual,jnp.zeros(tst_residual.shape)),dtype=int) bonus = -jnp.sum(is_zero*(float(self.config.sections['SCORING'].moments_bonus[0]))) - if self.divide_by_numdesc: - tst_residual_final = tst_residual_av*float(self.config.sections['SCORING'].moments_coeff[0]) + bonus #MAE + bonus - else: - tst_residual_final = tst_residual*float(self.config.sections['SCORING'].moments_coeff[0]) + bonus #MAE + bonus - 
jax.debug.print("first moment inside JIT: {}", tst_residual_final) + tst_residual_final = tst_residual*float(self.config.sections['SCORING'].moments_coeff[0]) + bonus #MAE + bonus + #jax.debug.print("first moment inside JIT: {}", tst_residual_final/38.0) #print('in first moment',float(tst_residual_final)) return tst_residual_final diff --git a/GRSlib/motion/motion.py b/GRSlib/motion/motion.py index c6bf073..368affc 100644 --- a/GRSlib/motion/motion.py +++ b/GRSlib/motion/motion.py @@ -139,20 +139,33 @@ def find_min_runner_up(self,selection,random_x_second=None): #print(selection[mp_inds[0]],selection[mp_inds[1]]) - def tournament_selection_N(self,population_in,k=3,seed=None): + def tournament_selection_N(self,population_in,k,seed): population = population_in.copy() scores = [p[3] for p in population] scores_a = np.array(scores) #lowest_k_minus_1_scores = scores.copy().sort(key=lambda x : x[3],reverse=False)[:k-1] if seed != None: np.random.seed(seed) - selection_ix = np.random.choice(range(0,len(population)),k-1,replace=False) + selection_ix = np.random.choice(range(0,len(population)),k,replace=False) not_ix = [j for j in range(0,len(population)) if j not in selection_ix] - selection_jx = np.random.choice(not_ix,k-1,replace=False) + selection_jx = np.random.choice(not_ix,k,replace=False) + tourny_wins = [] + for id in range(k): + if scores_a[selection_jx[id]] <= scores_a[selection_ix[id]]: + tourny_wins.append(selection_jx[id]) + else: + tourny_wins.append(selection_ix[id]) + updated = [population[ix] for ix in selection_ix] + + """ mask_ix = scores_a[selection_jx] <= scores_a[selection_ix] - #mask_ix = scores_a[selection_jx] < scores_a[selection_ix] + print(mask_ix) selection_ix = selection_jx[mask_ix] + print(selection_ix) updated = [population[ix] for ix in selection_ix] + print(updated) + """ + return updated def filter_unique(self,selection): @@ -167,35 +180,37 @@ def filter_unique(self,selection): def unique_selection(self, data): #More of a super function 
that will call a bunch of the ones below #This should be the default since we dont want to send duplicates the crossover/mutation - ki=3 #default - #ki =4 + ki = int(self.config.sections["GENETIC"].population_size/2) # 3 #Need to do better here + self.genetic = Genetic(self.pt, self.config,self.convert,self.scoring,self.gradmove) + starting_generation = Create.starting_generation(self,data) scores = [] gen_winners = [] - for candidate in range(len(starting_generation)): + for candidate in range(self.config.sections["GENETIC"].population_size): file_name = self.config.sections['TARGET'].job_prefix+"_Cand%sGen%s.lammps-data"%(candidate,'Init') lammps_data = self.convert.ase_to_lammps(starting_generation[candidate],file_name) #Honestly I would prefer scores as a dictonary of Key:Item pairs, TODO later. scores.append(['Init', candidate, file_name, self.scoring.get_score(lammps_data)]) + # We now have the scores from the starting generation which uses a from_(start_type) to populate + # shutil.move(lammps_data, self.config.sections['TARGET'].job_prefix + "_Cand%sGen%s.data"%(candidate,0)) for iteration in range(self.config.sections['GENETIC'].ngenerations): - population_in = scores.copy() - print('pop in',population_in) + #iterate selection method (good place to sub in different selection methods in the future) - selected_sets = [self.tournament_selection_N(population_in,k=ki,seed=None) for idx in range(len(starting_generation))] - print('raw sets',selected_sets) + selected_sets = [self.tournament_selection_N(scores,ki,None) for idx in range(self.config.sections["GENETIC"].population_size)] + #remove empty selections (TODO remove empty selection solution) selected_sets = [s for s in selected_sets if len(s) >=2] selected= [item for sublist in selected_sets for item in sublist] + #filter for uniqueness (no repeats of candidates from population in selection) - print('selected pre filter',selected) filtered = self.filter_unique(selected) if len(filtered) > ki: selected = 
filtered else: print('not enough unique candidates found. using repeats') - print('selected post filter',selected) +# print('selected post filter',selected) #selected = self.filter_unique(selected) """ selection = scores.copy() @@ -216,10 +231,10 @@ def unique_selection(self, data): # especially if having trouble finding the right solution. adding some randomness helps avoid # convergence to local minimum. # we could also try (winner + random) , (winner + random_top_10_percent) , (random + random) - print("Iteration:",iteration, "Winner:",winner, "Second:",runner_up) + print("Iteration:",iteration, "Winner:",winner[2],winner[3], "Second:",runner_up[2],runner_up[3]) with open("scoring_%s.txt"%self.config.sections['TARGET'].job_prefix, "a") as f: print(iteration, winner, runner_up, file=f) - print('winner',winner,winner[2]) +# print('winner',winner,winner[2]) atoms_winner = self.convert.lammps_to_ase(winner[2]) atoms_runner_up = self.convert.lammps_to_ase(runner_up[2]) @@ -262,16 +277,17 @@ def unique_selection(self, data): batch = self.genetic.crossover(atoms_winner, atoms_runner_up) #Should have two structures #batch = self.genetic.crossover_ASE(atoms_winner, atoms_runner_up) #crossover function from ASE #print('batch i',batch) - for candidate in range(len(batch)): + + for candidate in range(self.config.sections["GENETIC"].population_size): file_name = self.config.sections['TARGET'].job_prefix+"_Cand%sGen%s.lammps-data"%(candidate,iteration) lammps_data = self.convert.ase_to_lammps(batch[candidate],file_name) score_conv = self.scoring.get_score(lammps_data) scores.append([iteration, candidate, lammps_data, score_conv]) - print('gen %d cand %d score %f' % (iteration,candidate,score_conv)) - shutil.move(lammps_data, self.config.sections['TARGET'].job_prefix + "_Cand%sGen%s.lammps-data"%(candidate,iteration)) - current_generation = [] - for file in glob.glob(self.config.sections['TARGET'].job_prefix + "_Cand*Gen*"): - +# print('gen %d cand %d score %f' % 
(iteration,candidate,score_conv), file_name) +# shutil.move(lammps_data, self.config.sections['TARGET'].job_prefix + "_Cand%sGen%s.lammps-data"%(candidate,iteration)) + + + for file in glob.glob(self.config.sections['TARGET'].job_prefix + "_Cand*Gen*"): if file not in [row[2] for row in gen_winners]: os.remove(file) diff --git a/multi_element/gen_SQS_reproduce.py b/multi_element/gen_SQS_reproduce.py index 1f970aa..a6c9113 100644 --- a/multi_element/gen_SQS_reproduce.py +++ b/multi_element/gen_SQS_reproduce.py @@ -30,7 +30,7 @@ "score_type": "moments", "strength_target": 1.0, "strength_prior": 0.0, - "norm_by_numdesc":1, + "norm_by_numdesc": True, "moments": "mean" , "moments_coeff": "1.0", "moments_bonus": "0 " , @@ -55,7 +55,7 @@ "mutation_rate": 1.0, "mutation_types": {"perturb": 0.0, "change_ele": 1.0, "atom_count" : 0.0, "volume" : 0.0, "minimize" : 0.0, "ortho_cell" : 0.0}, "population_size": 10, - "ngenerations": 100, + "ngenerations": 10, #"max_atoms": 50, #"min_atoms": 10, "density_ratio": 1.0, @@ -65,17 +65,18 @@ grs = GRS(settings,comm=comm) -score = grs.get_score(settings["TARGET"]["start_fname"]) -print(" Starting Score:",score) +#score = grs.get_score(settings["TARGET"]["start_fname"]) +#print(" Starting Score:",score) updated_struct = settings["TARGET"]["start_fname"] + grs.set_prior([updated_struct]) scores, best_struct = grs.genetic_move(updated_struct) -updated_struct = grs.gradient_move(best_struct) -score = grs.get_score(updated_struct) -print(" Ending Score:",score) +#updated_struct = grs.gradient_move(best_struct) +#score = grs.get_score(updated_struct) +#print(" Ending Score:",score) #updated_struct = grs.update_start(updated_struct,"MinScore") #grs.set_prior(glob.glob(settings['TARGET']["job_prefix"]+"*.data")) From 2af87a8ef76ef3efbd6d4ab87fa84a4a35259241 Mon Sep 17 00:00:00 2001 From: Mitchell Anthony Wood Date: Fri, 12 Dec 2025 16:08:30 -0700 Subject: [PATCH 5/6] Reworked GA via tournament selection --- GRSlib/GRS.py | 2 +- 
GRSlib/io/sections/genetic.py | 8 +- GRSlib/motion/genetic.py | 79 ++++----- GRSlib/motion/lossfunc/moments_peratom.py | 193 ++++++++++++++++++++ GRSlib/motion/motion.py | 206 +++++++--------------- multi_element/gen_SQS_reproduce.py | 3 + 6 files changed, 308 insertions(+), 183 deletions(-) create mode 100644 GRSlib/motion/lossfunc/moments_peratom.py diff --git a/GRSlib/GRS.py b/GRSlib/GRS.py index 6a27bb6..907d620 100644 --- a/GRSlib/GRS.py +++ b/GRSlib/GRS.py @@ -241,7 +241,7 @@ def genetic_move(self,data): self.score = Scoring(self.pt, self.config, self.loss_func, self.descriptors) # Set scoring class to assign scores to moves self.genmove = Optimize(self.pt, self.config, self.score, self.convert) #Set desired motion class with scoring attached - gen_scores = self.genmove.unique_selection(data) + gen_scores = self.genmove.advance_generations(data) #self.write_output() best_candidate = sorted(gen_scores,key=lambda x: x[3])[0][2] return gen_scores, best_candidate diff --git a/GRSlib/io/sections/genetic.py b/GRSlib/io/sections/genetic.py index 01fa6d0..647d348 100644 --- a/GRSlib/io/sections/genetic.py +++ b/GRSlib/io/sections/genetic.py @@ -8,13 +8,19 @@ def __init__(self, name, config, pt,infile, args): super().__init__(name, config, pt, infile,args) self.allowedkeys = ['mutation_rate', 'mutation_types', 'population_size', 'ngenerations', 'start_type', 'max_atoms', 'min_atoms', 'max_length_aspect', 'max_angle_aspect', 'density_ratio', - 'composition_constraint', 'composition', 'start_type', 'lattice_type', 'structure_template', 'frac_pop_per_ref', 'dev_per_ref','reference_phases', 'change_ele_tol'] + 'composition_constraint', 'composition', 'start_type', 'lattice_type', 'structure_template', + 'frac_pop_per_ref', 'dev_per_ref','reference_phases', 'change_ele_tol', 'tournament_size', + 'replacements', 'multi_mutate','randomseed'] self._check_section() self.mutation_rate = self.get_value("GENETIC", "mutation_rate", 0.5, interpreter="float") #self.reference_phases 
= self.get_value("GENETIC", "reference_phases", "bcc").split() self.mutation_types = self.get_value("GENETIC", "mutation_types", {"perturb": 0.5, "change_ele": 0.0, "atom_count" : 0.1, "volume" : 0.2, "minimize" : 0.2}, interpreter="dict") self.population_size = self.get_value("GENETIC", "population_size", 20, interpreter="int") self.ngenerations = self.get_value("GENETIC", "ngenerations", 10, interpreter="int") + self.tournament_size = self.get_value("GENETIC", "tournament_size", 10, interpreter="int") + self.replacements = self.get_value("GENETIC", "replacements", False, interpreter="bool") + self.multi_mutate = self.get_value("GENETIC", "multi_mutate", True, interpreter="bool") + self.randomseed = self.get_value("GENETIC", "randomseed", 12345, interpreter="int") self.max_atoms = self.get_value("GENETIC", "max_atoms", 100, interpreter="int") self.change_ele_tol = self.get_value("GENETIC", "change_ele_tol", 0.1, interpreter="float") self.min_atoms = self.get_value("GENETIC", "min_atoms", 10, interpreter="int") diff --git a/GRSlib/motion/genetic.py b/GRSlib/motion/genetic.py index 1637083..8c237b8 100644 --- a/GRSlib/motion/genetic.py +++ b/GRSlib/motion/genetic.py @@ -71,16 +71,11 @@ def crossover(self, parent1, parent2): return crossover_population def crossover_ASE(self, parent1, parent2): - # 1) Takes in a pair of structures, tourny selection should give the most recent winner - # and a (random?) second structure from the winners circle or runner up. + # 1) Takes in a pair of structures, pairs of parents should be the preferred method. # 2) Crossover will be the 'merger' of these two structures, which for now is the cell from last winner and # a spliced together set of atoms based on some random dividing line in the atom ids. # 3) Check the spliced cell for accuracy in chemical composition, flip_atoms until close to desired - # 4) Now generate the remaining population_size - 2 structures as perturbations of the spliced cell. 
- # 5) Return the population - crossover_population = [] - crossover_population.append(parent1) #Make sure the two parents make it into the next generation for comparison - crossover_population.append(parent2) #Make sure the two parents make it into the next generation for comparison + # 4) Return the pair of children from repeating this process once more atomic_numbers = list(parent1.get_atomic_numbers()) + list(parent2.get_atomic_numbers()) blmin = closest_distances_generator(atomic_numbers, 0.5) @@ -106,45 +101,43 @@ def crossover_ASE(self, parent1, parent2): cellbounds=cellbounds, use_tags=False, ) - for ii in range(self.config.sections["GENETIC"].population_size-2): - child1 = csp.cross(parent1,parent2) - #child2 = csp.cross(parent1,parent2) + child1 = csp.cross(parent1,parent2) + pre_move_lammps = self.convert.ase_to_lammps(child1,'child1') + #Optional minimization after crossover/mutation here, set to none so we can just get the score out + event = getattr(self.gradmove, 'none_min') + before_score, child1_score, child1 = event(pre_move_lammps) - pre_move_lammps = self.convert.ase_to_lammps(child1,'tmp') - grad_type = self.config.sections['GRADIENT'].min_type + '_min' - event = getattr(self.gradmove, grad_type) - before_score, after_score, post_move_lammps = event(pre_move_lammps) - child1 = self.convert.lammps_to_ase(post_move_lammps) + child2 = csp.cross(parent1,parent2) + pre_move_lammps = self.convert.ase_to_lammps(child1,'child2') + #Optional minimization after crossover/mutation here, set to none so we can just get the score out + event = getattr(self.gradmove, 'none_min') + before_score, child2_score, child2 = event(pre_move_lammps) - crossover_population.append(child1) - #crossover_population.append(child2) - return crossover_population + return child1, child1_score, child2, child2_score - def mutation(self, parent, single = False): - # 1) Takes in a structures, tourny selection should give the most recent winner - # 2) Generate the remaining 
population_size - 1 structures as perturbations of the given cell. - # 3) Return the population - mutated_population = [] + def mutation(self, parent1, parent2, single): + # 1) Takes in a pair of parent structures + # 2) Generate a pair of child structures as perturbations of the given cell. + # 3) Return the children mutation_options = self.config.sections["GENETIC"].mutation_types if not single: - mutation_array = random.choices(list(mutation_options.keys()), weights=mutation_options.values(), k=self.config.sections["GENETIC"].population_size) + mutation_array = random.choices(list(mutation_options.keys()), weights=mutation_options.values(), k=2) #unique mutation per parent else: - mutation_array = random.choices(list(mutation_options.keys()), weights=mutation_options.values(), k=1) - for mutation in mutation_array: - event = getattr(GenMoves, mutation) - mutated = event(parent,self.config) - #print('in mutation: mutated before',mutated) - """ - #TODO update for other operations (other than change_ele) - if self.config.sections['GRADIENT'].min_type == 'none': - pre_move_lammps = self.convert.ase_to_lammps(mutated,'tmp') - grad_type = self.config.sections['GRADIENT'].min_type + '_min' - event = getattr(self.gradmove, grad_type) - print('grad_type',grad_type,event) - before_score, after_score, post_move_lammps = event(pre_move_lammps) - mutated = self.convert.lammps_to_ase(post_move_lammps) - print('in mutation: mutated after', mutated) - """ - mutated_population.append(mutated) - #print('mutated pop',mutated_population) - return mutated_population + mutation = random.choices(list(mutation_options.keys()), weights=mutation_options.values(), k=1) #same mutation per parent + mutation_array = [mutation, mutation] + event = getattr(GenMoves, mutation_array[0][0]) + child1 = event(parent1,self.config) + event = getattr(GenMoves, mutation_array[1][0]) + child2 = event(parent2,self.config) + + pre_move_lammps = self.convert.ase_to_lammps(child1,'child1') + #Optional 
minimization after crossover/mutation here, set to none so we can just get the score out + event = getattr(self.gradmove, 'none_min') + before_score, child1_score, child1 = event(pre_move_lammps) + + pre_move_lammps = self.convert.ase_to_lammps(child2,'child2') + #Optional minimization after crossover/mutation here, set to none so we can just get the score out + event = getattr(self.gradmove, 'none_min') + before_score, child2_score, child2 = event(pre_move_lammps) + + return child1, child1_score, child2, child2_score diff --git a/GRSlib/motion/lossfunc/moments_peratom.py b/GRSlib/motion/lossfunc/moments_peratom.py new file mode 100644 index 0000000..cb91c0a --- /dev/null +++ b/GRSlib/motion/lossfunc/moments_peratom.py @@ -0,0 +1,193 @@ +from GRSlib.motion.scoring import Scoring +import jax.numpy as jnp +import numpy as np +import jax +from jax import grad, jit +from functools import partial + +class MomentsAtomic(Scoring): + def __init__(self, *args): #pt, config, target_desc, prior_desc): + self.pt, self.config, descriptors = args + self.target_desc = descriptors.get('target',None).copy() + self.prior_desc = descriptors.get('prior',None).copy() + self.current_desc = descriptors + self.n_descriptors = np.shape(self.target_desc)[1] + self.mask = list(range(self.n_descriptors)) + self.n_params = 1 #Variables LAMMPS needs to know about + self.n_elements = self.config.sections['BASIS'].numtypes #Variables LAMMPS needs to know about + self.loss_ff_grad = grad(self.construct_loss) + self.grad_loss = grad(self.construct_loss) + self.mode = "update" + self.divide_by_numdesc = self.config.sections['SCORING'].norm_by_numdesc #Flag to normalize by # of descriptors + #TODO flag to normalize descriptors themselves + + def __call__(self, *args): + #The arguments that this function brings in are super improtant and are expected by LAMMPS MLIAP package. 
+ #They are (elems, current_desc, beta, energy) + #Integer values of LAMMPS atom types are in elems + #Descriptors as a per-atom array into current_desc. + #Per-atom forces are expected for beta + #Per-atom energy is expected for energy, need to do some testing if per-atom values can be reported back. + if self.mode=="score": + elems, current_desc, beta, energy = args + self.n_atoms = np.shape(current_desc)[0] + #TODO Explain + score = self.construct_loss(current_desc, self.target_desc) + + energy[:] = 0 + forces = self.grad_loss(current_desc, self.target_desc) #Forces between current and target + beta[:,:]= 0 + if self.config.sections['SCORING'].norm_by_numdesc: + energy[0] = self.config.sections["SCORING"].strength_target*score/self.n_descriptors #Scaled score (energy) between current and target + beta[:,:] = self.config.sections["SCORING"].strength_target*forces/self.n_descriptors #Scaled forces between current and target + else: + energy[0] = self.config.sections["SCORING"].strength_target*score + beta[:,:] = self.config.sections["SCORING"].strength_target*forces + + """ + #TODO Explain + score = self.construct_loss(self.prior_desc, self.target_desc) +# energy[0] += self.config.sections["SCORING"].strength_prior*score #Scaled score (energy) between current and prior +# print(" Target, Prior Scores: ", energy[0], score) + forces = self.grad_loss(current_desc, self.prior_desc) #Forces between current and prior structures + if self.divide_by_numdesc: + beta[:,:] += self.config.sections["SCORING"].strength_prior*forces/self.n_descriptors #Scaled forces between current and prior + else: + beta[:,:] += self.config.sections["SCORING"].strength_prior*forces + """ + elif self.mode=="update": + self.update(args) + beta = self.grad_loss(self.target_desc, self.target_desc) + + def set_mode_update(self): + self.mode="update" + + def set_mode_score(self): + self.mode="score" + + def update(self,*args): + pt, config, descriptors = args[0] + self.target_desc = 
@partial(jit, static_argnums=(0,))
def construct_loss(self, current_desc, target_desc):
    """Total loss between current and target descriptor distributions.

    Sums the residuals of the statistical moments requested by the
    'moments' list in the SCORING config section.  first_moment returns a
    per-row array while the higher moments return scalars, so the total
    broadcasts to a per-row loss whenever 'mean' is requested.

    NOTE: inside this jitted function the moment values are JAX tracers,
    so they cannot be compared against None (the original `item != None`
    check fails under tracing).  Gate on the static config list instead.
    """
    #TODO This needs to be a dynamic call like is done in descriptor calcs
    requested = self.config.sections['SCORING'].moments
    loss_ff = 0
    if 'mean' in requested:
        loss_ff += self.first_moment(current_desc, target_desc)
    if 'stdev' in requested:
        loss_ff += self.second_moment(current_desc, target_desc)
    if 'skew' in requested:
        loss_ff += self.third_moment(current_desc, target_desc)
    if 'kurt' in requested:
        loss_ff += self.fourth_moment(current_desc, target_desc)
    return loss_ff

@partial(jit, static_argnums=(0,))
def first_moment(self, current_desc, target_desc):
    """Per-row L1 residual between masked descriptors and the target mean.

    Returns an array with one entry per row (per atom) of current_desc;
    rows that exactly match the target mean earn a negative bonus.
    """
    current_masked = current_desc*self.mask
    target_avg = jnp.average(target_desc, axis=0)*self.mask
    # Row-wise MAE against the target column means; nan_to_num guards any
    # NaNs introduced by masked/degenerate columns.
    tst_residual = jnp.sum(jnp.nan_to_num(jnp.abs(current_masked-target_avg)),axis=1)
    # Score reduction (bonus) for rows whose residual is exactly zero.
    is_zero = jnp.array(jnp.isclose(tst_residual,jnp.zeros(tst_residual.shape)),dtype=int)
    bonus = -jnp.sum(is_zero*(float(self.config.sections['SCORING'].moments_bonus[0])))
    tst_residual_final = tst_residual*float(self.config.sections['SCORING'].moments_coeff[0]) + bonus #MAE + bonus
    return tst_residual_final

@partial(jit, static_argnums=(0,))
def second_moment(self, current_desc, target_desc):
    """Masked absolute difference of the per-descriptor standard deviations.

    Returns a scalar: summed |std(current) - std(target)| scaled by the
    'stdev' coefficient, minus a bonus when the residual is exactly zero.
    """
    current_std = jnp.std(current_desc, axis=0)*self.mask
    target_std = jnp.std(target_desc, axis=0)*self.mask
    tst_residual = jnp.sum(jnp.nan_to_num(jnp.abs(current_std-target_std)))
    is_zero = jnp.array(jnp.isclose(tst_residual,jnp.zeros(tst_residual.shape)),dtype=int)
    bonus = -jnp.sum(is_zero*float(self.config.sections['SCORING'].moments_bonus[1]))
    tst_residual_final = tst_residual*float(self.config.sections['SCORING'].moments_coeff[1]) + bonus #MAE + bonus
    return tst_residual_final

@partial(jit, static_argnums=(0,))
def third_moment(self, current_desc, target_desc):
    """Residual of Pearson's second skewness coefficient.

    Showing my work: Pearson skew = 3*(mean - median)/stdev.
    NOTE(review): a zero stdev makes the skew +/-inf, which nan_to_num
    converts to a large finite number rather than zero — confirm intended.
    """
    current_avg = jnp.average(current_desc, axis=0)*self.mask
    target_avg = jnp.average(target_desc, axis=0)*self.mask
    current_std = jnp.std(current_desc, axis=0)*self.mask
    target_std = jnp.std(target_desc, axis=0)*self.mask
    current_med = jnp.median(current_desc, axis=0)*self.mask
    target_med = jnp.median(target_desc, axis=0)*self.mask

    current_skew = 3.0*(current_avg-current_med)/current_std
    target_skew = 3.0*(target_avg-target_med)/target_std

    tst_residual = jnp.sum(jnp.nan_to_num(jnp.abs(current_skew-target_skew)))
    is_zero = jnp.array(jnp.isclose(tst_residual,jnp.zeros(tst_residual.shape)),dtype=int)
    bonus = -jnp.sum(is_zero*float(self.config.sections['SCORING'].moments_bonus[2]))
    tst_residual_final = tst_residual*float(self.config.sections['SCORING'].moments_coeff[2]) + bonus #MAE + bonus
    return tst_residual_final

@partial(jit, static_argnums=(0,))
def fourth_moment(self, current_desc, target_desc):
    """Residual of the excess kurtosis between current and target.

    Showing my work: Kurtosis = Avg(z^4.0)-3 where z=(x-avg(x))/stdev(x).
    The average is taken over all descriptor entries at once.
    """
    current_avg = jnp.average(current_desc, axis=0)*self.mask
    target_avg = jnp.average(target_desc, axis=0)*self.mask
    current_std = jnp.std(current_desc, axis=0)*self.mask
    target_std = jnp.std(target_desc, axis=0)*self.mask

    current_kurt = jnp.average(((current_desc-current_avg)/current_std)**4.0)-3.0
    target_kurt = jnp.average(((target_desc-target_avg)/target_std)**4.0)-3.0

    tst_residual = jnp.sum(jnp.nan_to_num(jnp.abs(current_kurt-target_kurt)))
    is_zero = jnp.array(jnp.isclose(tst_residual,jnp.zeros(tst_residual.shape)),dtype=int)
    bonus = -jnp.sum(is_zero*float(self.config.sections['SCORING'].moments_bonus[3]))
    tst_residual_final = tst_residual*float(self.config.sections['SCORING'].moments_coeff[3]) + bonus #MAE + bonus
    return tst_residual_final
def find_min_max(self, population):
    """Sort *population* in place by score (entry index 3), descending.

    Returns (highest_scoring, lowest_scoring) entries.  The in-place sort
    is intentional: callers observe the reordered list afterwards.
    Raises IndexError on an empty population.
    """
    population.sort(key=lambda entry: entry[3], reverse=True)
    return population[0], population[-1]

def tournament_selection(self, population, iteration, k, replacements, multi_mutate):
    """Run one generation of k-way tournament selection plus breeding.

    This version of the tournament has preset values of k and of the
    mutation/crossover decision, which results in more generic pressure
    to find higher fitness (lower score).

    population   : list of [generation, candidate_id, data_file, score]
    iteration    : current generation index (used in the output file names)
    k            : tournament pool size
    replacements : forwarded to np.random.choice as `replace`
    multi_mutate : forwarded to Genetic.mutation

    Returns the next generation in the same list-of-entries format.
    """
    count_pop = len(population)
    scores = np.array([entry[3] for entry in population])

    # One k-way tournament per slot in the next generation; each round keeps
    # the population index of the lowest-scoring (fittest) contestant.
    # BUGFIX: the previous np.sort(np.c_[...], axis=0) sorted the index and
    # score columns independently, so it always returned the smallest index
    # instead of the index paired with the lowest score.
    tourny_winners = []
    for _ in range(self.config.sections["GENETIC"].population_size):
        pool = np.random.choice(range(count_pop), k, replace=replacements)
        pool_winner = int(pool[int(np.argmin(scores[pool]))])  # index of lowest score in this batch
        tourny_winners.append(pool_winner)

    #TODO Need to decide what to do with the tourny winners

    # Pair winner i with winner count_pop-1-i.
    # NOTE(review): assumes population_size == len(population) — confirm.
    parents = []
    for pair in range(count_pop // 2):
        parents.append([population[tourny_winners[pair]],
                        population[tourny_winners[count_pop - pair - 1]]])

    # A single draw decides mutation vs crossover for the whole generation
    # (preserved from the original control flow).
    mutate_generation = np.random.rand() < float(self.config.sections['GENETIC'].mutation_rate)

    # Now setup for the next iteration of the tournament.
    next_population = []
    for pair_idx, (parent1, parent2) in enumerate(parents):
        ase_parent1 = self.convert.lammps_to_ase(parent1[2])  # entry[2] is the data-file name
        ase_parent2 = self.convert.lammps_to_ase(parent2[2])
        if mutate_generation:
            child1, score1, child2, score2 = self.genetic.mutation(ase_parent1, ase_parent2, multi_mutate)
        else:
            child1, score1, child2, score2 = self.genetic.crossover_ASE(ase_parent1, ase_parent2)
        #Could add logic here to compare child scores to parents
        file_name_child1 = self.config.sections['TARGET'].job_prefix + "_Cand%sGen%s.lammps-data" % (pair_idx*2+0, iteration)
        file_name_child2 = self.config.sections['TARGET'].job_prefix + "_Cand%sGen%s.lammps-data" % (pair_idx*2+1, iteration)

        # The genetic operators leave their outputs in fixed files
        # 'child1'/'child2'; rename them to per-candidate file names.
        shutil.move('child1', file_name_child1)
        shutil.move('child2', file_name_child2)
        next_population.append([iteration, pair_idx*2+0, file_name_child1, score1])
        next_population.append([iteration, pair_idx*2+1, file_name_child2, score2])

    return next_population
scores.append(['Init', candidate, file_name, self.scoring.get_score(lammps_data)]) # We now have the scores from the starting generation which uses a from_(start_type) to populate + if self.config.sections["GENETIC"].randomseed != None: + np.random.seed(self.config.sections["GENETIC"].randomseed) -# shutil.move(lammps_data, self.config.sections['TARGET'].job_prefix + "_Cand%sGen%s.data"%(candidate,0)) for iteration in range(self.config.sections['GENETIC'].ngenerations): + #Since we have the starting generation, we will move directly to the tourny selection method - #iterate selection method (good place to sub in different selection methods in the future) - selected_sets = [self.tournament_selection_N(scores,ki,None) for idx in range(self.config.sections["GENETIC"].population_size)] - - #remove empty selections (TODO remove empty selection solution) - selected_sets = [s for s in selected_sets if len(s) >=2] - selected= [item for sublist in selected_sets for item in sublist] - - #filter for uniqueness (no repeats of candidates from population in selection) - filtered = self.filter_unique(selected) - if len(filtered) > ki: - selected = filtered - else: - print('not enough unique candidates found. using repeats') -# print('selected post filter',selected) - #selected = self.filter_unique(selected) - """ - selection = scores.copy() - for round in range(len(selection)-2): #-2 because we want to keep the best and second-best for crossover - compare_pair = np.random.randint(0, len(selection), 2) - #This is where a dictionary of score/selection would be nice and clean instead of fixed index references. 
- if scores[compare_pair[0]][3] <= scores[compare_pair[1]][3]: - loser = compare_pair[1] - selection.pop(loser) - else: - loser = compare_pair[0] - selection.pop(loser) - """ - #NOTE set random_x_second to None to do (winner + runner_up) - winner, runner_up = self.find_min_runner_up(selected,random_x_second=ki) - #At the last round, hold onto the runner up for a possible crossover - #TODO We will probably need to implement different pairing methods rather than just (winner + runner_up) - # especially if having trouble finding the right solution. adding some randomness helps avoid - # convergence to local minimum. - # we could also try (winner + random) , (winner + random_top_10_percent) , (random + random) - print("Iteration:",iteration, "Winner:",winner[2],winner[3], "Second:",runner_up[2],runner_up[3]) + scores = self.tournament_selection(scores,iteration,k,replacements,multi_mutate) + highest, lowest = self.find_min_max(scores) + gen_winners.append(scores[lowest[1]]) + print("Iteration:",iteration, "Lowest:",lowest[2],lowest[3], "Highest:",highest[2],highest[3]) with open("scoring_%s.txt"%self.config.sections['TARGET'].job_prefix, "a") as f: - print(iteration, winner, runner_up, file=f) -# print('winner',winner,winner[2]) - atoms_winner = self.convert.lammps_to_ase(winner[2]) - atoms_runner_up = self.convert.lammps_to_ase(runner_up[2]) - - #Winning candidate is then appended to winners circle list - gen_winners.append(winner) #appends arrays along the first axis (row-wise) - #Now setup for the next iteration of the tournament - """ - #NOTE this is one possible way we can do a loop over structures to get mutations from different candidates & not always the lowest loss candidate - scores = [] - for iset,set_pair in enumerate(selected_sets): - winner, runner_up = self.find_min_runner_up(selected,random_x_second=ki) - atoms_winner = self.convert.lammps_to_ase(set_pair[0][2]) - atoms_runner_up = self.convert.lammps_to_ase(set_pair[1][2]) #TODO update for multi 
element list - if np.random.rand() < float(self.config.sections['GENETIC'].mutation_rate): - batch = self.genetic.mutation(atoms_winner,single=True) #Will mutation only take in one structure? - else: - batch = self.genetic.crossover(atoms_winner, atoms_runner_up) #Should have two structures - - for candidate in range(len(batch)): - #file_name = self.config.sections['TARGET'].job_prefix+"_Cand%sGen%s.lammps-data"%(candidate,iteration) - file_name = self.config.sections['TARGET'].job_prefix+"_Cand%dGen%s.lammps-data"%(iset,iteration) - #NOTE: is lammps_data here always pulled from starting_generation? if so - # it needs to be updated so that the candidate is pulled from the 'current_generation' - # so far, it is unclear to me if the starting generation is just repeatedly operated on or - # if the generation is being updated and operated on. - lammps_data = self.convert.ase_to_lammps(starting_generation[candidate],file_name) #NOTE question in comment above here - #scores.append([iteration, candidate, file_name, self.scoring.get_score(lammps_data)]) - score_win = self.scoring.get_score(self.convert.ase_to_lammps(atoms_winner,file_name +'-win')) - score_conv = self.scoring.get_score(lammps_data) - #TODO help james understand why the 'winner' score not the lowest candidate score in line below - print('score comp', candidate, len(batch), score_conv, score_win) - scores.append([iteration, candidate, file_name, score_conv]) - #shutil.move(lammps_data, self.config.sections['TARGET'].job_prefix + "_Cand%sGen%s.data"%(candidate,iteration)) - """ - #Now setup for the next iteration of the tournament - scores = [] - if np.random.rand() < float(self.config.sections['GENETIC'].mutation_rate): - batch = self.genetic.mutation(atoms_winner) #Will mutation only take in one structure?- TODO enable mutate >1 structures - else: - batch = self.genetic.crossover(atoms_winner, atoms_runner_up) #Should have two structures - #batch = self.genetic.crossover_ASE(atoms_winner, atoms_runner_up) 
#crossover function from ASE - #print('batch i',batch) - - for candidate in range(self.config.sections["GENETIC"].population_size): - file_name = self.config.sections['TARGET'].job_prefix+"_Cand%sGen%s.lammps-data"%(candidate,iteration) - lammps_data = self.convert.ase_to_lammps(batch[candidate],file_name) - score_conv = self.scoring.get_score(lammps_data) - scores.append([iteration, candidate, lammps_data, score_conv]) -# print('gen %d cand %d score %f' % (iteration,candidate,score_conv), file_name) -# shutil.move(lammps_data, self.config.sections['TARGET'].job_prefix + "_Cand%sGen%s.lammps-data"%(candidate,iteration)) - + print( lowest,highest, file=f) for file in glob.glob(self.config.sections['TARGET'].job_prefix + "_Cand*Gen*"): if file not in [row[2] for row in gen_winners]: diff --git a/multi_element/gen_SQS_reproduce.py b/multi_element/gen_SQS_reproduce.py index a6c9113..22cc61b 100644 --- a/multi_element/gen_SQS_reproduce.py +++ b/multi_element/gen_SQS_reproduce.py @@ -56,6 +56,9 @@ "mutation_types": {"perturb": 0.0, "change_ele": 1.0, "atom_count" : 0.0, "volume" : 0.0, "minimize" : 0.0, "ortho_cell" : 0.0}, "population_size": 10, "ngenerations": 10, + "tournament_size": 2, + "replacements": True, + "multi_mutate": True, #"max_atoms": 50, #"min_atoms": 10, "density_ratio": 1.0, From 2878b1cc25b3cf2a383ba0ab99d701a70254634b Mon Sep 17 00:00:00 2001 From: Mitchell Anthony Wood Date: Fri, 12 Dec 2025 16:18:29 -0700 Subject: [PATCH 6/6] Fixing bug in io interpreter --- GRSlib/io/sections/genetic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/GRSlib/io/sections/genetic.py b/GRSlib/io/sections/genetic.py index 647d348..1f7b065 100644 --- a/GRSlib/io/sections/genetic.py +++ b/GRSlib/io/sections/genetic.py @@ -18,8 +18,8 @@ def __init__(self, name, config, pt,infile, args): self.population_size = self.get_value("GENETIC", "population_size", 20, interpreter="int") self.ngenerations = self.get_value("GENETIC", "ngenerations", 10, 
interpreter="int") self.tournament_size = self.get_value("GENETIC", "tournament_size", 10, interpreter="int") - self.replacements = self.get_value("GENETIC", "replacements", False, interpreter="bool") - self.multi_mutate = self.get_value("GENETIC", "multi_mutate", True, interpreter="bool") + self.replacements = self.get_value("GENETIC", "replacements", False) + self.multi_mutate = self.get_value("GENETIC", "multi_mutate", True) self.randomseed = self.get_value("GENETIC", "randomseed", 12345, interpreter="int") self.max_atoms = self.get_value("GENETIC", "max_atoms", 100, interpreter="int") self.change_ele_tol = self.get_value("GENETIC", "change_ele_tol", 0.1, interpreter="float")