diff --git a/CHANGELOG.md b/CHANGELOG.md index d9931d0d0e..f28aec2d16 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,12 +13,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - `identify_non_dominated_configurations` method to `Campaign` and `Objective` for determining the Pareto front - Interpoint constraints for continuous search spaces +- `DiscreteBatchConstraint` for ensuring all recommendations in a batch share + the same value for a specified discrete parameter ### Breaking Changes - `ContinuousLinearConstraint.to_botorch` now returns a collection of constraint tuples instead of a single tuple (needed for interpoint constraints) ### Fixed +- `ContinuousCardinalityConstraint` now works in hybrid search spaces +- Typo in `_FixedNumericalContinuousParameter` where `is_numeric` was used + instead of `is_numerical` - `SHAPInsight` breaking with `numpy>=2.4` due to no longer accepted implicit array to scalar conversion @@ -27,6 +32,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 can now be conveniently controlled via the new `Settings` mechanism ### Deprecations +- `BotorchRecommender.max_n_subspaces` has been renamed to `max_n_partitions` - `set_random_seed` and `temporary_seed` utility functions - The environment variables `BAYBE_NUMPY_USE_SINGLE_PRECISION`/`BAYBE_TORCH_USE_SINGLE_PRECISION` have been diff --git a/baybe/constraints/__init__.py b/baybe/constraints/__init__.py index 8b92ecd6fe..40f4b33b0d 100644 --- a/baybe/constraints/__init__.py +++ b/baybe/constraints/__init__.py @@ -11,6 +11,7 @@ ) from baybe.constraints.discrete import ( DISCRETE_CONSTRAINTS_FILTERING_ORDER, + DiscreteBatchConstraint, DiscreteCardinalityConstraint, DiscreteCustomConstraint, DiscreteDependenciesConstraint, @@ -33,6 +34,7 @@ "ContinuousLinearEqualityConstraint", "ContinuousLinearInequalityConstraint", # --- Discrete constraints ---# + "DiscreteBatchConstraint", "DiscreteCardinalityConstraint", 
"DiscreteCustomConstraint", "DiscreteDependenciesConstraint", diff --git a/baybe/constraints/discrete.py b/baybe/constraints/discrete.py index 740e603f89..a0dcd1c27b 100644 --- a/baybe/constraints/discrete.py +++ b/baybe/constraints/discrete.py @@ -7,6 +7,8 @@ from functools import reduce from typing import TYPE_CHECKING, Any, ClassVar, cast +import numpy as np +import numpy.typing as npt import pandas as pd from attrs import define, field from attrs.validators import in_, min_len @@ -355,6 +357,77 @@ def get_invalid(self, data: pd.DataFrame) -> pd.Index: return data.index[mask_bad] +@define +class DiscreteBatchConstraint(DiscreteConstraint): + """Constraint ensuring all batch recommendations share the same parameter value. + + When this constraint is active, the recommender internally partitions the + candidate set into partitions — one for each unique value of the constrained + parameter — obtains a full batch recommendation from each partition, and + returns the batch with the highest joint acquisition value. + + This constraint is not supported by all recommenders. It is not applied during + search space creation (all parameter values remain in the search space). + + Example: + If parameter ``Temperature`` has values ``[50, 100, 150]`` and a batch of + 10 is requested, the recommender will generate three candidate batches + (one all-50, one all-100, one all-150) and return the best one. + """ + + # Class variables + eval_during_creation: ClassVar[bool] = False + eval_during_modeling: ClassVar[bool] = True + + numerical_only: ClassVar[bool] = False + # See base class. + + def __attrs_post_init__(self): + """Validate that exactly one parameter is specified.""" + if len(self.parameters) != 1: + raise ValueError( + f"'{self.__class__.__name__}' requires exactly one parameter, " + f"but {len(self.parameters)} were provided: {self.parameters}." + ) + + @override + def get_invalid(self, data: pd.DataFrame) -> pd.Index: + """Get the indices of invalid rows. 
+ + Always returns an empty index because this constraint operates at the + batch level, not the row level. Individual rows are never invalid; the + constraint is enforced at recommendation time by partitioning candidates + into partitions. + + Args: + data: A dataframe where each row represents a parameter configuration. + + Returns: + An empty index. + """ + return pd.Index([]) + + def partition_masks( + self, candidates_exp: pd.DataFrame + ) -> list[npt.NDArray[np.bool_]]: + """Return boolean masks defining the partitions for this constraint. + + Each mask selects the rows in ``candidates_exp`` that belong to one + partition, i.e. share the same value for the constrained parameter. + + Args: + candidates_exp: The experimental representation of candidate points. + + Returns: + A list of boolean masks, one per unique value of the constrained + parameter. + """ + param = self.parameters[0] + return [ + (candidates_exp[param] == v).values for v in candidates_exp[param].unique() + ] + + @define class DiscreteCardinalityConstraint(CardinalityConstraint, DiscreteConstraint): """Class for discrete cardinality constraints.""" diff --git a/baybe/constraints/validation.py b/baybe/constraints/validation.py index 51a1a7a918..d1f75ef441 100644 --- a/baybe/constraints/validation.py +++ b/baybe/constraints/validation.py @@ -6,6 +6,7 @@ from baybe.constraints.base import Constraint from baybe.constraints.continuous import ContinuousCardinalityConstraint from baybe.constraints.discrete import ( + DiscreteBatchConstraint, DiscreteDependenciesConstraint, ) from baybe.parameters import NumericalContinuousParameter @@ -27,6 +28,7 @@ def validate_constraints( # noqa: DOC101, DOC103 :class:`baybe.constraints.discrete.DiscreteDependenciesConstraint` declared. ValueError: If any two continuous cardinality constraints have an overlapping parameter set. + ValueError: If multiple batch constraints reference the same parameter. 
ValueError: If any constraint contains an invalid parameter name. ValueError: If any continuous constraint includes a discrete parameter. ValueError: If any discrete constraint includes a continuous parameter. @@ -45,6 +47,16 @@ def validate_constraints( # noqa: DOC101, DOC103 [con for con in constraints if isinstance(con, ContinuousCardinalityConstraint)] ) + batch_param_names = [ + c.parameters[0] for c in constraints if isinstance(c, DiscreteBatchConstraint) + ] + if duplicates := {n for n in batch_param_names if batch_param_names.count(n) > 1}: + raise ValueError( + f"Multiple '{DiscreteBatchConstraint.__name__}' instances reference " + f"the same parameter(s): {duplicates}. Each parameter can have at " + f"most one batch constraint." + ) + param_names_all = [p.name for p in parameters] param_names_discrete = [p.name for p in parameters if p.is_discrete] param_names_continuous = [p.name for p in parameters if p.is_continuous] diff --git a/baybe/parameters/numerical.py b/baybe/parameters/numerical.py index ba210de244..418d7b2598 100644 --- a/baybe/parameters/numerical.py +++ b/baybe/parameters/numerical.py @@ -155,7 +155,7 @@ def summary(self) -> dict: class _FixedNumericalContinuousParameter(ContinuousParameter): """Parameter class for fixed numerical parameters.""" - is_numeric: ClassVar[bool] = True + is_numerical: ClassVar[bool] = True # See base class. value: float = field(converter=float) diff --git a/baybe/recommenders/naive.py b/baybe/recommenders/naive.py index 8039755443..5b602d881b 100644 --- a/baybe/recommenders/naive.py +++ b/baybe/recommenders/naive.py @@ -41,11 +41,11 @@ class NaiveHybridSpaceRecommender(PureRecommender): # problem that might come up when implementing new subclasses of PureRecommender disc_recommender: PureRecommender = field(factory=BotorchRecommender) """The recommender used for the discrete subspace. 
Default: - :class:`baybe.recommenders.pure.bayesian.botorch.BotorchRecommender`""" + :class:`baybe.recommenders.pure.bayesian.botorch.core.BotorchRecommender`""" cont_recommender: BayesianRecommender = field(factory=BotorchRecommender) """The recommender used for the continuous subspace. Default: - :class:`baybe.recommenders.pure.bayesian.botorch.BotorchRecommender`""" + :class:`baybe.recommenders.pure.bayesian.botorch.core.BotorchRecommender`""" @override def recommend( diff --git a/baybe/recommenders/pure/base.py b/baybe/recommenders/pure/base.py index 16eefe1016..813fc35d37 100644 --- a/baybe/recommenders/pure/base.py +++ b/baybe/recommenders/pure/base.py @@ -11,7 +11,11 @@ from cattrs.gen import make_dict_unstructure_fn from typing_extensions import override -from baybe.exceptions import DeprecationError, NotEnoughPointsLeftError +from baybe.exceptions import ( + DeprecationError, + IncompatibilityError, + NotEnoughPointsLeftError, +) from baybe.objectives.base import Objective from baybe.recommenders.base import RecommenderProtocol from baybe.searchspace import SearchSpace @@ -38,6 +42,10 @@ class PureRecommender(ABC, RecommenderProtocol): compatibility: ClassVar[SearchSpaceType] """Class variable reflecting the search space compatibility.""" + supports_discrete_batch_constraints: ClassVar[bool] = False + """Class variable indicating whether the recommender supports discrete + batch constraints.""" + _deprecated_allow_repeated_recommendations: bool = field( alias="allow_repeated_recommendations", default=None, @@ -259,6 +267,20 @@ def _recommend_with_discrete_parts( """ is_hybrid_space = searchspace.type is SearchSpaceType.HYBRID + # Check batch constraint support + if ( + searchspace.discrete.constraints_batch + and not self.supports_discrete_batch_constraints + ): + constraint_types = { + type(c).__name__ for c in searchspace.discrete.constraints_batch + } + raise IncompatibilityError( + f"'{self.__class__.__name__}' does not support discrete " + f"batch 
constraints. The search space contains: " + f"{constraint_types}." + ) + # Get discrete candidates candidates_exp, _ = searchspace.discrete.get_candidates() diff --git a/baybe/recommenders/pure/bayesian/botorch.py b/baybe/recommenders/pure/bayesian/botorch.py deleted file mode 100644 index 0f89b1f80f..0000000000 --- a/baybe/recommenders/pure/bayesian/botorch.py +++ /dev/null @@ -1,587 +0,0 @@ -"""Botorch recommender.""" - -from __future__ import annotations - -import gc -import math -import warnings -from collections.abc import Collection, Iterable -from typing import TYPE_CHECKING, Any, ClassVar - -import numpy as np -import pandas as pd -from attrs import define, field, fields -from attrs.converters import optional as optional_c -from attrs.validators import ge, gt, instance_of -from typing_extensions import override - -from baybe.acquisition.acqfs import qThompsonSampling -from baybe.constraints import ContinuousCardinalityConstraint -from baybe.constraints.utils import is_cardinality_fulfilled -from baybe.exceptions import ( - IncompatibilityError, - IncompatibleAcquisitionFunctionError, - InfeasibilityError, - MinimumCardinalityViolatedWarning, -) -from baybe.parameters.numerical import _FixedNumericalContinuousParameter -from baybe.recommenders.pure.bayesian.base import BayesianRecommender -from baybe.searchspace import ( - SearchSpace, - SearchSpaceType, - SubspaceContinuous, - SubspaceDiscrete, -) -from baybe.utils.basic import flatten -from baybe.utils.conversion import to_string -from baybe.utils.dataframe import to_tensor -from baybe.utils.sampling_algorithms import ( - DiscreteSamplingMethod, - sample_numerical_df, -) - -if TYPE_CHECKING: - from torch import Tensor - - -@define(kw_only=True) -class BotorchRecommender(BayesianRecommender): - """A pure recommender utilizing Botorch's optimization machinery. 
- - This recommender makes use of Botorch's ``optimize_acqf_discrete``, - ``optimize_acqf`` and ``optimize_acqf_mixed`` functions to optimize discrete, - continuous and hybrid search spaces, respectively. Accordingly, it can be applied to - all kinds of search spaces. - - Note: - In hybrid search spaces, the used algorithm performs a brute-force optimization - that can be computationally expensive. Thus, the behavior of the algorithm in - hybrid search spaces can be controlled via two additional parameters. - """ - - # Class variables - compatibility: ClassVar[SearchSpaceType] = SearchSpaceType.HYBRID - # See base class. - - # Object variables - sequential_continuous: bool = field(default=True) - """Flag defining whether to apply sequential greedy or batch optimization in - **continuous** search spaces. In discrete/hybrid spaces, sequential greedy - optimization is applied automatically. - """ - - hybrid_sampler: DiscreteSamplingMethod | None = field( - converter=optional_c(DiscreteSamplingMethod), default=None - ) - """Strategy used for sampling the discrete subspace when performing hybrid search - space optimization.""" - - sampling_percentage: float = field(default=1.0) - """Percentage of discrete search space that is sampled when performing hybrid search - space optimization. Ignored when ``hybrid_sampler="None"``.""" - - n_restarts: int = field(validator=[instance_of(int), gt(0)], default=10) - """Number of times gradient-based optimization is restarted from different initial - points. **Does not affect purely discrete optimization**. - """ - - n_raw_samples: int = field(validator=[instance_of(int), gt(0)], default=64) - """Number of raw samples drawn for the initialization heuristic in gradient-based - optimization. **Does not affect purely discrete optimization**. 
- """ - - max_n_subspaces: int = field(default=10, validator=[instance_of(int), ge(1)]) - """Threshold defining the maximum number of subspaces to consider for exhaustive - search in the presence of cardinality constraints. If the combinatorial number of - groupings into active and inactive parameters dictated by the constraints is greater - than this number, that many randomly selected combinations are selected for - optimization.""" - - @sampling_percentage.validator - def _validate_percentage( # noqa: DOC101, DOC103 - self, _: Any, value: float - ) -> None: - """Validate that the given value is in fact a percentage. - - Raises: - ValueError: If ``value`` is not between 0 and 1. - """ - if not 0 <= value <= 1: - raise ValueError( - f"Hybrid sampling percentage needs to be between 0 and 1 but is {value}" - ) - - @override - def __str__(self) -> str: - fields = [ - to_string("Surrogate", self._surrogate_model), - to_string( - "Acquisition function", self.acquisition_function, single_line=True - ), - to_string("Compatibility", self.compatibility, single_line=True), - to_string( - "Sequential continuous", self.sequential_continuous, single_line=True - ), - to_string("Hybrid sampler", self.hybrid_sampler, single_line=True), - to_string( - "Sampling percentage", self.sampling_percentage, single_line=True - ), - ] - return to_string(self.__class__.__name__, *fields) - - @override - def _recommend_discrete( - self, - subspace_discrete: SubspaceDiscrete, - candidates_exp: pd.DataFrame, - batch_size: int, - ) -> pd.Index: - """Generate recommendations from a discrete search space. - - Args: - subspace_discrete: The discrete subspace from which to generate - recommendations. - candidates_exp: The experimental representation of all discrete candidate - points to be considered. - batch_size: The size of the recommendation batch. - - Raises: - IncompatibleAcquisitionFunctionError: If a non-Monte Carlo acquisition - function is used with a batch size > 1. 
- - Returns: - The dataframe indices of the recommended points in the provided - experimental representation. - """ - assert self._objective is not None - acqf = self._get_acquisition_function(self._objective) - if batch_size > 1 and not acqf.supports_batching: - raise IncompatibleAcquisitionFunctionError( - f"The '{self.__class__.__name__}' only works with Monte Carlo " - f"acquisition functions for batch sizes > 1." - ) - if batch_size > 1 and isinstance(acqf, qThompsonSampling): - raise IncompatibilityError( - "Thompson sampling currently only supports a batch size of 1." - ) - - from botorch.optim import optimize_acqf_discrete - - # determine the next set of points to be tested - candidates_comp = subspace_discrete.transform(candidates_exp) - points, _ = optimize_acqf_discrete( - self._botorch_acqf, batch_size, to_tensor(candidates_comp) - ) - - # retrieve the index of the points from the input dataframe - # IMPROVE: The merging procedure is conceptually similar to what - # `SearchSpace._match_measurement_with_searchspace_indices` does, though using - # a simpler matching logic. When refactoring the SearchSpace class to - # handle continuous parameters, a corresponding utility could be extracted. - idxs = pd.Index( - pd.merge( - pd.DataFrame(points, columns=candidates_comp.columns), - candidates_comp.reset_index(), - on=list(candidates_comp), - how="left", - )["index"] - ) - - return idxs - - @override - def _recommend_continuous( - self, - subspace_continuous: SubspaceContinuous, - batch_size: int, - ) -> pd.DataFrame: - """Generate recommendations from a continuous search space. - - Args: - subspace_continuous: The continuous subspace from which to generate - recommendations. - batch_size: The size of the recommendation batch. - - Raises: - IncompatibleAcquisitionFunctionError: If a non-Monte Carlo acquisition - function is used with a batch size > 1. - - Returns: - A dataframe containing the recommendations as individual rows. 
- """ - assert self._objective is not None - if ( - batch_size > 1 - and not self._get_acquisition_function(self._objective).supports_batching - ): - raise IncompatibleAcquisitionFunctionError( - f"The '{self.__class__.__name__}' only works with Monte Carlo " - f"acquisition functions for batch sizes > 1." - ) - - points, _ = self._recommend_continuous_torch(subspace_continuous, batch_size) - - return pd.DataFrame(points, columns=subspace_continuous.parameter_names) - - def _recommend_continuous_torch( - self, subspace_continuous: SubspaceContinuous, batch_size: int - ) -> tuple[Tensor, Tensor]: - """Dispatcher selecting the continuous optimization routine.""" - if subspace_continuous.constraints_cardinality: - return self._recommend_continuous_with_cardinality_constraints( - subspace_continuous, batch_size - ) - else: - return self._recommend_continuous_without_cardinality_constraints( - subspace_continuous, batch_size - ) - - def _recommend_continuous_with_cardinality_constraints( - self, - subspace_continuous: SubspaceContinuous, - batch_size: int, - ) -> tuple[Tensor, Tensor]: - """Recommend from a continuous search space with cardinality constraints. - - This is achieved by considering the individual restricted subspaces that can be - obtained by splitting the parameters into sets of active and inactive - parameters, according to what is allowed by the cardinality constraints. - - The specific collection of subspaces considered by the recommender is obtained - as either the full combinatorial set of possible parameter splits or a random - selection thereof, depending on the upper bound specified by the corresponding - recommender attribute. - - In each of these spaces, the (in)activity assignment is fixed, so that the - cardinality constraints can be removed and a regular optimization can be - performed. The recommendation is then constructed from the combined optimization - results of the unconstrained spaces. 
- - Args: - subspace_continuous: The continuous subspace from which to generate - recommendations. - batch_size: The size of the recommendation batch. - - Returns: - The recommendations and corresponding acquisition values. - - Raises: - ValueError: If the continuous search space has no cardinality constraints. - """ - if not subspace_continuous.constraints_cardinality: - raise ValueError( - f"'{self._recommend_continuous_with_cardinality_constraints.__name__}' " - f"expects a subspace with constraints of type " - f"'{ContinuousCardinalityConstraint.__name__}'. " - ) - - # Determine search scope based on number of inactive parameter combinations - exhaustive_search = ( - subspace_continuous.n_inactive_parameter_combinations - <= self.max_n_subspaces - ) - iterator: Iterable[Collection[str]] - if exhaustive_search: - # If manageable, evaluate all combinations of inactive parameters - iterator = subspace_continuous.inactive_parameter_combinations() - else: - # Otherwise, draw a random subset of inactive parameter combinations - iterator = subspace_continuous._sample_inactive_parameters( - self.max_n_subspaces - ) - - # Create iterable of subspaces to be optimized - subspaces = ( - (subspace_continuous._enforce_cardinality_constraints(inactive_parameters)) - for inactive_parameters in iterator - ) - - points, acqf_value = self._optimize_continuous_subspaces(subspaces, batch_size) - - # Check if any minimum cardinality constraints are violated - if not is_cardinality_fulfilled( - pd.DataFrame(points, columns=subspace_continuous.parameter_names), - subspace_continuous, - check_maximum=False, - ): - warnings.warn( - "At least one minimum cardinality constraint has been violated. " - "This may occur when parameter ranges extend beyond zero in both " - "directions, making the feasible region non-convex. 
For such " - "parameters, minimum cardinality constraints are currently not " - "enforced due to the complexity of the resulting optimization problem.", - MinimumCardinalityViolatedWarning, - ) - - return points, acqf_value - - def _recommend_continuous_without_cardinality_constraints( - self, - subspace_continuous: SubspaceContinuous, - batch_size: int, - ) -> tuple[Tensor, Tensor]: - """Recommend from a continuous search space without cardinality constraints. - - Args: - subspace_continuous: The continuous subspace from which to generate - recommendations. - batch_size: The size of the recommendation batch. - - Returns: - The recommendations and corresponding acquisition values. - - Raises: - ValueError: If the continuous search space has cardinality constraints. - """ - import torch - from botorch.optim import optimize_acqf - - if subspace_continuous.constraints_cardinality: - raise ValueError( - f"'{self._recommend_continuous_without_cardinality_constraints.__name__}' " # noqa: E501 - f"expects a subspace without constraints of type " - f"'{ContinuousCardinalityConstraint.__name__}'. " - ) - - fixed_parameters = { - idx: p.value - for (idx, p) in enumerate(subspace_continuous.parameters) - if isinstance(p, _FixedNumericalContinuousParameter) - } - - # TODO: Add option for automatic choice once the "settings" PR is merged, - # which ships the necessary machinery - if ( - self.sequential_continuous - and subspace_continuous.has_interpoint_constraints - ): - raise IncompatibilityError( - f"Setting the " - f"'{fields(BotorchRecommender).sequential_continuous.name}' " - f"flag to ``True`` while interpoint constraints are present in the " - f"continuous subspace is not supported. " - ) - - # NOTE: The explicit `or None` conversion is added as an additional safety net - # because it is unclear if the corresponding presence checks for these - # arguments is correctly implemented in all invoked BoTorch subroutines. 
- # For details: https://github.com/pytorch/botorch/issues/2042 - points, acqf_values = optimize_acqf( - acq_function=self._botorch_acqf, - bounds=torch.from_numpy(subspace_continuous.comp_rep_bounds.values), - q=batch_size, - num_restarts=self.n_restarts, - raw_samples=self.n_raw_samples, - fixed_features=fixed_parameters or None, - equality_constraints=flatten( - c.to_botorch( - subspace_continuous.parameters, - batch_size=batch_size if c.is_interpoint else None, - ) - for c in subspace_continuous.constraints_lin_eq - ) - or None, - inequality_constraints=flatten( - c.to_botorch( - subspace_continuous.parameters, - batch_size=batch_size if c.is_interpoint else None, - ) - for c in subspace_continuous.constraints_lin_ineq - ) - or None, - sequential=self.sequential_continuous, - ) - return points, acqf_values - - @override - def _recommend_hybrid( - self, - searchspace: SearchSpace, - candidates_exp: pd.DataFrame, - batch_size: int, - ) -> pd.DataFrame: - """Recommend points using the ``optimize_acqf_mixed`` function of BoTorch. - - This functions samples points from the discrete subspace, performs optimization - in the continuous subspace with these points being fixed and returns the best - found solution. - - **Important**: This performs a brute-force calculation by fixing every possible - assignment of discrete variables and optimizing the continuous subspace for - each of them. It is thus computationally expensive. - - **Note**: This function implicitly assumes that discrete search space parts in - the respective data frame come first and continuous parts come second. - - Args: - searchspace: The search space in which the recommendations should be made. - candidates_exp: The experimental representation of the candidates - of the discrete subspace. - batch_size: The size of the calculated batch. - - Raises: - IncompatibleAcquisitionFunctionError: If a non-Monte Carlo acquisition - function is used with a batch size > 1. - - Returns: - The recommended points. 
- """ - assert self._objective is not None - - # Interpoint constraints cannot be used with optimize_acqf_mixed, see - # https://github.com/meta-pytorch/botorch/issues/2996 - if searchspace.continuous.has_interpoint_constraints: - raise IncompatibilityError( - "Interpoint constraints are not available in hybrid spaces." - ) - if ( - batch_size > 1 - and not self._get_acquisition_function(self._objective).supports_batching - ): - raise IncompatibleAcquisitionFunctionError( - f"The '{self.__class__.__name__}' only works with Monte Carlo " - f"acquisition functions for batch sizes > 1." - ) - - import torch - from botorch.optim import optimize_acqf_mixed - - # Transform discrete candidates - candidates_comp = searchspace.discrete.transform(candidates_exp) - - # Calculate the number of samples from the given percentage - n_candidates = math.ceil(self.sampling_percentage * len(candidates_comp.index)) - - # Potential sampling of discrete candidates - if self.hybrid_sampler is not None: - candidates_comp = sample_numerical_df( - candidates_comp, n_candidates, method=self.hybrid_sampler - ) - - # Prepare all considered discrete configurations in the - # List[Dict[int, float]] format expected by BoTorch. - num_comp_columns = len(candidates_comp.columns) - candidates_comp.columns = list(range(num_comp_columns)) - fixed_features_list = candidates_comp.to_dict("records") - - # Actual call of the BoTorch optimization routine - # NOTE: The explicit `or None` conversion is added as an additional safety net - # because it is unclear if the corresponding presence checks for these - # arguments is correctly implemented in all invoked BoTorch subroutines. 
- # For details: https://github.com/pytorch/botorch/issues/2042 - points, _ = optimize_acqf_mixed( - acq_function=self._botorch_acqf, - bounds=torch.from_numpy(searchspace.comp_rep_bounds.values), - q=batch_size, - num_restarts=self.n_restarts, - raw_samples=self.n_raw_samples, - fixed_features_list=fixed_features_list, # type: ignore[arg-type] - equality_constraints=flatten( - c.to_botorch( - searchspace.continuous.parameters, - idx_offset=len(candidates_comp.columns), - batch_size=batch_size if c.is_interpoint else None, - ) - for c in searchspace.continuous.constraints_lin_eq - ) - or None, - inequality_constraints=flatten( - c.to_botorch( - searchspace.continuous.parameters, - idx_offset=num_comp_columns, - batch_size=batch_size if c.is_interpoint else None, - ) - for c in searchspace.continuous.constraints_lin_ineq - ) - or None, - ) - - # Align candidates with search space index. Done via including the search space - # index during the merge, which is used later for back-translation into the - # experimental representation - merged = pd.merge( - pd.DataFrame(points), - candidates_comp.reset_index(), - on=list(candidates_comp.columns), - how="left", - ).set_index("index") - - # Get experimental representation of discrete part - rec_disc_exp = searchspace.discrete.exp_rep.loc[merged.index] - - # Combine discrete and continuous parts - rec_exp = pd.concat( - [ - rec_disc_exp, - merged.iloc[:, num_comp_columns:].set_axis( - searchspace.continuous.parameter_names, axis=1 - ), - ], - axis=1, - ) - - return rec_exp - - def _optimize_continuous_subspaces( - self, subspaces: Iterable[SubspaceContinuous], batch_size: int - ) -> tuple[Tensor, Tensor]: - """Find the optimum candidates from multiple continuous subspaces. - - Important: - Subspaces without feasible solutions will be silently ignored. If none of - the subspaces has a feasible solution, an exception will be raised. - - Args: - subspaces: The subspaces to consider for the optimization. 
- batch_size: The number of points to be recommended. - - Raises: - InfeasibilityError: If none of the subspaces has a feasible solution. - - Returns: - The batch of candidates and the corresponding acquisition value. - """ - import torch - from botorch.exceptions.errors import InfeasibilityError as BoInfeasibilityError - - acqf_values_all: list[Tensor] = [] - points_all: list[Tensor] = [] - - for subspace in subspaces: - try: - # Optimize the acquisition function - # Note: We explicitly evaluate the acqf function for the batch because - # the object returned by the optimization routine may contain joint or - # individual acquisition values, depending on the whether sequential - # or joint optimization is applied - p, _ = self._recommend_continuous_torch(subspace, batch_size) - with torch.no_grad(): - acqf = self._botorch_acqf(p) - - # Append optimization results - points_all.append(p) - acqf_values_all.append(acqf) - - # The optimization problem may be infeasible in certain subspaces - except BoInfeasibilityError: - pass - - if not points_all: - raise InfeasibilityError( - "No feasible solution could be found. Potentially the specified " - "constraints are too restrictive, i.e. there may be too many " - "constraints or thresholds may have been set too tightly. " - "Considered relaxing the constraints to improve the chances " - "of finding a feasible solution." 
- ) - - # Find the best option f - best_idx = np.argmax(acqf_values_all) - points = points_all[best_idx] - acqf_value = acqf_values_all[best_idx] - - return points, acqf_value - - -# Collect leftover original slotted classes processed by `attrs.define` -gc.collect() diff --git a/baybe/recommenders/pure/bayesian/botorch/__init__.py b/baybe/recommenders/pure/bayesian/botorch/__init__.py new file mode 100644 index 0000000000..899b2c9a70 --- /dev/null +++ b/baybe/recommenders/pure/bayesian/botorch/__init__.py @@ -0,0 +1,7 @@ +"""Botorch recommender.""" + +from baybe.recommenders.pure.bayesian.botorch.core import BotorchRecommender + +__all__ = [ + "BotorchRecommender", +] diff --git a/baybe/recommenders/pure/bayesian/botorch/continuous.py b/baybe/recommenders/pure/bayesian/botorch/continuous.py new file mode 100644 index 0000000000..252bdfde6f --- /dev/null +++ b/baybe/recommenders/pure/bayesian/botorch/continuous.py @@ -0,0 +1,209 @@ +"""Continuous recommendation routines for BotorchRecommender.""" + +from __future__ import annotations + +import warnings +from collections.abc import Callable, Collection, Iterable +from typing import TYPE_CHECKING + +import pandas as pd +from attrs import fields + +from baybe.constraints.utils import is_cardinality_fulfilled +from baybe.exceptions import ( + IncompatibilityError, + MinimumCardinalityViolatedWarning, +) +from baybe.parameters.numerical import _FixedNumericalContinuousParameter +from baybe.searchspace import SubspaceContinuous +from baybe.utils.basic import flatten + +if TYPE_CHECKING: + from torch import Tensor + + from baybe.recommenders.pure.bayesian.botorch.core import BotorchRecommender + + +def recommend_continuous_torch( + recommender: BotorchRecommender, + subspace_continuous: SubspaceContinuous, + batch_size: int, +) -> tuple[Tensor, Tensor]: + """Dispatcher selecting the continuous optimization routine.""" + if subspace_continuous.constraints_cardinality: + return 
recommend_continuous_with_cardinality_constraints( + recommender, subspace_continuous, batch_size + ) + else: + return recommend_continuous_without_cardinality_constraints( + recommender, subspace_continuous, batch_size + ) + + +def recommend_continuous_with_cardinality_constraints( + recommender: BotorchRecommender, + subspace_continuous: SubspaceContinuous, + batch_size: int, +) -> tuple[Tensor, Tensor]: + """Recommend from a continuous space with cardinality constraints. + + Optimizes the acquisition function across partitions defined by cardinality + constraints and returns the best result. + + The specific collection of partitions considered by the recommender is obtained + as either the full combinatorial set of possible parameter splits or a random + selection thereof, depending on the upper bound specified by the corresponding + recommender attribute. + + In each partition, the constraint-imposed configuration is fixed, so that the + constraints can be removed and a regular optimization can be performed. The + recommendation is then constructed from the combined optimization results of the + unconstrained spaces. + + Args: + recommender: The recommender instance. + subspace_continuous: The continuous subspace from which to generate + recommendations. + batch_size: The size of the recommendation batch. + + Returns: + The recommendations and corresponding acquisition values. + + Raises: + ValueError: If the continuous search space has no cardinality + constraints. + """ + if not subspace_continuous.constraints_cardinality: + raise ValueError( + f"'{recommend_continuous_with_cardinality_constraints.__name__}' " + f"expects a subspace with cardinality constraints." 
+ ) + + # Determine search scope based on number of partition configurations + configs: Iterable[frozenset[str]] + if subspace_continuous.n_theoretical_partitions <= recommender.max_n_partitions: + configs = subspace_continuous.inactive_parameter_combinations() + else: + configs = subspace_continuous._sample_inactive_parameters( + recommender.max_n_partitions + ) + + # Create closures for each partition configuration + def make_callable( + inactive_params: Collection[str], + ) -> Callable[[], tuple[Tensor, Tensor]]: + def optimize() -> tuple[Tensor, Tensor]: + import torch + + sub = subspace_continuous._enforce_cardinality_constraints(inactive_params) + # Note: We explicitly evaluate the acqf function for the batch + # because the object returned by the optimization routine may + # contain joint or individual acquisition values, depending on + # whether sequential or joint optimization is applied + p, _ = recommend_continuous_torch(recommender, sub, batch_size) + with torch.no_grad(): + acqf_value = recommender._botorch_acqf(p) + return p, acqf_value + + return optimize + + callables = (make_callable(ip) for ip in configs) + points, acqf_value = recommender._optimize_over_partitions(callables) + + # Check if any minimum cardinality constraints are violated + if not is_cardinality_fulfilled( + pd.DataFrame(points, columns=subspace_continuous.parameter_names), + subspace_continuous, + check_maximum=False, + ): + warnings.warn( + "At least one minimum cardinality constraint has been violated. " + "This may occur when parameter ranges extend beyond zero in both " + "directions, making the feasible region non-convex. 
For such " + "parameters, minimum cardinality constraints are currently not " + "enforced due to the complexity of the resulting optimization problem.", + MinimumCardinalityViolatedWarning, + ) + + return points, acqf_value + + +def recommend_continuous_without_cardinality_constraints( + recommender: BotorchRecommender, + subspace_continuous: SubspaceContinuous, + batch_size: int, +) -> tuple[Tensor, Tensor]: + """Recommend from a continuous search space without cardinality constraints. + + Args: + recommender: The recommender instance. + subspace_continuous: The continuous subspace from which to generate + recommendations. + batch_size: The size of the recommendation batch. + + Returns: + The recommendations and corresponding acquisition values. + + Raises: + ValueError: If the continuous search space has cardinality constraints. + """ + import torch + from botorch.optim import optimize_acqf + + if subspace_continuous.constraints_cardinality: + raise ValueError( + f"'{recommend_continuous_without_cardinality_constraints.__name__}' " + f"expects a subspace without cardinality constraints." + ) + + fixed_parameters = { + idx: p.value + for (idx, p) in enumerate(subspace_continuous.parameters) + if isinstance(p, _FixedNumericalContinuousParameter) + } + + # TODO: Add option for automatic choice once the "settings" PR is merged, + # which ships the necessary machinery + if ( + recommender.sequential_continuous + and subspace_continuous.has_interpoint_constraints + ): + from baybe.recommenders.pure.bayesian.botorch.core import BotorchRecommender + + raise IncompatibilityError( + f"Setting the " + f"'{fields(BotorchRecommender).sequential_continuous.name}' " + f"flag to ``True`` while interpoint constraints are present in the " + f"continuous subspace is not supported. 
" + ) + + # NOTE: The explicit `or None` conversion is added as an additional safety net + # because it is unclear if the corresponding presence checks for these + # arguments is correctly implemented in all invoked BoTorch subroutines. + # For details: https://github.com/pytorch/botorch/issues/2042 + points, acqf_values = optimize_acqf( + acq_function=recommender._botorch_acqf, + bounds=torch.from_numpy(subspace_continuous.comp_rep_bounds.values), + q=batch_size, + num_restarts=recommender.n_restarts, + raw_samples=recommender.n_raw_samples, + fixed_features=fixed_parameters or None, + equality_constraints=flatten( + c.to_botorch( + subspace_continuous.parameters, + batch_size=batch_size if c.is_interpoint else None, + ) + for c in subspace_continuous.constraints_lin_eq + ) + or None, + inequality_constraints=flatten( + c.to_botorch( + subspace_continuous.parameters, + batch_size=batch_size if c.is_interpoint else None, + ) + for c in subspace_continuous.constraints_lin_ineq + ) + or None, + sequential=recommender.sequential_continuous, + ) + return points, acqf_values diff --git a/baybe/recommenders/pure/bayesian/botorch/core.py b/baybe/recommenders/pure/bayesian/botorch/core.py new file mode 100644 index 0000000000..2fcc5e759d --- /dev/null +++ b/baybe/recommenders/pure/bayesian/botorch/core.py @@ -0,0 +1,302 @@ +"""Botorch recommender core.""" + +from __future__ import annotations + +import gc +import warnings +from collections.abc import Callable, Iterable +from typing import TYPE_CHECKING, Any, ClassVar + +import numpy as np +import pandas as pd +from attrs import define, field +from attrs.converters import optional as optional_c +from attrs.validators import ge, gt, instance_of +from typing_extensions import override + +from baybe.exceptions import ( + IncompatibleAcquisitionFunctionError, + InfeasibilityError, +) +from baybe.recommenders.pure.bayesian.base import BayesianRecommender +from baybe.recommenders.pure.bayesian.botorch.continuous import ( + 
recommend_continuous_torch, +) +from baybe.recommenders.pure.bayesian.botorch.discrete import ( + recommend_discrete_with_partitions, + recommend_discrete_without_partitions, +) +from baybe.recommenders.pure.bayesian.botorch.hybrid import ( + recommend_hybrid_with_partitions, + recommend_hybrid_without_partitions, +) +from baybe.searchspace import ( + SearchSpace, + SearchSpaceType, + SubspaceContinuous, + SubspaceDiscrete, +) +from baybe.utils.conversion import to_string +from baybe.utils.sampling_algorithms import DiscreteSamplingMethod + +if TYPE_CHECKING: + from torch import Tensor + + +@define(kw_only=True) +class BotorchRecommender(BayesianRecommender): + """A pure recommender utilizing Botorch's optimization machinery. + + This recommender makes use of Botorch's ``optimize_acqf_discrete``, + ``optimize_acqf`` and ``optimize_acqf_mixed`` functions to optimize discrete, + continuous and hybrid search spaces, respectively. Accordingly, it can be applied to + all kinds of search spaces. + + Note: + In hybrid search spaces, the used algorithm performs a brute-force optimization + that can be computationally expensive. Thus, the behavior of the algorithm in + hybrid search spaces can be controlled via two additional parameters. + """ + + # Class variables + compatibility: ClassVar[SearchSpaceType] = SearchSpaceType.HYBRID + # See base class. + + supports_discrete_batch_constraints: ClassVar[bool] = True + # See base class. + + # Object variables + sequential_continuous: bool = field(default=True) + """Flag defining whether to apply sequential greedy or batch optimization in + **continuous** search spaces. In discrete/hybrid spaces, sequential greedy + optimization is applied automatically. 
+ """ + + hybrid_sampler: DiscreteSamplingMethod | None = field( + converter=optional_c(DiscreteSamplingMethod), default=None + ) + """Strategy used for sampling the discrete subspace when performing hybrid search + space optimization.""" + + sampling_percentage: float = field(default=1.0) + """Percentage of discrete search space that is sampled when performing hybrid search + space optimization. Ignored when ``hybrid_sampler="None"``.""" + + n_restarts: int = field(validator=[instance_of(int), gt(0)], default=10) + """Number of times gradient-based optimization is restarted from different initial + points. **Does not affect purely discrete optimization**. + """ + + n_raw_samples: int = field(validator=[instance_of(int), gt(0)], default=64) + """Number of raw samples drawn for the initialization heuristic in gradient-based + optimization. **Does not affect purely discrete optimization**. + """ + + max_n_partitions: int = field(default=10, validator=[instance_of(int), ge(1)]) + """Maximum number of partitions to evaluate when partitioning constraints are + present (e.g., continuous cardinality constraints). If the total number of + partitions + exceeds this limit, a random subset of that size is sampled for optimization instead + of performing an exhaustive search.""" + + @property + def max_n_subspaces(self) -> int: + """Deprecated! Use ``max_n_partitions`` instead.""" + warnings.warn( + "'max_n_subspaces' has been renamed to 'max_n_partitions' and will " + "be removed in a future version.", + DeprecationWarning, + stacklevel=2, + ) + return self.max_n_partitions + + @max_n_subspaces.setter + def max_n_subspaces(self, value: int) -> None: + """Deprecated! 
Use ``max_n_partitions`` instead.""" # noqa: D401 + warnings.warn( + "'max_n_subspaces' has been renamed to 'max_n_partitions' and will " + "be removed in a future version.", + DeprecationWarning, + stacklevel=2, + ) + self.max_n_partitions = value + + @sampling_percentage.validator + def _validate_percentage( # noqa: DOC101, DOC103 + self, _: Any, value: float + ) -> None: + """Validate that the given value is in fact a percentage. + + Raises: + ValueError: If ``value`` is not between 0 and 1. + """ + if not 0 <= value <= 1: + raise ValueError( + f"Hybrid sampling percentage needs to be between 0 and 1 but is {value}" + ) + + @override + def __str__(self) -> str: + fields = [ + to_string("Surrogate", self._surrogate_model), + to_string( + "Acquisition function", self.acquisition_function, single_line=True + ), + to_string("Compatibility", self.compatibility, single_line=True), + to_string( + "Sequential continuous", self.sequential_continuous, single_line=True + ), + to_string("Hybrid sampler", self.hybrid_sampler, single_line=True), + to_string( + "Sampling percentage", self.sampling_percentage, single_line=True + ), + ] + return to_string(self.__class__.__name__, *fields) + + @override + def _recommend_discrete( + self, + subspace_discrete: SubspaceDiscrete, + candidates_exp: pd.DataFrame, + batch_size: int, + ) -> pd.Index: + """Generate recommendations from a discrete search space. + + Dispatches to the appropriate optimization routine depending on whether + batch constraints are present. + + Args: + subspace_discrete: The discrete subspace from which to generate + recommendations. + candidates_exp: The experimental representation of all discrete candidate + points to be considered. + batch_size: The size of the recommendation batch. + + Returns: + The dataframe indices of the recommended points in the provided + experimental representation. 
+ """ + if subspace_discrete.constraints_batch: + return recommend_discrete_with_partitions( + self, subspace_discrete, candidates_exp, batch_size + ) + return recommend_discrete_without_partitions( + self, subspace_discrete, candidates_exp, batch_size + ) + + @override + def _recommend_continuous( + self, + subspace_continuous: SubspaceContinuous, + batch_size: int, + ) -> pd.DataFrame: + """Generate recommendations from a continuous search space. + + Args: + subspace_continuous: The continuous subspace from which to generate + recommendations. + batch_size: The size of the recommendation batch. + + Raises: + IncompatibleAcquisitionFunctionError: If a non-Monte Carlo acquisition + function is used with a batch size > 1. + + Returns: + A dataframe containing the recommendations as individual rows. + """ + assert self._objective is not None + if ( + batch_size > 1 + and not self._get_acquisition_function(self._objective).supports_batching + ): + raise IncompatibleAcquisitionFunctionError( + f"The '{self.__class__.__name__}' only works with Monte Carlo " + f"acquisition functions for batch sizes > 1." + ) + + points, _ = recommend_continuous_torch(self, subspace_continuous, batch_size) + + return pd.DataFrame(points, columns=subspace_continuous.parameter_names) + + @override + def _recommend_hybrid( + self, + searchspace: SearchSpace, + candidates_exp: pd.DataFrame, + batch_size: int, + ) -> pd.DataFrame: + """Generate recommendations from a hybrid search space. + + Dispatches to the appropriate optimization routine depending on whether + partitioning constraints are present. + + Args: + searchspace: The search space in which the recommendations should be made. + candidates_exp: The experimental representation of the candidates + of the discrete subspace. + batch_size: The size of the calculated batch. + + Returns: + The recommended points. 
+ """ + if ( + searchspace.discrete.constraints_batch + or searchspace.continuous.constraints_cardinality + ): + return recommend_hybrid_with_partitions( + self, searchspace, candidates_exp, batch_size + ) + return recommend_hybrid_without_partitions( + self, searchspace, candidates_exp, batch_size + ) + + def _optimize_over_partitions( + self, + partition_callables: Iterable[Callable[[], tuple[Any, Tensor]]], + ) -> tuple[Any, Tensor]: + """Optimize across partitions and return the result with the best acqf value. + + Each callable performs optimization for one partition configuration and returns + a ``(result, acquisition_value)`` tuple. Partitions that raise + ``InfeasibilityError`` are silently skipped. + + Args: + partition_callables: An iterable of zero-argument callables. Each callable + runs the optimization for one partition and returns + ``(result, acqf_value)``. It may raise ``InfeasibilityError`` if the + partition is infeasible. + + Raises: + InfeasibilityError: If none of the partitions has a feasible solution. + + Returns: + The result and acquisition value of the best partition. + """ + from botorch.exceptions.errors import InfeasibilityError as BoInfeasibilityError + + results_all: list = [] + acqf_values_all: list[Tensor] = [] + + for optimize_fn in partition_callables: + try: + result, acqf_value = optimize_fn() + results_all.append(result) + acqf_values_all.append(acqf_value) + except (BoInfeasibilityError, InfeasibilityError): + pass + + if not results_all: + raise InfeasibilityError( + "No feasible solution could be found. Potentially the specified " + "constraints are too restrictive, i.e. there may be too many " + "constraints or thresholds may have been set too tightly. " + "Consider relaxing the constraints to improve the chances " + "of finding a feasible solution." 
+ ) + + best_idx = np.argmax(acqf_values_all) + return results_all[best_idx], acqf_values_all[best_idx] + + +# Collect leftover original slotted classes processed by `attrs.define` +gc.collect() diff --git a/baybe/recommenders/pure/bayesian/botorch/discrete.py b/baybe/recommenders/pure/bayesian/botorch/discrete.py new file mode 100644 index 0000000000..21085195b0 --- /dev/null +++ b/baybe/recommenders/pure/bayesian/botorch/discrete.py @@ -0,0 +1,141 @@ +"""Discrete recommendation routines for BotorchRecommender.""" + +from __future__ import annotations + +from collections.abc import Callable, Iterable +from typing import TYPE_CHECKING + +import numpy as np +import pandas as pd + +from baybe.searchspace import SubspaceDiscrete +from baybe.utils.dataframe import to_tensor + +if TYPE_CHECKING: + from torch import Tensor + + from baybe.recommenders.pure.bayesian.botorch.core import BotorchRecommender + + +def recommend_discrete_with_partitions( + recommender: BotorchRecommender, + subspace_discrete: SubspaceDiscrete, + candidates_exp: pd.DataFrame, + batch_size: int, +) -> pd.Index: + """Recommend from a discrete space with batch constraints. + + Partitions the candidate set according to batch constraints, + runs optimization on each feasible partition, and returns the batch with + the highest joint acquisition value. Partitions with fewer candidates + than ``batch_size`` are skipped. + + Args: + recommender: The recommender instance. + subspace_discrete: The discrete subspace from which to generate + recommendations. + candidates_exp: The experimental representation of candidates. + batch_size: The size of the recommendation batch. + + Returns: + The dataframe indices of the recommended points. 
+ """ + import torch + + masks: Iterable[np.ndarray] + if subspace_discrete.n_theoretical_partitions <= recommender.max_n_partitions: + masks = subspace_discrete.partition_masks( + candidates_exp, min_candidates=batch_size + ) + else: + masks = subspace_discrete.sample_partition_masks( + candidates_exp, recommender.max_n_partitions, min_candidates=batch_size + ) + + def make_callable( + mask: np.ndarray, + ) -> Callable[[], tuple[pd.Index, Tensor]]: + def optimize() -> tuple[pd.Index, Tensor]: + subset = candidates_exp.loc[mask] + + idxs = recommend_discrete_without_partitions( + recommender, subspace_discrete, subset, batch_size + ) + + comp = subspace_discrete.transform(candidates_exp.loc[idxs]) + with torch.no_grad(): + acqf_value = recommender._botorch_acqf(to_tensor(comp).unsqueeze(0)) + return idxs, acqf_value + + return optimize + + callables = (make_callable(m) for m in masks) + best_idxs, _ = recommender._optimize_over_partitions(callables) + return best_idxs + + +def recommend_discrete_without_partitions( + recommender: BotorchRecommender, + subspace_discrete: SubspaceDiscrete, + candidates_exp: pd.DataFrame, + batch_size: int, +) -> pd.Index: + """Generate recommendations from a discrete search space. + + Args: + recommender: The recommender instance. + subspace_discrete: The discrete subspace from which to generate + recommendations. + candidates_exp: The experimental representation of all discrete candidate + points to be considered. + batch_size: The size of the recommendation batch. + + Raises: + IncompatibleAcquisitionFunctionError: If a non-Monte Carlo acquisition + function is used with a batch size > 1. + + Returns: + The dataframe indices of the recommended points in the provided + experimental representation. 
+ """ + from baybe.acquisition.acqfs import qThompsonSampling + from baybe.exceptions import ( + IncompatibilityError, + IncompatibleAcquisitionFunctionError, + ) + + assert recommender._objective is not None + acqf = recommender._get_acquisition_function(recommender._objective) + if batch_size > 1 and not acqf.supports_batching: + raise IncompatibleAcquisitionFunctionError( + f"The '{recommender.__class__.__name__}' only works with Monte Carlo " + f"acquisition functions for batch sizes > 1." + ) + if batch_size > 1 and isinstance(acqf, qThompsonSampling): + raise IncompatibilityError( + "Thompson sampling currently only supports a batch size of 1." + ) + + from botorch.optim import optimize_acqf_discrete + + # determine the next set of points to be tested + candidates_comp = subspace_discrete.transform(candidates_exp) + points, _ = optimize_acqf_discrete( + recommender._botorch_acqf, batch_size, to_tensor(candidates_comp) + ) + + # retrieve the index of the points from the input dataframe + # IMPROVE: The merging procedure is conceptually similar to what + # `SearchSpace._match_measurement_with_searchspace_indices` does, though using + # a simpler matching logic. When refactoring the SearchSpace class to + # handle continuous parameters, a corresponding utility could be extracted. 
+ idxs = pd.Index( + pd.merge( + pd.DataFrame(points, columns=candidates_comp.columns), + candidates_comp.reset_index(), + on=list(candidates_comp), + how="left", + )["index"] + ) + + return idxs diff --git a/baybe/recommenders/pure/bayesian/botorch/hybrid.py b/baybe/recommenders/pure/bayesian/botorch/hybrid.py new file mode 100644 index 0000000000..d3eddd31f0 --- /dev/null +++ b/baybe/recommenders/pure/bayesian/botorch/hybrid.py @@ -0,0 +1,252 @@ +"""Hybrid recommendation routines for BotorchRecommender.""" + +from __future__ import annotations + +import math +import warnings +from collections.abc import Callable, Iterable +from typing import TYPE_CHECKING + +import numpy as np +import pandas as pd + +from baybe.constraints.utils import is_cardinality_fulfilled +from baybe.exceptions import ( + IncompatibilityError, + IncompatibleAcquisitionFunctionError, + MinimumCardinalityViolatedWarning, +) +from baybe.searchspace import SearchSpace +from baybe.utils.basic import flatten +from baybe.utils.dataframe import to_tensor +from baybe.utils.sampling_algorithms import sample_numerical_df + +if TYPE_CHECKING: + from torch import Tensor + + from baybe.recommenders.pure.bayesian.botorch.core import BotorchRecommender + + +def recommend_hybrid_without_partitions( + recommender: BotorchRecommender, + searchspace: SearchSpace, + candidates_exp: pd.DataFrame, + batch_size: int, +) -> pd.DataFrame: + """Recommend points using the ``optimize_acqf_mixed`` function of BoTorch. + + This function samples points from the discrete subspace, performs optimization + in the continuous subspace with these points being fixed and returns the best + found solution. + + **Important**: This performs a brute-force calculation by fixing every possible + assignment of discrete variables and optimizing the continuous subspace for + each of them. It is thus computationally expensive.
+ + **Note**: This function implicitly assumes that discrete search space parts in + the respective data frame come first and continuous parts come second. + + Args: + recommender: The recommender instance. + searchspace: The search space in which the recommendations should be made. + candidates_exp: The experimental representation of the candidates + of the discrete subspace. + batch_size: The size of the calculated batch. + + Raises: + IncompatibleAcquisitionFunctionError: If a non-Monte Carlo acquisition + function is used with a batch size > 1. + + Returns: + The recommended points. + """ + assert recommender._objective is not None + + # Interpoint constraints cannot be used with optimize_acqf_mixed, see + # https://github.com/meta-pytorch/botorch/issues/2996 + if searchspace.continuous.has_interpoint_constraints: + raise IncompatibilityError( + "Interpoint constraints are not available in hybrid spaces." + ) + if ( + batch_size > 1 + and not recommender._get_acquisition_function( + recommender._objective + ).supports_batching + ): + raise IncompatibleAcquisitionFunctionError( + f"The '{recommender.__class__.__name__}' only works with Monte Carlo " + f"acquisition functions for batch sizes > 1." + ) + + import torch + from botorch.optim import optimize_acqf_mixed + + # Transform discrete candidates + candidates_comp = searchspace.discrete.transform(candidates_exp) + + # Calculate the number of samples from the given percentage + n_candidates = math.ceil( + recommender.sampling_percentage * len(candidates_comp.index) + ) + + # Potential sampling of discrete candidates + if recommender.hybrid_sampler is not None: + candidates_comp = sample_numerical_df( + candidates_comp, n_candidates, method=recommender.hybrid_sampler + ) + + # Prepare all considered discrete configurations in the + # List[Dict[int, float]] format expected by BoTorch. 
+ num_comp_columns = len(candidates_comp.columns) + candidates_comp.columns = list(range(num_comp_columns)) + fixed_features_list = candidates_comp.to_dict("records") + + # Actual call of the BoTorch optimization routine + # NOTE: The explicit `or None` conversion is added as an additional safety net + # because it is unclear if the corresponding presence checks for these + # arguments is correctly implemented in all invoked BoTorch subroutines. + # For details: https://github.com/pytorch/botorch/issues/2042 + points, _ = optimize_acqf_mixed( + acq_function=recommender._botorch_acqf, + bounds=torch.from_numpy(searchspace.comp_rep_bounds.values), + q=batch_size, + num_restarts=recommender.n_restarts, + raw_samples=recommender.n_raw_samples, + fixed_features_list=fixed_features_list, # type: ignore[arg-type] + equality_constraints=flatten( + c.to_botorch( + searchspace.continuous.parameters, + idx_offset=len(candidates_comp.columns), + batch_size=batch_size if c.is_interpoint else None, + ) + for c in searchspace.continuous.constraints_lin_eq + ) + or None, + inequality_constraints=flatten( + c.to_botorch( + searchspace.continuous.parameters, + idx_offset=num_comp_columns, + batch_size=batch_size if c.is_interpoint else None, + ) + for c in searchspace.continuous.constraints_lin_ineq + ) + or None, + ) + + # Align candidates with search space index. 
Done via including the search space + # index during the merge, which is used later for back-translation into the + # experimental representation + merged = pd.merge( + pd.DataFrame(points), + candidates_comp.reset_index(), + on=list(candidates_comp.columns), + how="left", + ).set_index("index") + + # Get experimental representation of discrete part + rec_disc_exp = searchspace.discrete.exp_rep.loc[merged.index] + + # Combine discrete and continuous parts + rec_exp = pd.concat( + [ + rec_disc_exp, + merged.iloc[:, num_comp_columns:].set_axis( + searchspace.continuous.parameter_names, axis=1 + ), + ], + axis=1, + ) + + return rec_exp + + +def recommend_hybrid_with_partitions( + recommender: BotorchRecommender, + searchspace: SearchSpace, + candidates_exp: pd.DataFrame, + batch_size: int, +) -> pd.DataFrame: + """Recommend from a hybrid space with partitioning constraints. + + Uses ``SearchSpace.partitions()`` to enumerate the Cartesian + product of discrete and continuous partition configurations, capped at + ``max_n_partitions`` total. In purely discrete search spaces, partitions + with fewer candidates than ``batch_size`` are pre-filtered. + + Args: + recommender: The recommender instance. + searchspace: The search space in which the recommendations should be made. + candidates_exp: The experimental representation of the candidates + of the discrete subspace. + batch_size: The size of the calculated batch. + + Returns: + The recommended points. + """ + from attrs import evolve + + subspace_c = searchspace.continuous + + # Get combined configurations, capped at max_n_partitions + # NOTE: No min_discrete_candidates filtering in hybrid spaces because + # optimize_acqf_mixed can produce multiple recommendations from a single + # discrete candidate by varying continuous parameters. 
+ combined_masks: Iterable[tuple[np.ndarray, frozenset[str]]] + if searchspace.n_theoretical_partitions <= recommender.max_n_partitions: + combined_masks = searchspace.partitions(candidates_exp) + else: + combined_masks = searchspace.sample_partitions( + candidates_exp, recommender.max_n_partitions + ) + + def make_callable( + d_mask: np.ndarray, + c_inactive_params: frozenset[str], + ) -> Callable[[], tuple[pd.DataFrame, Tensor]]: + def optimize() -> tuple[pd.DataFrame, Tensor]: + import torch + + subset = candidates_exp.loc[d_mask] + + if c_inactive_params: + mod_cont = subspace_c._enforce_cardinality_constraints( + c_inactive_params + ) + else: + mod_cont = subspace_c + mod_searchspace = evolve(searchspace, continuous=mod_cont) + + rec = recommend_hybrid_without_partitions( + recommender, mod_searchspace, subset, batch_size + ) + + comp = mod_searchspace.transform(rec) + with torch.no_grad(): + acqf_value = recommender._botorch_acqf( + to_tensor(comp.values).unsqueeze(0) + ) + return rec, acqf_value + + return optimize + + callables = (make_callable(d_mask, c_ip) for d_mask, c_ip in combined_masks) + best_rec, _ = recommender._optimize_over_partitions(callables) + + # Post-check minimum cardinality on continuous columns + if subspace_c.constraints_cardinality and not is_cardinality_fulfilled( + best_rec[list(subspace_c.parameter_names)], + subspace_c, + check_maximum=False, + ): + warnings.warn( + "At least one minimum cardinality constraint has been violated. " + "This may occur when parameter ranges extend beyond zero in both " + "directions, making the feasible region non-convex. 
For such " + "parameters, minimum cardinality constraints are currently not " + "enforced due to the complexity of the resulting optimization " + "problem.", + MinimumCardinalityViolatedWarning, + ) + + return best_rec diff --git a/baybe/recommenders/pure/nonpredictive/sampling.py b/baybe/recommenders/pure/nonpredictive/sampling.py index acb5af55c3..d8e5156c72 100644 --- a/baybe/recommenders/pure/nonpredictive/sampling.py +++ b/baybe/recommenders/pure/nonpredictive/sampling.py @@ -9,6 +9,7 @@ from attrs.validators import instance_of from typing_extensions import override +from baybe.exceptions import InfeasibilityError from baybe.recommenders.pure.nonpredictive.base import NonPredictiveRecommender from baybe.searchspace import SearchSpace, SearchSpaceType, SubspaceDiscrete from baybe.settings import Settings, active_settings @@ -23,6 +24,9 @@ class RandomRecommender(NonPredictiveRecommender): compatibility: ClassVar[SearchSpaceType] = SearchSpaceType.HYBRID # See base class. + supports_discrete_batch_constraints: ClassVar[bool] = True + # See base class. 
+ @override def _recommend_hybrid( self, @@ -30,22 +34,37 @@ def _recommend_hybrid( candidates_exp: pd.DataFrame, batch_size: int, ) -> pd.DataFrame: - if searchspace.type == SearchSpaceType.DISCRETE: - return candidates_exp.sample(batch_size) - - cont_random = searchspace.continuous.sample_uniform(batch_size=batch_size) - if searchspace.type == SearchSpaceType.CONTINUOUS: - return cont_random - - disc_candidates, _ = searchspace.discrete.get_candidates() - - # TODO decide mechanism if number of possible discrete candidates is smaller - # than batch size - disc_random = disc_candidates.sample( + is_hybrid = searchspace.type is SearchSpaceType.HYBRID + + # Sample continuous part if applicable + if is_hybrid or searchspace.type is SearchSpaceType.CONTINUOUS: + cont_random = searchspace.continuous.sample_uniform(batch_size=batch_size) + if searchspace.type is SearchSpaceType.CONTINUOUS: + return cont_random + + # Restrict to a random partition if batch constraints are present + if searchspace.discrete.constraints_batch: + masks = searchspace.discrete.sample_partition_masks( + candidates_exp, + n=1, + min_candidates=None if is_hybrid else batch_size, + ) + if not masks: + raise InfeasibilityError( + "No feasible partition found for the given " + "batch constraints. All partitions have fewer " + f"candidates than the requested {batch_size=}." 
+ ) + candidates_exp = candidates_exp.loc[masks[0]] + + disc_random = candidates_exp.sample( n=batch_size, - replace=len(disc_candidates) < batch_size, + replace=is_hybrid or len(candidates_exp) < batch_size, ) + if not is_hybrid: + return disc_random + cont_random.index = disc_random.index return pd.concat([disc_random, cont_random], axis=1) diff --git a/baybe/searchspace/continuous.py b/baybe/searchspace/continuous.py index a3e0fa34f6..465362c578 100644 --- a/baybe/searchspace/continuous.py +++ b/baybe/searchspace/continuous.py @@ -4,8 +4,9 @@ import gc import math +import random from collections.abc import Collection, Iterator, Sequence -from itertools import chain, product +from itertools import chain from typing import TYPE_CHECKING, Any, cast import numpy as np @@ -108,8 +109,10 @@ def __str__(self) -> str: return to_string(self.__class__.__name__, *fields) @property - def constraints_cardinality(self) -> tuple[ContinuousCardinalityConstraint, ...]: - """Cardinality constraints.""" + def constraints_cardinality( + self, + ) -> tuple[ContinuousCardinalityConstraint, ...]: + """The cardinality constraints of the subspace.""" return tuple( c for c in self.constraints_nonlin @@ -143,21 +146,61 @@ def _validate_constraints_lin_ineq( ) @property - def n_inactive_parameter_combinations(self) -> int: - """The number of possible inactive parameter combinations.""" + def n_theoretical_partitions(self) -> int: + """The theoretical number of possible partition configurations. + + Returns 0 if no cardinality constraints exist, indicating that + no decomposition is needed. 
+ """ + if not self.constraints_cardinality: + return 0 return math.prod( c.n_inactive_parameter_combinations for c in self.constraints_cardinality ) - def inactive_parameter_combinations(self) -> Iterator[frozenset[str]]: - """Get an iterator over all possible combinations of inactive parameters.""" - for combination in product( - *[ - con.inactive_parameter_combinations() - for con in self.constraints_cardinality - ] - ): - yield frozenset(chain(*combination)) + def inactive_parameter_combinations( # noqa: DOC404 + self, + *, + shuffle: bool = False, + replace: bool = False, + ) -> Iterator[frozenset[str]]: + """Get an iterator over all possible inactive parameter combinations. + + Args: + shuffle: If ``True``, iterate in uniformly shuffled order. + Has no effect when ``replace=True``. + replace: If ``True``, sample with replacement, producing an + infinite iterator where each draw is independent. + + Yields: + A frozenset of inactive parameter names for the subspace. + """ + per_constraint = [ + list(con.inactive_parameter_combinations()) + for con in self.constraints_cardinality + ] + + total = math.prod(len(v) for v in per_constraint) + + def _resolve_flat_idx(flat_idx: int) -> frozenset[str]: + combo = [] + remaining = flat_idx + for values in per_constraint: + remaining, idx = divmod(remaining, len(values)) + combo.append(values[idx]) + return frozenset(chain(*combo)) + + if replace: + candidates = list(range(total)) + while candidates: + idx_pos = random.randint(0, len(candidates) - 1) + yield _resolve_flat_idx(candidates[idx_pos]) + else: + order = list(range(total)) + if shuffle: + random.shuffle(order) + for flat_idx in order: + yield _resolve_flat_idx(flat_idx) @constraints_nonlin.validator def _validate_constraints_nonlin(self, _, __) -> None: @@ -617,13 +660,13 @@ def _sample_from_polytope_with_cardinality_constraints( .fillna(0.0) ) - def _sample_inactive_parameters(self, batch_size: int = 1) -> list[set[str]]: - """Sample inactive parameters 
according to the given cardinality constraints.""" + def _sample_inactive_parameters(self, batch_size: int = 1) -> list[frozenset[str]]: + """Sample inactive parameter configurations from the cardinality constraints.""" inactives_per_constraint = [ con.sample_inactive_parameters(batch_size) for con in self.constraints_cardinality ] - return [set(chain(*x)) for x in zip(*inactives_per_constraint)] + return [frozenset(chain(*x)) for x in zip(*inactives_per_constraint)] def sample_from_full_factorial(self, batch_size: int = 1) -> pd.DataFrame: """Draw parameter configurations from the full factorial of the space. diff --git a/baybe/searchspace/core.py b/baybe/searchspace/core.py index 8b0da30c92..319f6dc712 100644 --- a/baybe/searchspace/core.py +++ b/baybe/searchspace/core.py @@ -3,16 +3,20 @@ from __future__ import annotations import gc -from collections.abc import Iterable, Sequence +from collections.abc import Iterable, Iterator, Sequence from enum import Enum +from itertools import product from typing import cast +import numpy as np +import numpy.typing as npt import pandas as pd from attrs import define, field from typing_extensions import override from baybe.constraints import validate_constraints from baybe.constraints.base import Constraint +from baybe.exceptions import InfeasibilityError from baybe.parameters import TaskParameter from baybe.parameters.base import Parameter from baybe.searchspace.continuous import SubspaceContinuous @@ -284,6 +288,112 @@ def n_tasks(self) -> int: except StopIteration: return 1 + @property + def n_theoretical_partitions(self) -> int: + """Total theoretical number of partition configurations. + + Returns 0 if no partitioning constraints exist on either side. + When only one side has constraints, the other does not contribute to + the count. 
+ """ + d = self.discrete.n_theoretical_partitions + c = self.continuous.n_theoretical_partitions + if d == 0 == c: + return 0 + return max(d, 1) * max(c, 1) + + def partitions( # noqa: DOC404 + self, + candidates_exp: pd.DataFrame, + min_discrete_candidates: int | None = None, + ) -> Iterator[tuple[npt.NDArray[np.bool_], frozenset[str]]]: + r"""Get an iterator over all combined partition configurations. + + Yields the Cartesian product of discrete masks and continuous + configurations. + + Args: + candidates_exp: The experimental representation of discrete candidates. + min_discrete_candidates: If provided, discrete partitions with fewer + matching candidates are skipped. + + Yields: + A discrete mask and continuous inactive parameters pair. + """ + yield from product( + self.discrete.partition_masks( + candidates_exp, min_candidates=min_discrete_candidates + ), + self.continuous.inactive_parameter_combinations(), + ) + + def sample_partitions( + self, + candidates_exp: pd.DataFrame, + n: int, + min_discrete_candidates: int | None = None, + *, + max_rejections: int = 10, + ) -> list[tuple[npt.NDArray[np.bool_], frozenset[str]]]: + """Sample unique combined partition configurations. + + Zips two independent with-replacement iterators from the discrete and + continuous sides, producing random pairs from the Cartesian product. + Duplicate pairs are skipped. + + Args: + candidates_exp: The experimental representation of discrete candidates. + n: Number of unique configurations to sample. + min_discrete_candidates: If provided, discrete partitions with fewer + matching candidates are excluded. + max_rejections: Maximum number of times a duplicate combination can + be drawn before raising ``InfeasibilityError``. + + Raises: + InfeasibilityError: If not enough unique partition configurations + are available. + + Returns: + A list of ``(discrete_mask, continuous_inactive_params)`` tuples. 
+ """ + d_iter = self.discrete.partition_masks( + candidates_exp, + min_candidates=min_discrete_candidates, + shuffle=True, + replace=True, + ) + c_iter = self.continuous.inactive_parameter_combinations( + shuffle=True, replace=True + ) + + seen: set[tuple[bytes, frozenset[str]]] = set() + results: list[tuple[npt.NDArray[np.bool_], frozenset[str]]] = [] + rejections = 0 + + for d_mask, c_config in zip(d_iter, c_iter): + key = (d_mask.tobytes(), c_config) + if key in seen: + rejections += 1 + if rejections > max_rejections: + raise InfeasibilityError( + f"Not enough unique partition configurations available. " + f"Requested {n} but only {len(results)} could be found." + ) + continue + seen.add(key) + rejections = 0 + results.append((d_mask, c_config)) + if len(results) >= n: + break + + if len(results) < n: + raise InfeasibilityError( + f"Not enough unique subspace configurations available. " + f"Requested {n} but only {len(results)} could be found." + ) + + return results + def get_comp_rep_parameter_indices(self, name: str, /) -> tuple[int, ...]: """Find a parameter's column indices in the computational representation. 
diff --git a/baybe/searchspace/discrete.py b/baybe/searchspace/discrete.py index efae2cfc6b..d45e81e1fa 100644 --- a/baybe/searchspace/discrete.py +++ b/baybe/searchspace/discrete.py @@ -3,12 +3,14 @@ from __future__ import annotations import gc -from collections.abc import Collection, Sequence -from itertools import compress +import random +from collections.abc import Collection, Iterator, Sequence +from itertools import compress, islice from math import prod from typing import TYPE_CHECKING, Any import numpy as np +import numpy.typing as npt import pandas as pd from attrs import define, field from cattrs import IterableValidationError @@ -16,6 +18,7 @@ from baybe.constraints import DISCRETE_CONSTRAINTS_FILTERING_ORDER, validate_constraints from baybe.constraints.base import DiscreteConstraint +from baybe.constraints.discrete import DiscreteBatchConstraint from baybe.exceptions import DeprecationError from baybe.parameters import ( CategoricalEncoding, @@ -183,10 +186,14 @@ def from_product( """See :class:`baybe.searchspace.core.SearchSpace`.""" # Set defaults and order constraints constraints = constraints or [] - constraints = sorted( - constraints, + filtering_constraints = sorted( + [c for c in constraints if c.eval_during_creation], key=lambda x: DISCRETE_CONSTRAINTS_FILTERING_ORDER.index(x.__class__), ) + non_filtering_constraints = [ + c for c in constraints if not c.eval_during_creation + ] + constraints = [*filtering_constraints, *non_filtering_constraints] if active_settings.use_polars_for_constraints: lazy_df = parameter_cartesian_prod_polars(parameters) @@ -578,6 +585,125 @@ def estimate_product_space_size( comp_rep_shape=(n_rows, n_cols_comp), ) + @property + def constraints_batch( + self, + ) -> tuple[DiscreteBatchConstraint, ...]: + """The batch constraints of the subspace.""" + return tuple( + c for c in self.constraints if isinstance(c, DiscreteBatchConstraint) + ) + + @property + def n_theoretical_partitions(self) -> int: + """The theoretical 
number of possible partition configurations. + + Returns 0 if no batch constraints exist, indicating that + no decomposition is needed. + """ + if not self.constraints_batch: + return 0 + return prod( + len(self.get_parameters_by_name([c.parameters[0]])[0].active_values) + for c in self.constraints_batch + ) + + def partition_masks( # noqa: DOC404 + self, + candidates_exp: pd.DataFrame, + min_candidates: int | None = None, + *, + shuffle: bool = False, + replace: bool = False, + ) -> Iterator[npt.NDArray[np.bool_]]: + r"""Get an iterator over all possible partition masks. + + Collects masks from each batch constraint, iterates the + Cartesian product, AND-reduces each combination, and yields feasible + combined masks. + + Args: + candidates_exp: The experimental representation of candidate points. + min_candidates: If provided, combined masks selecting fewer rows + are silently skipped. + shuffle: If ``True``, iterate in uniformly shuffled order. + Has no effect when ``replace=True``. + replace: If ``True``, sample with replacement, producing an + infinite iterator where each draw is independent. Infeasible + indices are permanently excluded from the sampling pool. + + Yields: + A boolean mask selecting the partition's rows. + """ + constraints = self.constraints_batch + if not constraints: + per_constraint: list[list[npt.NDArray[np.bool_]]] = [ + [np.ones(len(candidates_exp), dtype=bool)] + ] + else: + per_constraint = [c.partition_masks(candidates_exp) for c in constraints] + + total = prod(len(masks) for masks in per_constraint) + + def _resolve_flat_idx(flat_idx: int) -> npt.NDArray[np.bool_]: + # Decompose flat index into per-constraint indices. 
+            # Example with 3 constraints of partition lengths [3, 2, 4]:
+            # flat_idx=11 -> divmod(11,3)=(3,2) -> A[2]
+            # divmod(3,2)=(1,1) -> B[1]
+            # divmod(1,4)=(0,1) -> C[1]
+            # Result: masks A[2] AND B[1] AND C[1]
+            masks = []
+            remaining = flat_idx
+            for constraint_masks in per_constraint:
+                remaining, idx = divmod(remaining, len(constraint_masks))
+                masks.append(constraint_masks[idx])
+            return np.logical_and.reduce(masks)
+
+        if replace:
+            candidates = list(range(total))
+            while candidates:
+                idx_pos = random.randint(0, len(candidates) - 1)
+                flat_idx = candidates[idx_pos]
+                combined = _resolve_flat_idx(flat_idx)
+                if min_candidates is not None and combined.sum() < min_candidates:
+                    candidates[idx_pos] = candidates[-1]
+                    candidates.pop()
+                    continue
+                yield combined
+        else:
+            order = list(range(total))
+            if shuffle:
+                random.shuffle(order)
+            for flat_idx in order:
+                combined = _resolve_flat_idx(flat_idx)
+                if min_candidates is not None and combined.sum() < min_candidates:
+                    continue
+                yield combined
+
+    def sample_partition_masks(
+        self,
+        candidates_exp: pd.DataFrame,
+        n: int,
+        min_candidates: int | None = None,
+    ) -> list[npt.NDArray[np.bool_]]:
+        """Sample partition masks.
+
+        Args:
+            candidates_exp: The experimental representation of candidate points.
+            n: Maximum number of masks to sample.
+            min_candidates: If provided, partitions with fewer matching
+                candidates are skipped.
+
+        Returns:
+            A list of at most ``n`` boolean masks.
+        """
+        return list(
+            islice(
+                self.partition_masks(candidates_exp, min_candidates, shuffle=True),
+                n,
+            )
+        )
+
     def get_candidates(self) -> tuple[pd.DataFrame, pd.DataFrame]:
         """Return the set of candidate parameter settings that can be tested.
diff --git a/docs/userguide/async.md b/docs/userguide/async.md index c590b0048a..a376fcf2c6 100644 --- a/docs/userguide/async.md +++ b/docs/userguide/async.md @@ -48,7 +48,7 @@ function with `pending_experiments` will result in an For technical reasons, not every recommender is able to make use of `pending_experiments`. For instance, -[`BotorchRecommender`](baybe.recommenders.pure.bayesian.botorch.BotorchRecommender) +[`BotorchRecommender`](baybe.recommenders.pure.bayesian.botorch.core.BotorchRecommender) takes all pending experiments into account, even if they do not match exactly with points in the search space. By contrast, diff --git a/docs/userguide/campaigns.md b/docs/userguide/campaigns.md index 517e29cfa9..1b0abd9c4f 100644 --- a/docs/userguide/campaigns.md +++ b/docs/userguide/campaigns.md @@ -96,7 +96,7 @@ used is strongly discouraged. **Note:** While the above distinction is true in the general case, it may not be relevant for all configured settings, for instance, when the used recommender is not capable of joint optimization. Currently, the -[BotorchRecommender](baybe.recommenders.pure.bayesian.botorch.BotorchRecommender) +[BotorchRecommender](baybe.recommenders.pure.bayesian.botorch.core.BotorchRecommender) is the only recommender available that performs joint optimization. ``` diff --git a/docs/userguide/constraints.md b/docs/userguide/constraints.md index e66be3051c..d14e3dd831 100644 --- a/docs/userguide/constraints.md +++ b/docs/userguide/constraints.md @@ -130,7 +130,7 @@ to be aware of: - BayBE does not support to use both interpoint and cardinality constraints within the same search space. - When using interpoint constraints, candidate generation cannot be done -{attr}`sequentially `, +{attr}`sequentially `, and an error is raised when attempted. - Interpoint constraints are only supported in purely continuous spaces and are not available in hybrid spaces. 
@@ -169,8 +169,8 @@ settings, searching an optimal parameter configuration can quickly become infeas creating the need for approximation schemes: * The - {paramref}`BotorchRecommender.max_n_subspaces ` - attribute can be used to limit the number of subspaces considered during optimization. + {paramref}`BotorchRecommender.max_n_partitions ` + attribute can be used to limit the number of partitions considered during optimization. * When the ranges of cardinality-constrained parameters cover both positive and negative values, minimal cardinality requirements cannot always be guaranteed, potentially resulting in a {class}`~baybe.exceptions.MinimumCardinalityViolatedWarning`. @@ -533,3 +533,50 @@ Due to the arbitrary nature of code and dependencies that can be used in the using a `DiscreteCustomConstraint` results in an error if you attempt to serialize the corresponding object or higher-level objects containing it. ``` + +### DiscreteBatchConstraint +Unlike the other discrete constraints described above, the +{class}`~baybe.constraints.discrete.DiscreteBatchConstraint` does not filter candidates +from the search space. Instead, it controls how recommendations are generated at +batch level: it ensures that **all experiments in a recommended batch share the same +value** for the constrained parameter. + +This is useful, for example, when experiments in a batch must be run under shared +conditions. Consider a well plate experiment where each plate holds multiple samples +but only one temperature can be set per plate. If the optimizer recommends a batch of +experiments to fill one plate, all of them must use the same temperature. The +`DiscreteBatchConstraint` enforces this by internally partitioning the candidate space +into subspaces (one per temperature value), optimizing each subspace independently, and +selecting the batch with the highest expected utility. 
+
+```python
+from baybe.constraints import DiscreteBatchConstraint
+
+DiscreteBatchConstraint(
+    parameters=["Temperature"],  # all batch entries will share the same temperature
+)
+```
+
+Multiple batch constraints on different parameters can be combined. For instance, if
+both the temperature and the solvent must be fixed across the plate, two constraints
+can be specified:
+
+```python
+DiscreteBatchConstraint(parameters=["Temperature"])
+DiscreteBatchConstraint(parameters=["Solvent"])
+```
+
+In this case, each recommended batch will share both the same temperature and the same
+solvent. The optimizer evaluates the Cartesian product of possible value combinations
+and selects the best one.
+
+```{admonition} Recommender Compatibility
+:class: warning
+The `DiscreteBatchConstraint` is only supported by recommenders that implement
+batch-level partitioning, such as
+{class}`~baybe.recommenders.pure.bayesian.botorch.core.BotorchRecommender` and
+{class}`~baybe.recommenders.pure.nonpredictive.sampling.RandomRecommender`.
+Other recommenders will raise an
+{class}`~baybe.exceptions.IncompatibilityError` if a search space with batch
+constraints is used.
+```
diff --git a/docs/userguide/getting_recommendations.md b/docs/userguide/getting_recommendations.md
index 810ef1b472..6fad86860f 100644
--- a/docs/userguide/getting_recommendations.md
+++ b/docs/userguide/getting_recommendations.md
@@ -31,7 +31,7 @@ BayBE offers two entry points for requesting recommendations:
    {attr}`~baybe.recommenders.meta.base.MetaRecommender.is_stateful` property.
``` - For example, using the {class}`~baybe.recommenders.pure.bayesian.botorch.BotorchRecommender`: + For example, using the {class}`~baybe.recommenders.pure.bayesian.botorch.core.BotorchRecommender`: ~~~python recommender = BotorchRecommender() recommendation = recommender.recommend(batch_size, searchspace, objective, measurements) diff --git a/docs/userguide/recommenders.md b/docs/userguide/recommenders.md index 488ea4c297..fc4dadd7c1 100644 --- a/docs/userguide/recommenders.md +++ b/docs/userguide/recommenders.md @@ -21,7 +21,7 @@ The Bayesian recommenders in BayBE are built on the foundation of the class, offering an array of possibilities with internal surrogate models and support for various acquisition functions. -* The **[`BotorchRecommender`](baybe.recommenders.pure.bayesian.botorch.BotorchRecommender)** +* The **[`BotorchRecommender`](baybe.recommenders.pure.bayesian.botorch.core.BotorchRecommender)** is a powerful recommender based on BoTorch's optimization engine that can be applied to all kinds of search spaces. In continuous spaces, its `sequential_continuous` flag allows to choose between greedy sequential optimization and batch optimization as the @@ -32,16 +32,16 @@ for various acquisition functions. spaces, as it does gradient-based optimization in the continuous part of the space while exhaustively evaluating configurations of the discrete subspace. You can customize this behavior to only sample a certain percentage of the discrete subspace via the - {attr}`~baybe.recommenders.pure.bayesian.botorch.BotorchRecommender.sampling_percentage` + {attr}`~baybe.recommenders.pure.bayesian.botorch.core.BotorchRecommender.sampling_percentage` argument and to choose different sampling algorithms via the - {attr}`~baybe.recommenders.pure.bayesian.botorch.BotorchRecommender.hybrid_sampler` + {attr}`~baybe.recommenders.pure.bayesian.botorch.core.BotorchRecommender.hybrid_sampler` argument. 
The gradient-based optimization part can also further be controlled by the - {attr}`~baybe.recommenders.pure.bayesian.botorch.BotorchRecommender.n_restarts` and - {attr}`~baybe.recommenders.pure.bayesian.botorch.BotorchRecommender.n_raw_samples` + {attr}`~baybe.recommenders.pure.bayesian.botorch.core.BotorchRecommender.n_restarts` and + {attr}`~baybe.recommenders.pure.bayesian.botorch.core.BotorchRecommender.n_raw_samples` arguments. For details, please refer - to [BotorchRecommender](baybe.recommenders.pure.bayesian.botorch.BotorchRecommender). + to [BotorchRecommender](baybe.recommenders.pure.bayesian.botorch.core.BotorchRecommender). * The **[`NaiveHybridSpaceRecommender`](baybe.recommenders.naive.NaiveHybridSpaceRecommender)** can be applied to all search spaces, but is intended to be used in hybrid spaces. diff --git a/examples/Custom_Hooks/probability_of_improvement.py b/examples/Custom_Hooks/probability_of_improvement.py index 3ae7dd66ae..7834592d1d 100644 --- a/examples/Custom_Hooks/probability_of_improvement.py +++ b/examples/Custom_Hooks/probability_of_improvement.py @@ -4,7 +4,7 @@ # {func}`register_hooks ` utility can be used to # extract the *Probability of Improvement (PI)* from a running campaign: # * We define a hook that is compatible with the -# {meth}`BotorchRecommender.recommend ` +# {meth}`BotorchRecommender.recommend ` # interface and lets us extract the PI achieved after each experimental iteration, # * attach the hook to the recommender driving our campaign, # * and plot the evolving PI values after campaign completion. @@ -107,7 +107,7 @@ def extract_pi( ) # In this example, we use `MethodType` to bind the -# {meth}`BotorchRecommender.recommend ` +# {meth}`BotorchRecommender.recommend ` # **function** with our hook. # For more information, we refer to the [`basic example`](./basics.md) explaining the # hook mechanics. 
diff --git a/tests/constraints/test_batch_constraint.py b/tests/constraints/test_batch_constraint.py new file mode 100644 index 0000000000..7cc14f59ec --- /dev/null +++ b/tests/constraints/test_batch_constraint.py @@ -0,0 +1,150 @@ +"""Tests for the discrete batch constraint.""" + +import pytest +from pytest import param + +from baybe.constraints.discrete import DiscreteBatchConstraint +from baybe.exceptions import IncompatibilityError, InfeasibilityError +from baybe.parameters.numerical import NumericalDiscreteParameter +from baybe.recommenders import BotorchRecommender +from baybe.recommenders.pure.nonpredictive.sampling import ( + FPSRecommender, + RandomRecommender, +) +from baybe.searchspace import SearchSpace +from baybe.targets import NumericalTarget +from baybe.utils.dataframe import create_fake_input + +BATCH_SIZE = 3 +TARGET = NumericalTarget("y") + +_params = [ + NumericalDiscreteParameter("d0", values=(0.0, 0.5, 1.0)), + NumericalDiscreteParameter("d1", values=(0.0, 0.5, 1.0)), +] + + +@pytest.mark.parametrize( + ("constraints", "constrained_params", "batch_size"), + [ + param( + [DiscreteBatchConstraint(parameters=["d0"])], + ["d0"], + BATCH_SIZE, + id="single", + ), + param( + [ + DiscreteBatchConstraint(parameters=["d0"]), + DiscreteBatchConstraint(parameters=["d1"]), + ], + ["d0", "d1"], + 1, + id="multiple", + ), + ], +) +def test_batch_constraint_bayesian(constraints, constrained_params, batch_size): + """BotorchRecommender respects batch constraints.""" + searchspace = SearchSpace.from_product(_params, constraints) + measurements = create_fake_input(_params, [TARGET], n_rows=3) + + rec = BotorchRecommender().recommend( + batch_size, searchspace, TARGET.to_objective(), measurements + ) + assert rec.shape[0] == batch_size + for p in constrained_params: + assert rec[p].nunique() == 1 + + +def test_batch_constraint_random_recommender(): + """RandomRecommender respects the batch constraint.""" + searchspace = SearchSpace.from_product( + _params, 
[DiscreteBatchConstraint(parameters=["d0"])] + ) + rec = RandomRecommender().recommend(BATCH_SIZE, searchspace) + assert rec["d0"].nunique() == 1 + assert rec.shape[0] == BATCH_SIZE + + +def test_batch_constraint_unsupported_recommender(): + """Unsupported recommenders raise IncompatibilityError.""" + searchspace = SearchSpace.from_product( + _params, [DiscreteBatchConstraint(parameters=["d0"])] + ) + with pytest.raises(IncompatibilityError, match="does not support"): + FPSRecommender().recommend(BATCH_SIZE, searchspace) + + +def test_batch_constraint_validation_multi_param(): + """DiscreteBatchConstraint requires exactly one parameter.""" + with pytest.raises(ValueError, match="exactly one parameter"): + DiscreteBatchConstraint(parameters=["A", "B"]) + + +def test_batch_constraint_validation_duplicate(): + """Two batch constraints on the same parameter are rejected.""" + constraints = [ + DiscreteBatchConstraint(parameters=["d0"]), + DiscreteBatchConstraint(parameters=["d0"]), + ] + with pytest.raises(ValueError, match="same parameter"): + SearchSpace.from_product(_params, constraints) + + +@pytest.mark.parametrize( + ("constraints", "expected"), + [ + param([], 0, id="none"), + param([DiscreteBatchConstraint(parameters=["d0"])], 3, id="single"), + param( + [ + DiscreteBatchConstraint(parameters=["d0"]), + DiscreteBatchConstraint(parameters=["d1"]), + ], + 9, + id="two", + ), + ], +) +def test_batch_constraint_n_theoretical_partitions(constraints, expected): + """The n_theoretical_partitions property returns the correct count.""" + assert ( + SearchSpace.from_product(_params, constraints).discrete.n_theoretical_partitions + == expected + ) + + +def test_batch_constraint_all_partitions_too_small(): + """All partitions infeasible raises InfeasibilityError.""" + searchspace = SearchSpace.from_product( + _params, [DiscreteBatchConstraint(parameters=["d0"])] + ) + measurements = create_fake_input(_params, [TARGET], n_rows=2) + + # Each d0 partition has 3 candidates, 
batch_size=4 exceeds all + with pytest.raises(InfeasibilityError): + BotorchRecommender().recommend( + 4, searchspace, TARGET.to_objective(), measurements + ) + + +@pytest.mark.parametrize( + ("min_candidates", "expected_count"), + [ + param(None, 3, id="no_filter"), + param(4, 0, id="all_skipped"), + param(3, 3, id="all_retained"), + ], +) +def test_partition_masks_min_candidates(min_candidates, expected_count): + """Partition mask filtering by min_candidates.""" + searchspace = SearchSpace.from_product( + _params, [DiscreteBatchConstraint(parameters=["d0"])] + ) + masks = list( + searchspace.discrete.partition_masks( + searchspace.discrete.exp_rep, min_candidates=min_candidates + ) + ) + assert len(masks) == expected_count diff --git a/tests/constraints/test_partition_constraints_hybrid.py b/tests/constraints/test_partition_constraints_hybrid.py new file mode 100644 index 0000000000..1bb1d3d57a --- /dev/null +++ b/tests/constraints/test_partition_constraints_hybrid.py @@ -0,0 +1,111 @@ +"""Tests for partitioning constraints in hybrid search spaces.""" + +import pytest +from pytest import param + +from baybe.constraints.continuous import ContinuousCardinalityConstraint +from baybe.constraints.discrete import ( + DiscreteBatchConstraint, + DiscreteCardinalityConstraint, +) +from baybe.constraints.utils import is_cardinality_fulfilled +from baybe.parameters.numerical import ( + NumericalContinuousParameter, + NumericalDiscreteParameter, +) +from baybe.recommenders import BotorchRecommender +from baybe.searchspace import SearchSpace +from baybe.targets import NumericalTarget +from baybe.utils.dataframe import create_fake_input + +BATCH_SIZE = 2 +MAX_CARDINALITY = 1 +TARGET = NumericalTarget("t") + +_discrete_params = [ + NumericalDiscreteParameter("d0", values=(0.0, 0.5, 1.0)), + NumericalDiscreteParameter("d1", values=(0.0, 0.5, 1.0)), +] +_continuous_params = [ + NumericalContinuousParameter("c0", bounds=(0, 1)), + NumericalContinuousParameter("c1", bounds=(0, 1)), 
+] +_all_params = [*_discrete_params, *_continuous_params] + + +@pytest.mark.parametrize( + "constraints", + [ + param( + [ + ContinuousCardinalityConstraint( + parameters=["c0", "c1"], max_cardinality=MAX_CARDINALITY + ) + ], + id="continuous_cardinality", + ), + param( + [ + DiscreteCardinalityConstraint( + parameters=["d0", "d1"], max_cardinality=MAX_CARDINALITY + ) + ], + id="discrete_cardinality", + ), + param( + [ + DiscreteCardinalityConstraint( + parameters=["d0", "d1"], max_cardinality=MAX_CARDINALITY + ), + ContinuousCardinalityConstraint( + parameters=["c0", "c1"], max_cardinality=MAX_CARDINALITY + ), + ], + id="both_cardinality", + ), + param( + [DiscreteBatchConstraint(parameters=["d0"])], + id="batch_single", + ), + param( + [ + DiscreteBatchConstraint(parameters=["d0"]), + DiscreteBatchConstraint(parameters=["d1"]), + ], + id="batch_multiple", + ), + param( + [ + DiscreteBatchConstraint(parameters=["d0"]), + ContinuousCardinalityConstraint( + parameters=["c0", "c1"], max_cardinality=MAX_CARDINALITY + ), + ], + id="batch_and_cardinality", + ), + ], +) +def test_partition_constraints_hybrid(constraints): + """Partitioning constraints are respected in hybrid search spaces.""" + searchspace = SearchSpace.from_product(_all_params, constraints) + measurements = create_fake_input(_all_params, [TARGET], n_rows=3) + + rec = BotorchRecommender().recommend( + BATCH_SIZE, searchspace, TARGET.to_objective(), measurements + ) + + for c in constraints: + if isinstance(c, ContinuousCardinalityConstraint): + assert is_cardinality_fulfilled( + rec, searchspace.continuous, check_minimum=False + ) + elif isinstance(c, DiscreteCardinalityConstraint): + n_nonzero = (rec[list(c.parameters)] != 0.0).sum(axis=1) + assert (n_nonzero <= c.max_cardinality).all(), ( + f"Discrete cardinality constraint violated: {n_nonzero.tolist()}" + ) + elif isinstance(c, DiscreteBatchConstraint): + assert rec[c.parameters[0]].nunique() == 1, ( + f"Batch constraint violated for 
'{c.parameters[0]}': " + f"found {rec[c.parameters[0]].nunique()} unique values" + )