diff --git a/docs/bibliography.bib b/docs/bibliography.bib index 145ae74..92679ca 100644 --- a/docs/bibliography.bib +++ b/docs/bibliography.bib @@ -194,3 +194,10 @@ @misc{Turri2023 year = {2023}, copyright = {arXiv.org perpetual, non-exclusive license} } +@inproceedings{kostic2024learning, +title={Learning the Infinitesimal Generator of Stochastic Diffusion Processes}, +author={Vladimir R Kostic and H{\'e}l{\`e}ne Halconruy and Timoth{\'e}e Devergne and Karim Lounici and Massimiliano Pontil}, +booktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems}, +year={2024}, +url={https://openreview.net/forum?id=H7SaaqfCUi} +} diff --git a/docs/reference/kernel/dynamics.rst b/docs/reference/kernel/dynamics.rst new file mode 100644 index 0000000..65206bc --- /dev/null +++ b/docs/reference/kernel/dynamics.rst @@ -0,0 +1,47 @@ +.. _dynamics_reference: +======================= +:code:`kernel.dynamics` +======================= +.. module:: linear_operator_learning.kernel + +.. rst-class:: lead + + Kernel Methods for dynamics analysis + +Table of Contents +----------------- + +- :ref:`Regressors <dynamics_kernel_regressors>` +- :ref:`Types <dynamics_kernel_types>` + + + +.. _dynamics_kernel_regressors: +Regressors +---------- + +Common +~~~~~~ + +.. autofunction:: linear_operator_learning.kernel.dynamics.regressors.predict +.. autofunction:: linear_operator_learning.kernel.dynamics.regressors.modes + + +.. _dynamics_rrr: +Reduced Rank +~~~~~~~~~~~~ +.. autofunction:: linear_operator_learning.kernel.dynamics.regressors.reduced_rank_regression_dirichlet + + + +.. _dynamics_kernel_types: + +Types +----- + +.. autoclass:: linear_operator_learning.kernel.dynamics.structs.ModeResult + :members: + + + +.. 
footbibliography:: \ No newline at end of file diff --git a/docs/reference/kernel/index.rst b/docs/reference/kernel/index.rst index 3ef83fb..c58b1e7 100644 --- a/docs/reference/kernel/index.rst +++ b/docs/reference/kernel/index.rst @@ -8,3 +8,4 @@ Algorithms and utilities to learn linear operators via kernel methods. .. toctree:: kernel + dynamics diff --git a/docs/reference/kernel/kernel.rst b/docs/reference/kernel/kernel.rst index 698f408..1b8090d 100644 --- a/docs/reference/kernel/kernel.rst +++ b/docs/reference/kernel/kernel.rst @@ -75,4 +75,10 @@ General Utilities .. autofunction:: linear_operator_learning.kernel.utils.sanitize_complex_conjugates +.. _kernels: +Custom Kernels +----------------- +.. autoclass:: linear_operator_learning.kernel.kernels.RBF_with_grad + :members: + :exclude-members: __init__, __new__ .. footbibliography:: \ No newline at end of file diff --git a/linear_operator_learning/kernel/__init__.py b/linear_operator_learning/kernel/__init__.py index 636e05a..3837c65 100644 --- a/linear_operator_learning/kernel/__init__.py +++ b/linear_operator_learning/kernel/__init__.py @@ -1,3 +1,5 @@ """Kernel methods entry point.""" from linear_operator_learning.kernel.regressors import * # noqa +from linear_operator_learning.kernel.utils import * # noqa +from linear_operator_learning.kernel.kernels import * # noqa diff --git a/linear_operator_learning/kernel/dynamics/__init__.py b/linear_operator_learning/kernel/dynamics/__init__.py new file mode 100644 index 0000000..c31939c --- /dev/null +++ b/linear_operator_learning/kernel/dynamics/__init__.py @@ -0,0 +1,4 @@ +"""dynamics entry point.""" + +from .regressors import * # noqa: F403 +from .structs import * # noqa: F403 diff --git a/linear_operator_learning/kernel/dynamics/regressors.py b/linear_operator_learning/kernel/dynamics/regressors.py new file mode 100644 index 0000000..694be58 --- /dev/null +++ b/linear_operator_learning/kernel/dynamics/regressors.py @@ -0,0 +1,203 @@ +"""Kernel-based regressors 
for linear operators based on dynamical trajectories.""" + +from math import sqrt +from typing import Callable, Literal, Union +from warnings import warn + +import numpy as np +import scipy.linalg +from numpy import ndarray +from scipy.sparse.linalg import eigs, eigsh + +from linear_operator_learning.kernel.dynamics.structs import ModeResult +from linear_operator_learning.kernel.linalg import ( + add_diagonal_, + stable_topk, + weighted_norm, +) +from linear_operator_learning.kernel.regressors import evaluate_eigenfunction +from linear_operator_learning.kernel.structs import EigResult, FitResult +from linear_operator_learning.kernel.utils import sanitize_complex_conjugates + +__all__ = ["reduced_rank_regression_dirichlet", "modes", "predict"] + + +def reduced_rank_regression_dirichlet( + kernel_X: ndarray, # kernel matrix of the training data + dKernel_X: ndarray, # derivative of the kernel: dK_X_{i,j} = <\phi(x_i),d\phi(x_j)> (matrix N in the paper) + dKernel_dX: ndarray, # derivative of the kernel dK_dX_{i,j} = + shift: float, # shift parameter of the resolvent + tikhonov_reg: float, # Tikhonov (ridge) regularization parameter, can be 0, + rank: int, +): # Rank of the estimator) + r"""Fits the physics informed Reduced Rank Estimator from :footcite:t:`kostic2024learning`. + + Args: + kernel_X (np.ndarray): kernel matrix of the training data + dKernel_X (np.ndarray): (matrix N in :footcite:t:`kostic2024learning`) derivative of the kernel: :math:`N_{i,(k-1)n+j} = \langle \phi(x_i),d_k\phi(x_j) \rangle` + dKernel_dX (np.ndarray): (matrix M in :footcite:t:`kostic2024learning`) derivative of the kernel :math:`M_{(l-1)n+i,(k-1)n+j} = \langle d_l\phi(x_i),d_k\phi(x_j) \rangle` + shift (float): shift parameter of the resolvent + tikhonov_reg (float): Tikhonov (ridge) regularization parameter + rank (int): Rank of the estimator + + Shape: + ``kernel_X``: :math:`(N, N)`, where :math:`N` is the number of training data. + + ``dkernel_X``: :math:`(N, (d+1)N)`. 
where :math:`N` is the number of training data and :math:`d` is the dimensionality of the input data. + + ``dkernel_dX``: :math:`((d+1)N, (d+1)N)`. where :math:`N` is the number of training data and :math:`d` is the dimensionality of the input data. + Returns: EigResult structure containing the eigenvalues and eigenvectors + """ + npts = kernel_X.shape[0] + sqrt_npts = np.sqrt(npts) + + dimension_derivative = dKernel_dX.shape[0] + + # We follow the notation of the paper + J = ( + kernel_X / sqrt_npts + - (dKernel_X / sqrt_npts) + @ np.linalg.inv( + dKernel_dX + tikhonov_reg * shift * sqrt_npts * np.eye(dimension_derivative) + ) + @ dKernel_X.T + ) + + sigmas_2, vectors = eigs( + J @ kernel_X / sqrt_npts, k=rank + 5, M=(J + tikhonov_reg * np.eye(J.shape[0])) * shift + ) + + values, stable_values_idxs = stable_topk(sigmas_2, rank, ignore_warnings=False) + + V = vectors[:, stable_values_idxs] + # Normalization step + V = V @ np.diag(np.sqrt(sqrt_npts) / np.sqrt(np.diag(V.T @ kernel_X @ V))) + + sigma = np.diag(sigmas_2[stable_values_idxs]) + + make_U = np.block( + [ + np.eye(npts) / np.sqrt(shift), + -dKernel_X + @ np.linalg.inv( + dKernel_dX + shift * tikhonov_reg * sqrt_npts * np.eye(dimension_derivative) + ), + ] + ).T + + U = make_U @ (kernel_X @ V / sqrt_npts - shift * V @ sigma) / (tikhonov_reg * shift) + + evals, vl, vr = scipy.linalg.eig(V.T @ V @ sigma, left=True) + + evals = sanitize_complex_conjugates(evals) # To be coupled with the LOL function + lambdas = shift - 1 / evals + r_perm = np.argsort(-lambdas) + vr = vr[:, r_perm] + l_perm = np.argsort(-lambdas.conj()) + vl = vl[:, l_perm] + evals = evals[r_perm] + + vl /= np.sqrt(evals) + result: EigResult = {"values": lambdas[r_perm], "left": (V @ vl) * sqrt_npts, "right": U @ vr} + return result + + +def modes( + eig_result: EigResult, + initial_conditions: ndarray, # Feature matrix of the shape [num_initial_conditions, features] or kernel matrix of the shape [num_initial_conditions, num_training_points] + 
obs_train: ndarray, # Observable to be predicted evaluated on the trajectory data, shape [num_training_points, obs_features] +) -> ModeResult: + r"""Computes the decomposition of an observable in the eigenmode basis. + + Given the eigenfunctions obtained from a fit function, this function computes + the decomposition of a given observable in terms of these eigenmodes. The resulting + modes provide insight into the dominant temporal patterns governing the system. + + Args: + eig_result (EigResult): Eigen decomposition result containing eigenvalues and left/right eigenfunctions. + initial_conditions (np.ndarray): kernel matrix or block matrix of the form [[K_x,0][0,N]] when using the dirichlet generator + obs_train (np.ndarray): Observable evaluated on the training trajectory data, + + Shape: + - ``eig_result["left"]``: :math:`(N, r)`, where :math:`r` is the rank of the decomposition. + - ``eig_result["right"]``: :math:`(N, r)`, right eigenfunctions of the transition operator. + - ``eig_result["values"]``: :math:`(r,)`, complex eigenvalues of the transition operator. + - ``initial_conditions``: :math:`(N_{init}, N)`, kernel representation or :math:`(N_{init}, N*d)` if using the dirichlet estimator. + - ``obs_train``: :math:`(N, d_{obs})`, observable evaluated on training data. + + Returns: + ModeResult: A dictionary containing: + - ``decay_rates`` (:math:`(r,)`): Real part of the eigenvalues, representing decay rates. + - ``frequencies`` (:math:`(r,)`): Imaginary part of the eigenvalues, representing oscillation frequencies. + - ``modes`` (:math:`(r, N_{init}, d_{obs})`): Observable decomposed in the eigenmode basis. 
+ """ + evals = eig_result["values"] + levecs = eig_result["left"] + npts = levecs.shape[ + 0 + ] # We use the eigenvector to be consistent with the dirichlet estimator that does not have the same shape #obs_train.shape[0] + if initial_conditions.ndim == 1: + initial_conditions = np.expand_dims(initial_conditions, axis=0) + conditioning = evaluate_eigenfunction( + eig_result, "right", initial_conditions + ).T # [rank, num_initial_conditions] + str = "abcdefgh" # Maximum number of feature dimensions is 8 + einsum_str = "".join([str[k] for k in range(obs_train.ndim - 1)]) # string for features + modes_ = np.einsum("nr,n" + einsum_str + "->r" + einsum_str, levecs.conj(), obs_train) / sqrt( + npts + ) # [rank, features] + modes_ = np.expand_dims(modes_, axis=1) + dims_to_add = modes_.ndim - conditioning.ndim + if dims_to_add > 0: + conditioning = np.expand_dims(conditioning, axis=tuple(range(-dims_to_add, 0))) + modes = conditioning * modes_ # [rank, num_init_cond, obs_features] + result: ModeResult = { + "decay_rates": -evals.real, + "frequencies": evals.imag / (2 * np.pi), + "modes": modes, + } + return result + + +def predict( + t: Union[float, ndarray], # time in the same units as dt + mode_result: ModeResult, +) -> ndarray: # shape [num_init_cond, features] or if num_t>1 [num_init_cond, num_time, features] + r"""Predicts the evolution of an observable using the mode decomposition. + + Given the modal decomposition of an observable, this function reconstructs + the time evolution by propagating the eigenmodes forward in time. + + Args: + t (Union[float, np.ndarray]): Time or array of time values in the same units as the spectral decomposition. + mode_result (ModeResult): Result of the mode decomposition, containing decay rates, frequencies, and modes. + + Shape: + - ``t``: :math:`(T,)` or scalar, where :math:`T` is the number of time steps. + - ``mode_result["decay_rates"]``: :math:`(r,)`, real decay rates of the modes. 
+ - ``mode_result["frequencies"]``: :math:`(r,)`, imaginary frequencies of the modes. + - ``mode_result["modes"]``: :math:`(r, N_{init}, d_{obs})`, mode coefficients. + - ``predictions``: :math:`(N_{init}, T, d_{obs})`, predicted observable values at times :math:`t`. + If :math:`T=1` or :math:`N_{init}=1`, unnecessary dimensions are removed. + + Returns: + np.ndarray: Predicted observable values at the given time steps. + If multiple time points are given, the shape is :math:`(N_{init}, T, d_{obs})`. + If only one time step or one initial condition is provided, the output is squeezed accordingly. + """ + if type(t) == float: # noqa: E721 + t = np.array([t]) + evals = -mode_result["decay_rates"] + 2 * np.pi * 1j * mode_result["frequencies"] + to_evals = np.exp(evals[:, None] * t[None, :]) # [rank,time_steps] + str = "abcdefgh" # Maximum number of feature dimensions is 8 + einsum_str = "".join( + [str[k] for k in range(mode_result["modes"].ndim - 2)] + ) # string for features + predictions = np.einsum( + "rs,rm" + einsum_str + "->ms" + einsum_str, to_evals, mode_result["modes"] + ) + if ( + predictions.shape[0] == 1 or predictions.shape[1] == 1 + ): # If only one time point or one initial condition is requested, remove unnecessary dims + predictions = np.squeeze(predictions) + return predictions.real diff --git a/linear_operator_learning/kernel/dynamics/structs.py b/linear_operator_learning/kernel/dynamics/structs.py new file mode 100644 index 0000000..3c8b6e0 --- /dev/null +++ b/linear_operator_learning/kernel/dynamics/structs.py @@ -0,0 +1,14 @@ +"""Structs used by the `kernel dynamics` algorithms.""" + +from typing import TypedDict + +import numpy as np +from numpy import ndarray + + +class ModeResult(TypedDict): + """Return type for modes decompositions of dynamics kernel regressors.""" + + decay_rates: ndarray + frequencies: ndarray + modes: ndarray diff --git a/linear_operator_learning/kernel/kernels.py b/linear_operator_learning/kernel/kernels.py new file mode 
100644 index 0000000..07585d8 --- /dev/null +++ b/linear_operator_learning/kernel/kernels.py @@ -0,0 +1,82 @@ +# linear_operator_learning/kernel/dynamics/kernels.py # noqa: D100 + +import numpy as np +from sklearn.gaussian_process.kernels import RBF # type: ignore + + +class RBF_with_grad(RBF):  # noqa: D101 + def __init__(self, length_scale=1.0, length_scale_bounds=(1e-5, 1e5)): # noqa: D105 + # Init code here: + super().__init__(length_scale, length_scale_bounds) + + def grad(self, kernel_X, X, friction): + r"""Returns the matrix :math:`N_{i,(k-1)n+j}= \langle \phi(x_i),d_k\phi(x_j) \rangle` (matrix :math:`N` in :footcite:t:`kostic2024learning`) + where :math:`i = 1,\dots n, k=1, \dots d, j=1,\dots n` + and :math:`N` is the number of training points and :math:`d` is the dimensionality of the system. + + Args: + kernel_X (np.ndarray): kernel matrix of the training data + X (np.ndarray): training data + friction (np.ndarray): friction parameter of the physical model :math:`s(x)` in :footcite:t:`kostic2024learning` + Shape: + ``kernel_X``: :math:`(N, N)`, where :math:`N` is the number of training data. + + ``X``: :math:`(N,d)` where :math:`N` is the number of training data and :math:`d` the dimensionality of the system. + + ``friction``: :math:`d` where :math:`d` is the dimensionality of the system. + + Output: :math:`\langle \phi(x_i),d_k\phi(x_j) \rangle` of shape `N,Nd`, where :math:`N` is the number of training data and :math:`d` the dimension of the system. 
+ """ # noqa: D205 + difference = X[:, np.newaxis, :] - X[np.newaxis, :, :] + n = difference.shape[0] + d = X.shape[1] + N = np.zeros((n, n * d)) + sigma = self.length_scale + for i in range(n): + for j in range(n): + for k in range(0, d): + N[i, k * n + j] = ( + np.sqrt(friction[k]) * difference[i, j, k] * kernel_X[i, j] / sigma**2 + ) + return N + + def grad2(self, kernel_X: np.ndarray, X: np.ndarray, friction: np.ndarray): + r"""Returns the matrix :math:`M_{(k-1)n + i,(l-1)n+j}= \langle d_k\phi(x_i),d_l\phi(x_j) \rangle` (matrix :math:`M` in :footcite:t:`kostic2024learning`) + where :math:`i = 1,\dots n, k=1, \dots d, j=1,\dots n, l=1, \dots d` + and :math:`N` is the number of training points and :math:`d` is the dimensionality of the system. + + Args: + kernel_X (np.ndarray): kernel matrix of the training data + X (np.ndarray): training data + friction (np.ndarray): friction parameter of the physical model :math:`s(x)` in :footcite:t:`kostic2024learning` + Shape: + ``kernel_X``: :math:`(N, N)`, where :math:`N` is the number of training data. + ``X``: :math:`(N,d)` where :math:`N` is the number of training data and `d` the dimensionality of the system. + ``friction``: :math:`d` where :math:`d` is the dimensionality of the system + Returns: :math:`M_{(k-1)n + i,(l-1)n+j}= \langle d_k\phi(x_i),d_l\phi(x_j) \rangle` of shape `Nd,Nd`, where :math:`N` is the number of training data and :math:`d` the dimension of the system. 
+ """ # noqa: D205 + difference = X[:, np.newaxis, :] - X[np.newaxis, :, :] + + d = difference.shape[2] + n = difference.shape[0] + M = np.zeros((n * d, n * d)) + sigma = self.length_scale + for i in range(n): + for j in range(n): + for k in range(0, d): + for m in range(0, d): + if m == k: + M[(k) * n + i, (m) * n + j] = ( + friction[k] + * (1 / sigma**2 - difference[i, j, k] ** 2 / sigma**4) + * kernel_X[i, j] + ) + else: + M[(k) * n + i, (m) * n + j] = ( + np.sqrt(friction[k]) + * np.sqrt(friction[m]) + * (-difference[i, j, k] * difference[i, j, m] / sigma**4) + * kernel_X[i, j] + ) + + return M diff --git a/linear_operator_learning/kernel/regressors.py b/linear_operator_learning/kernel/regressors.py index cd3fc26..5c710d1 100644 --- a/linear_operator_learning/kernel/regressors.py +++ b/linear_operator_learning/kernel/regressors.py @@ -104,7 +104,9 @@ def evaluate_eigenfunction( Output: :math:`(N_0, R)` """ vr_or_vl = eig_result[which] - rsqrt_dim = (K_Xin_X_or_Y.shape[1]) ** (-0.5) + rsqrt_dim = ( + 1 / np.sqrt(eig_result["left"].shape[0]) + ) # To be consistent with the Dirichlet estimator, we get the dimension of the training set by the left eigenvector #(K_Xin_X_or_Y.shape[1]) ** (-0.5) return np.linalg.multi_dot([rsqrt_dim * K_Xin_X_or_Y, vr_or_vl]) diff --git a/linear_operator_learning/kernel/utils.py b/linear_operator_learning/kernel/utils.py index 2604aac..fe47189 100644 --- a/linear_operator_learning/kernel/utils.py +++ b/linear_operator_learning/kernel/utils.py @@ -6,6 +6,11 @@ from numpy import ndarray from scipy.spatial.distance import pdist +__all__ = [ + "return_phi_dphi", + "return_dphi_dphi", +] + def topk(vec: ndarray, k: int): """Get the top k values from a Numpy array. 
@@ -57,3 +62,84 @@ def _row_col_from_condensed_index(d, index): i = (-b - sqrt(b**2 - 8 * index)) // 2 j = index + i * (b + i + 2) // 2 + 1 return (int(i), int(j)) + + +def return_phi_dphi( + kernel_X: np.ndarray, + X: np.ndarray, + sigma: float, + friction: np.ndarray, +): + r"""Returns the matrix :math:`N_{i,(k-1)n+j}=<\phi(x_i),d_k\phi(x_j)>` (matrix :math:`N` :footcite:t:`kostic2024learning`) only for + a GAUSSIAN kernel + where :math:`i = 1,\dots n, k=1, \dots d, j=1,\dots n` + and :math:`N` is the number of training points and :math:`d` is the dimensionality of the system. + + Args: + kernel_X (np.ndarray): kernel matrix of the training data + X (np.ndarray): training data + sigma (float): length scale of the GAUSSIAN kernel + friction (np.ndarray): friction parameter of the physical model :math:`s(x)` in :footcite:t:`kostic2024learning` + Shape: + ``kernel_X``: :math:`(N, N)`, where :math:`N` is the number of training data. + + ``X``: :math:`(N,d)` where :math:`N` is the number of training data and :math:`d` the dimensionality of the system. + + ``friction``: :math:`d` where :math:`d` is the dimensionality of the system. + + Output: :math:`<\phi(x_i),d_k\phi(x_j)>` of shape `N,Nd`, where :math:`N` is the number of training data and :math:`d` the dimension of the system. 
+ """ # noqa: D205 + difference = X[:, np.newaxis, :] - X[np.newaxis, :, :] + n = difference.shape[0] + d = X.shape[1] + N = np.zeros((n, n * d)) + for i in range(n): + for j in range(n): + for k in range(0, d): + N[i, k * n + j] = ( + np.sqrt(friction[k]) * difference[i, j, k] * kernel_X[i, j] / sigma**2 + ) + return N + + +def return_dphi_dphi(kernel_X: np.ndarray, X: np.ndarray, sigma: float, friction: np.ndarray): + r"""Returns the matrix :math:`M_{(k-1)n + i,(l-1)n+j}=` (matrix :math:`M` in :footcite:t:`kostic2024learning`) only for + a GAUSSIAN kernel + where :math:`i = 1,\dots n, k=1, \dots d, j=1,\dots n, l=1, \dots d` + and :math:`N` is the number of training points and :math:`d` is the dimensionality of the system. + + Args: + kernel_X (np.ndarray): kernel matrix of the training data + X (np.ndarray): training data + sigma (float): length scale of the GAUSSIAN kernel + friction (np.ndarray): friction parameter of the physical model :math:`s(x)` in :footcite:t:`kostic2024learning` + Shape: + ``kernel_X``: :math:`(N, N)`, where :math:`N` is the number of training data. + ``X``: :math:`(N,d)` where :math:`N` is the number of training data and `d` the dimensionality of the system. + ``friction``: :math:`d` where :math:`d` is the dimensionality of the system + Returns: :math:`M_{(k-1)n + i,(l-1)n+j}=` of shape `N,Nd`, where :math:`N` is the number of training data and :math:`d` the dimension of the system. 
+ """ # noqa: D205 + difference = X[:, np.newaxis, :] - X[np.newaxis, :, :] + + d = difference.shape[2] + n = difference.shape[0] + M = np.zeros((n * d, n * d)) + for i in range(n): + for j in range(n): + for k in range(0, d): + for m in range(0, d): + if m == k: + M[(k) * n + i, (m) * n + j] = ( + friction[k] + * (1 / sigma**2 - difference[i, j, k] ** 2 / sigma**4) + * kernel_X[i, j] + ) + else: + M[(k) * n + i, (m) * n + j] = ( + np.sqrt(friction[k]) + * np.sqrt(friction[m]) + * (-difference[i, j, k] * difference[i, j, m] / sigma**4) + * kernel_X[i, j] + ) + + return M