From a270398b1b89021b78fcbfd51adab1e25de3d9a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Devergne=20Timoth=C3=A9e?= Date: Wed, 12 Feb 2025 18:23:16 +0100 Subject: [PATCH 1/7] added the functions for the physics informed gen --- linear_operator_learning/kernel/linalg.py | 57 +++++++++ linear_operator_learning/kernel/regressors.py | 109 ++++++++++++++++++ linear_operator_learning/kernel/utils.py | 76 ++++++++++++ 3 files changed, 242 insertions(+) diff --git a/linear_operator_learning/kernel/linalg.py b/linear_operator_learning/kernel/linalg.py index 8af351a..45444bb 100644 --- a/linear_operator_learning/kernel/linalg.py +++ b/linear_operator_learning/kernel/linalg.py @@ -64,6 +64,38 @@ def eig( return result +def eig_physics_informed( + fit_result: FitResult, + shift: float, +) -> EigResult: + """Computes the eigendecomposition of the infinitesimal generator operator. + + Args: + fit_result (FitResult): Fit result as defined in ``operator_learning.structs``. + shift (float): Shift parameter of the resolvent. + + Returns: + EigResult: as defined in ``operator_learning.structs`` + + """ + U = fit_result["U"] + V = fit_result["V"] + sigma = fit_result["svals"] + evals, vl, vr = scipy.linalg.eig(V.T @ V @ np.diag(sigma**2), left=True) + + evals = sanitize_complex_conjugates(evals) + lambdas = shift - 1 / evals + r_perm = np.argsort(-lambdas) + vr = vr[:, r_perm] + l_perm = np.argsort(-lambdas.conj()) + vl = vl[:, l_perm] + evals = evals[r_perm] + + vl /= np.sqrt(evals) + result: EigResult = {"values": lambdas[r_perm], "left": V @ vl, "right": U @ vr} + return result + + def evaluate_eigenfunction( eig_result: EigResult, which: Literal["left", "right"], @@ -92,6 +124,31 @@ def evaluate_eigenfunction( return np.linalg.multi_dot([rsqrt_dim * K_Xin_X_or_Y, vr_or_vl]) +def evaluate_right_eigenfunction_physics_informed( + kernel_X: np.ndarray, dKernel_X: np.ndarray, eig_result: np.ndarray, shift: float +): + r"""Fits the physics informed Reduced Rank Estimator. 
+ + Args: + kernel_X (np.ndarray): kernel matrix of the training data + dKernel_X (np.ndarray): derivative of the kernel: dK_X_{i,j} = <\phi(x_i),d\phi(x_j)> (matrix N in the paper) + eig_result: EigResult object containing eigendecomposition results + shift (float): shift parameter of the resolvent + + Shape: + ``kernel_X``: :math:`(N, N)`, where :math:`N` is the number of training data. + ``dkernel_X``: :math:`(N, (d+1)N)`. where :math:`N` is the number of training data amd :math: `d` is the dimensionality of the input data. + + Returns: + Right eigenfunctions of the generator of shape :math: `(N, r)` where :math: `r` is the rank of the estimator + + """ + npts = kernel_X.shape[0] + sqrt_npts = np.sqrt(npts) + u = eig_result["right"] + return (np.sqrt(shift) * kernel_X @ u[:npts, :] + dKernel_X @ u[npts:, :]) / sqrt_npts + + def add_diagonal_(M: ndarray, alpha: float): """Add alpha to the diagonal of M inplace. diff --git a/linear_operator_learning/kernel/regressors.py b/linear_operator_learning/kernel/regressors.py index 548d77c..5ee838a 100644 --- a/linear_operator_learning/kernel/regressors.py +++ b/linear_operator_learning/kernel/regressors.py @@ -59,6 +59,47 @@ def predict( return np.linalg.multi_dot([K_dot_U, M, V_dot_obs]) +def predict_physics_informed( + eig_result: FitResult, + kernel_obs: np.ndarray, + dKernel_obs: np.ndarray, + obs_train_X: np.ndarray, + shift: float, + time: float, +) -> np.ndarray: + r"""Predicts future states using kernel matrices and fitted results. 
+ + Args: + eig_result: EigResult object containing reduced rank regression results + kernel_obs (np.ndarray): kernel matrix of the initial conditions and the training set + dKernel_obs (np.ndarray): derivative of the kernel between the initial condition and the training set: dK_X_{i,j} = <\phi(x_i),d\phi(x_j)> (matrix N in the paper) + obs_train_X (ndarray): Observable evaluated on output training data (or inducing points for Nystroem) + shift (float): shift parameter of the resolvent + time (float): time at which we want to estimate the prediction + + Shape: + ``kernel_obs``: :math:`(N, N)`, where :math:`N` is the number of training data. + + ``dkernel_obs``: :math:`(N, (d+1)N)`. where :math:`N` is the number of training data amd :math: `d` is the dimensionality of the input data. + + ``obs_train_X``: :math:`(N, *)`, where :math:`*` is the shape of the observable. + + Output: :math:`(N, *)`. + """ + ul = eig_result["left"] + ur = eig_result["right"] + evs = eig_result["values"] + npts = kernel_obs.shape[0] + h = (np.sqrt(shift) * (kernel_obs @ ur[:npts, :]) + (dKernel_obs @ ur[npts:, :])) / np.sqrt( + npts + ) + g = np.sqrt(shift) * obs_train_X @ ul + + pred = (np.exp(evs * time)[np.newaxis, :] * (g[np.newaxis, :] * h)).sum(axis=-1) + + return pred + + def pcr( kernel_X: ndarray, tikhonov_reg: float = 0.0, @@ -391,3 +432,71 @@ def rand_reduced_rank( svals = np.sqrt(values) result: FitResult = {"U": U, "V": V, "svals": svals} return result + + +def physics_informed_reduced_rank_regression( + kernel_X: np.ndarray, # kernel matrix of the training data + dKernel_X: np.ndarray, # derivative of the kernel: dK_X_{i,j} = <\phi(x_i),d\phi(x_j)> (matrix N in the paper) + dKernel_dX: np.ndarray, # derivative of the kernel dK_dX_{i,j} = + shift: float, # shift parameter of the resolvent + tikhonov_reg: float, # Tikhonov (ridge) regularization parameter, can be 0, + rank: int, +): # Rank of the estimator) + r"""Fits the physics informed Reduced Rank Estimator. 
+ + Args: + kernel_X (np.ndarray): kernel matrix of the training data + dKernel_X (np.ndarray): derivative of the kernel: dK_X_{i,j} = <\phi(x_i),d\phi(x_j)> (matrix N in the paper) + dKernel_dX (np.ndarray): derivative of the kernel dK_dX_{i,j} = (matrix M in the paper) + shift (float): shift parameter of the resolvent + tikhonov_reg (float): Tikhonov (ridge) regularization parameter + rank (int): Rank of the estimator + + Shape: + ``kernel_X``: :math:`(N, N)`, where :math:`N` is the number of training data. + + ``dkernel_X``: :math:`(N, (d+1)N)`. where :math:`N` is the number of training data amd :math: `d` is the dimensionality of the input data. + ``dkernel_dX``: :math:`((d+1)N, (d+1)N)`. where :math:`N` is the number of training data amd :math: `d` is the dimensionality of the input data. + Returns: FitResult structure containing `U` and `V` matrices + """ + npts = kernel_X.shape[0] + sqrt_npts = np.sqrt(npts) + + dimension_derivative = dKernel_dX.shape[0] + + # We follow the notation of the paper + J = ( + kernel_X / sqrt_npts + - (dKernel_X / sqrt_npts) + @ np.linalg.inv( + dKernel_dX + tikhonov_reg * shift * sqrt_npts * np.eye(dimension_derivative) + ) + @ dKernel_X.T + ) + + sigmas_2, vectors = eigs( + J @ kernel_X / sqrt_npts, k=rank + 5, M=(J + tikhonov_reg * np.eye(J.shape[0])) * shift + ) + + values, stable_values_idxs = stable_topk(sigmas_2, rank, ignore_warnings=False) + + V = vectors[:, stable_values_idxs] + # Normalization step + V = V @ np.diag(np.sqrt(sqrt_npts) / np.sqrt(np.diag(V.T @ kernel_X @ V))) + + sigma = np.diag(sigmas_2[stable_values_idxs]) + + make_U = np.block( + [ + np.eye(npts) / np.sqrt(shift), + -dKernel_X + @ np.linalg.inv( + dKernel_dX + shift * tikhonov_reg * sqrt_npts * np.eye(dimension_derivative) + ), + ] + ).T + + U = make_U @ (kernel_X @ V / sqrt_npts - shift * V @ sigma) / (tikhonov_reg * shift) + result: FitResult = {"U": U.real, "V": V.real, "svals": np.sqrt(sigmas_2[stable_values_idxs])} + + return result diff --git 
a/linear_operator_learning/kernel/utils.py b/linear_operator_learning/kernel/utils.py index ca77b48..8bd9a8c 100644 --- a/linear_operator_learning/kernel/utils.py +++ b/linear_operator_learning/kernel/utils.py @@ -57,3 +57,79 @@ def _row_col_from_condensed_index(d, index): i = (-b - sqrt(b**2 - 8 * index)) // 2 j = index + i * (b + i + 2) // 2 + 1 return (int(i), int(j)) + + +def return_phi_dphi( + kernel_X: np.ndarray, + X: np.ndarray, + sigma: float, + friction: np.ndarray, +): + r"""Returns the matrix :math: `N_{i,(k-1)n+j}=<\phi(x_i),d_k\phi(x_j)>` (matrix :math:`N` in the paper) only for + a GAUSSIAN kernel + where :math:`i = 1,\dots n, k=1, \dots d, j=1,\dots n` + and :math:`N` is the number of training points and :math:`d` is the dimensionality of the system + Args: + kernel_X (np.ndarray): kernel matrix of the training data + X (np.ndarray): training data + sigma (float): length scale of the GAUSSIAN kernel + friction (np.ndarray): friction parameter of the physical model :math:`s(x)` in the paper + Shape: + ``kernel_X`` `: :math:`(N, N)`, where :math:`N` is the number of training data. + ``X``: :math: `(N,d)` where :math:`N` is the number of training data and `d` the dimensionality of the system. + ``friction``: :math: `d` where :math:`d` is the dimensionality of the system. + Output: :math:`<\phi(x_i),d_k\phi(x_j)>` of shape `N,Nd`, where :math:`N` is the number of training data and :math: `d` the dimension of the system. 
+ """ # noqa: D205 + difference = X[:, np.newaxis, :] - X[np.newaxis, :, :] + n = difference.shape[0] + d = X.shape[1] + N = np.zeros((n, n * d)) + for i in range(n): + for j in range(n): + for k in range(0, d): + N[i, k * n + j] = ( + np.sqrt(friction[k]) * difference[i, j, k] * kernel_X[i, j] / sigma**2 + ) + return N + + +def return_dphi_dphi(kernel_X: np.ndarray, X: np.ndarray, sigma: float, friction: np.ndarray): + r"""Returns the matrix :math: `M_{(k-1)n + i,(l-1)n+j}=` (matrix :math:`n` in the paper) only for + a GAUSSIAN kernel + where :math:`i = 1,\dots n, k=1, \dots d, j=1,\dots n, l=1, \dots d` + and :math:`N` is the number of training points and :math:`d` is the dimensionality of the system + Args: + kernel_X (np.ndarray): kernel matrix of the training data + X (np.ndarray): training data + sigma (float): length scale of the GAUSSIAN kernel + friction (np.ndarray): friction parameter of the physical model :math:`s(x)` in the paper + Shape: + ``kernel_X`` `: :math:`(N, N)`, where :math:`N` is the number of training data. + ``X``: :math: `(N,d)` where :math:`N` is the number of training data and `d` the dimensionality of the system. + ``friction``: :math: `d` where :math:`d` is the dimensionality of the system + Returns: :math:`M_{(k-1)n + i,(l-1)n+j}=` of shape `N,Nd`, where :math:`N` is the number of training data and :math: `d` the dimension of the system. 
_pcr: Principal Component Regression ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -60,4 +68,15 @@ Linear Algebra Utilities .. autofunction:: linear_operator_learning.kernel.linalg.add_diagonal_ + +.. _kernel_derivatives: +Kernel Derivatives +~~~~~~~~~~~~~~~~~~~~~~~~ +.. autofunction:: linear_operator_learning.kernel.return_phi_dphi + +.. autofunction:: linear_operator_learning.kernel.return_dphi_dphi + +Bibliography +~~~~~~~~~~~~~~~~~~~~~~~~ + .. footbibliography:: \ No newline at end of file diff --git a/linear_operator_learning/kernel/__init__.py b/linear_operator_learning/kernel/__init__.py index 5918b1f..6d55b08 100644 --- a/linear_operator_learning/kernel/__init__.py +++ b/linear_operator_learning/kernel/__init__.py @@ -2,3 +2,4 @@ from linear_operator_learning.kernel.linalg import * # noqa from linear_operator_learning.kernel.regressors import * # noqa +from linear_operator_learning.kernel.utils import * # noqa diff --git a/linear_operator_learning/kernel/linalg.py b/linear_operator_learning/kernel/linalg.py index 45444bb..fceeb8d 100644 --- a/linear_operator_learning/kernel/linalg.py +++ b/linear_operator_learning/kernel/linalg.py @@ -10,7 +10,12 @@ from linear_operator_learning.kernel.structs import EigResult, FitResult from linear_operator_learning.kernel.utils import sanitize_complex_conjugates, topk -__all__ = ["eig", "evaluate_eigenfunction"] +__all__ = [ + "eig", + "evaluate_eigenfunction", + "eig_physics_informed", + "evaluate_right_eigenfunction_physics_informed", +] def eig( @@ -131,12 +136,13 @@ def evaluate_right_eigenfunction_physics_informed( Args: kernel_X (np.ndarray): kernel matrix of the training data - dKernel_X (np.ndarray): derivative of the kernel: dK_X_{i,j} = <\phi(x_i),d\phi(x_j)> (matrix N in the paper) + dKernel_X (np.ndarray): (matrix N in the paper) derivative of the kernel: N{i,(k-1)n+j} = <\phi(x_i),d_k\phi(x_j)> eig_result: EigResult object containing eigendecomposition results shift (float): shift parameter of the resolvent Shape: 
``kernel_X``: :math:`(N, N)`, where :math:`N` is the number of training data. + ``dkernel_X``: :math:`(N, (d+1)N)`. where :math:`N` is the number of training data amd :math: `d` is the dimensionality of the input data. Returns: diff --git a/linear_operator_learning/kernel/regressors.py b/linear_operator_learning/kernel/regressors.py index 5ee838a..9f99a69 100644 --- a/linear_operator_learning/kernel/regressors.py +++ b/linear_operator_learning/kernel/regressors.py @@ -14,11 +14,13 @@ __all__ = [ "predict", + "predict_physics_informed", "pcr", "nystroem_pcr", "reduced_rank", "nystroem_reduced_rank", "rand_reduced_rank", + "physics_informed_reduced_rank_regression", ] @@ -72,7 +74,7 @@ def predict_physics_informed( Args: eig_result: EigResult object containing reduced rank regression results kernel_obs (np.ndarray): kernel matrix of the initial conditions and the training set - dKernel_obs (np.ndarray): derivative of the kernel between the initial condition and the training set: dK_X_{i,j} = <\phi(x_i),d\phi(x_j)> (matrix N in the paper) + dKernel_obs (np.ndarray): (matrix N in the paper) derivative of the kernel between the initial condition and the training set: :math:`N_{i,(k-1)n+j} = \langle \phi(x_i),d_k\phi(x_j) \rangle` obs_train_X (ndarray): Observable evaluated on output training data (or inducing points for Nystroem) shift (float): shift parameter of the resolvent time (float): time at which we want to estimate the prediction @@ -80,7 +82,7 @@ def predict_physics_informed( Shape: ``kernel_obs``: :math:`(N, N)`, where :math:`N` is the number of training data. - ``dkernel_obs``: :math:`(N, (d+1)N)`. where :math:`N` is the number of training data amd :math: `d` is the dimensionality of the input data. + ``dkernel_obs``: :math:`(N, (d+1)N)`. where :math:`N` is the number of training data amd :math:`d` is the dimensionality of the input data. ``obs_train_X``: :math:`(N, *)`, where :math:`*` is the shape of the observable. 
@@ -446,8 +448,8 @@ def physics_informed_reduced_rank_regression( Args: kernel_X (np.ndarray): kernel matrix of the training data - dKernel_X (np.ndarray): derivative of the kernel: dK_X_{i,j} = <\phi(x_i),d\phi(x_j)> (matrix N in the paper) - dKernel_dX (np.ndarray): derivative of the kernel dK_dX_{i,j} = (matrix M in the paper) + dKernel_X (np.ndarray): (matrix N in the paper) derivative of the kernel: :math:`N_{i,(k-1)n+j} = \langle \phi(x_i),d_k\phi(x_j) \rangle` + dKernel_dX (np.ndarray): (matrix M in the paper) derivative of the kernel :math:`M_{(l-1)n+i,(k-1)n+j} = \langle d_l\phi(x_i),d_k\phi(x_j) \rangle` shift (float): shift parameter of the resolvent tikhonov_reg (float): Tikhonov (ridge) regularization parameter rank (int): Rank of the estimator @@ -455,8 +457,9 @@ def physics_informed_reduced_rank_regression( Shape: ``kernel_X``: :math:`(N, N)`, where :math:`N` is the number of training data. - ``dkernel_X``: :math:`(N, (d+1)N)`. where :math:`N` is the number of training data amd :math: `d` is the dimensionality of the input data. - ``dkernel_dX``: :math:`((d+1)N, (d+1)N)`. where :math:`N` is the number of training data amd :math: `d` is the dimensionality of the input data. + ``dkernel_X``: :math:`(N, (d+1)N)`. where :math:`N` is the number of training data amd :math:`d` is the dimensionality of the input data. + + ``dkernel_dX``: :math:`((d+1)N, (d+1)N)`. where :math:`N` is the number of training data amd :math:`d` is the dimensionality of the input data. 
Returns: FitResult structure containing `U` and `V` matrices """ npts = kernel_X.shape[0] diff --git a/linear_operator_learning/kernel/utils.py b/linear_operator_learning/kernel/utils.py index 8bd9a8c..f159edd 100644 --- a/linear_operator_learning/kernel/utils.py +++ b/linear_operator_learning/kernel/utils.py @@ -6,6 +6,11 @@ from numpy import ndarray from scipy.spatial.distance import pdist +__all__ = [ + "return_phi_dphi", + "return_dphi_dphi", +] + def topk(vec: ndarray, k: int): """Get the top k values from a Numpy array. @@ -65,20 +70,24 @@ def return_phi_dphi( sigma: float, friction: np.ndarray, ): - r"""Returns the matrix :math: `N_{i,(k-1)n+j}=<\phi(x_i),d_k\phi(x_j)>` (matrix :math:`N` in the paper) only for + r"""Returns the matrix :math:`N_{i,(k-1)n+j}=<\phi(x_i),d_k\phi(x_j)>` (matrix :math:`N` in the paper) only for a GAUSSIAN kernel where :math:`i = 1,\dots n, k=1, \dots d, j=1,\dots n` - and :math:`N` is the number of training points and :math:`d` is the dimensionality of the system + and :math:`N` is the number of training points and :math:`d` is the dimensionality of the system. + Args: kernel_X (np.ndarray): kernel matrix of the training data X (np.ndarray): training data sigma (float): length scale of the GAUSSIAN kernel friction (np.ndarray): friction parameter of the physical model :math:`s(x)` in the paper Shape: - ``kernel_X`` `: :math:`(N, N)`, where :math:`N` is the number of training data. - ``X``: :math: `(N,d)` where :math:`N` is the number of training data and `d` the dimensionality of the system. - ``friction``: :math: `d` where :math:`d` is the dimensionality of the system. - Output: :math:`<\phi(x_i),d_k\phi(x_j)>` of shape `N,Nd`, where :math:`N` is the number of training data and :math: `d` the dimension of the system. + ``kernel_X``: :math:`(N, N)`, where :math:`N` is the number of training data. + + ``X``: :math:`(N,d)` where :math:`N` is the number of training data and :math:`d` the dimensionality of the system. 
+ + ``friction``: :math:`d` where :math:`d` is the dimensionality of the system. + + Output: :math:`<\phi(x_i),d_k\phi(x_j)>` of shape `N,Nd`, where :math:`N` is the number of training data and :math:`d` the dimension of the system. """ # noqa: D205 difference = X[:, np.newaxis, :] - X[np.newaxis, :, :] n = difference.shape[0] @@ -94,20 +103,21 @@ def return_phi_dphi( def return_dphi_dphi(kernel_X: np.ndarray, X: np.ndarray, sigma: float, friction: np.ndarray): - r"""Returns the matrix :math: `M_{(k-1)n + i,(l-1)n+j}=` (matrix :math:`n` in the paper) only for + r"""Returns the matrix :math:`M_{(k-1)n + i,(l-1)n+j}=` (matrix :math:`M` in the paper) only for a GAUSSIAN kernel where :math:`i = 1,\dots n, k=1, \dots d, j=1,\dots n, l=1, \dots d` - and :math:`N` is the number of training points and :math:`d` is the dimensionality of the system + and :math:`N` is the number of training points and :math:`d` is the dimensionality of the system. + Args: kernel_X (np.ndarray): kernel matrix of the training data X (np.ndarray): training data sigma (float): length scale of the GAUSSIAN kernel friction (np.ndarray): friction parameter of the physical model :math:`s(x)` in the paper Shape: - ``kernel_X`` `: :math:`(N, N)`, where :math:`N` is the number of training data. - ``X``: :math: `(N,d)` where :math:`N` is the number of training data and `d` the dimensionality of the system. - ``friction``: :math: `d` where :math:`d` is the dimensionality of the system - Returns: :math:`M_{(k-1)n + i,(l-1)n+j}=` of shape `N,Nd`, where :math:`N` is the number of training data and :math: `d` the dimension of the system. + ``kernel_X``: :math:`(N, N)`, where :math:`N` is the number of training data. + ``X``: :math:`(N,d)` where :math:`N` is the number of training data and `d` the dimensionality of the system. 
+ ``friction``: :math:`d` where :math:`d` is the dimensionality of the system + Returns: :math:`M_{(k-1)n + i,(l-1)n+j}=` of shape `N,Nd`, where :math:`N` is the number of training data and :math:`d` the dimension of the system. """ # noqa: D205 difference = X[:, np.newaxis, :] - X[np.newaxis, :, :] From 5d7e7324caaf540282649db18a4cefaa5c89564b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Devergne=20Timoth=C3=A9e?= Date: Tue, 18 Feb 2025 12:36:47 +0100 Subject: [PATCH 3/7] Updated docs --- docs/bibliography.bib | 7 +++++++ linear_operator_learning/kernel/linalg.py | 6 +++--- linear_operator_learning/kernel/regressors.py | 10 +++++----- linear_operator_learning/kernel/utils.py | 8 ++++---- 4 files changed, 19 insertions(+), 12 deletions(-) diff --git a/docs/bibliography.bib b/docs/bibliography.bib index 6606dc4..ad455ee 100644 --- a/docs/bibliography.bib +++ b/docs/bibliography.bib @@ -178,4 +178,11 @@ @misc{Turri2023 publisher = {arXiv}, year = {2023}, copyright = {arXiv.org perpetual, non-exclusive license} +} +@inproceedings{kostic2024learning, +title={Learning the Infinitesimal Generator of Stochastic Diffusion Processes}, +author={Vladimir R Kostic and H{\'e}l{\`e}ne Halconruy and Timoth{\'e}e Devergne and Karim Lounici and Massimiliano Pontil}, +booktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems}, +year={2024}, +url={https://openreview.net/forum?id=H7SaaqfCUi} } \ No newline at end of file diff --git a/linear_operator_learning/kernel/linalg.py b/linear_operator_learning/kernel/linalg.py index fceeb8d..78af80f 100644 --- a/linear_operator_learning/kernel/linalg.py +++ b/linear_operator_learning/kernel/linalg.py @@ -73,7 +73,7 @@ def eig_physics_informed( fit_result: FitResult, shift: float, ) -> EigResult: - """Computes the eigendecomposition of the infinitesimal generator operator. + """Computes the eigendecomposition of the infinitesimal generator operator (see :footcite:t:`kostic2024learning`). 
Args: fit_result (FitResult): Fit result as defined in ``operator_learning.structs``. @@ -132,11 +132,11 @@ def evaluate_eigenfunction( def evaluate_right_eigenfunction_physics_informed( kernel_X: np.ndarray, dKernel_X: np.ndarray, eig_result: np.ndarray, shift: float ): - r"""Fits the physics informed Reduced Rank Estimator. + r"""Evaluates the right eigenfunction of the physics informed generator ( see :footcite:t:`kostic2024learning`). Args: kernel_X (np.ndarray): kernel matrix of the training data - dKernel_X (np.ndarray): (matrix N in the paper) derivative of the kernel: N{i,(k-1)n+j} = <\phi(x_i),d_k\phi(x_j)> + dKernel_X (np.ndarray): (matrix N in :footcite:t:`kostic2024learning`) derivative of the kernel: N{i,(k-1)n+j} = <\phi(x_i),d_k\phi(x_j)> eig_result: EigResult object containing eigendecomposition results shift (float): shift parameter of the resolvent diff --git a/linear_operator_learning/kernel/regressors.py b/linear_operator_learning/kernel/regressors.py index 9f99a69..541ab05 100644 --- a/linear_operator_learning/kernel/regressors.py +++ b/linear_operator_learning/kernel/regressors.py @@ -69,12 +69,12 @@ def predict_physics_informed( shift: float, time: float, ) -> np.ndarray: - r"""Predicts future states using kernel matrices and fitted results. + r"""Predicts future states using kernel matrices and fitted results using a physics informed generator from :footcite:t:`kostic2024learning`. 
Args: eig_result: EigResult object containing reduced rank regression results kernel_obs (np.ndarray): kernel matrix of the initial conditions and the training set - dKernel_obs (np.ndarray): (matrix N in the paper) derivative of the kernel between the initial condition and the training set: :math:`N_{i,(k-1)n+j} = \langle \phi(x_i),d_k\phi(x_j) \rangle` + dKernel_obs (np.ndarray): (matrix N in :footcite:t:`kostic2024learning`) derivative of the kernel between the initial condition and the training set: :math:`N_{i,(k-1)n+j} = \langle \phi(x_i),d_k\phi(x_j) \rangle` obs_train_X (ndarray): Observable evaluated on output training data (or inducing points for Nystroem) shift (float): shift parameter of the resolvent time (float): time at which we want to estimate the prediction @@ -444,12 +444,12 @@ def physics_informed_reduced_rank_regression( tikhonov_reg: float, # Tikhonov (ridge) regularization parameter, can be 0, rank: int, ): # Rank of the estimator) - r"""Fits the physics informed Reduced Rank Estimator. + r"""Fits the physics informed Reduced Rank Estimator from :footcite:t:`kostic2024learning`. 
Args: kernel_X (np.ndarray): kernel matrix of the training data - dKernel_X (np.ndarray): (matrix N in the paper) derivative of the kernel: :math:`N_{i,(k-1)n+j} = \langle \phi(x_i),d_k\phi(x_j) \rangle` - dKernel_dX (np.ndarray): (matrix M in the paper) derivative of the kernel :math:`M_{(l-1)n+i,(k-1)n+j} = \langle d_l\phi(x_i),d_k\phi(x_j) \rangle` + dKernel_X (np.ndarray): (matrix N in :footcite:t:`kostic2024learning`) derivative of the kernel: :math:`N_{i,(k-1)n+j} = \langle \phi(x_i),d_k\phi(x_j) \rangle` + dKernel_dX (np.ndarray): (matrix M in :footcite:t:`kostic2024learning`) derivative of the kernel :math:`M_{(l-1)n+i,(k-1)n+j} = \langle d_l\phi(x_i),d_k\phi(x_j) \rangle` shift (float): shift parameter of the resolvent tikhonov_reg (float): Tikhonov (ridge) regularization parameter rank (int): Rank of the estimator diff --git a/linear_operator_learning/kernel/utils.py b/linear_operator_learning/kernel/utils.py index f159edd..583400e 100644 --- a/linear_operator_learning/kernel/utils.py +++ b/linear_operator_learning/kernel/utils.py @@ -70,7 +70,7 @@ def return_phi_dphi( sigma: float, friction: np.ndarray, ): - r"""Returns the matrix :math:`N_{i,(k-1)n+j}=<\phi(x_i),d_k\phi(x_j)>` (matrix :math:`N` in the paper) only for + r"""Returns the matrix :math:`N_{i,(k-1)n+j}=<\phi(x_i),d_k\phi(x_j)>` (matrix :math:`N` :footcite:t:`kostic2024learning`) only for a GAUSSIAN kernel where :math:`i = 1,\dots n, k=1, \dots d, j=1,\dots n` and :math:`N` is the number of training points and :math:`d` is the dimensionality of the system. 
@@ -79,7 +79,7 @@ def return_phi_dphi( kernel_X (np.ndarray): kernel matrix of the training data X (np.ndarray): training data sigma (float): length scale of the GAUSSIAN kernel - friction (np.ndarray): friction parameter of the physical model :math:`s(x)` in the paper + friction (np.ndarray): friction parameter of the physical model :math:`s(x)` in :footcite:t:`kostic2024learning` Shape: ``kernel_X``: :math:`(N, N)`, where :math:`N` is the number of training data. @@ -103,7 +103,7 @@ def return_phi_dphi( def return_dphi_dphi(kernel_X: np.ndarray, X: np.ndarray, sigma: float, friction: np.ndarray): - r"""Returns the matrix :math:`M_{(k-1)n + i,(l-1)n+j}=` (matrix :math:`M` in the paper) only for + r"""Returns the matrix :math:`M_{(k-1)n + i,(l-1)n+j}=` (matrix :math:`M` in :footcite:t:`kostic2024learning`) only for a GAUSSIAN kernel where :math:`i = 1,\dots n, k=1, \dots d, j=1,\dots n, l=1, \dots d` and :math:`N` is the number of training points and :math:`d` is the dimensionality of the system. @@ -112,7 +112,7 @@ def return_dphi_dphi(kernel_X: np.ndarray, X: np.ndarray, sigma: float, friction kernel_X (np.ndarray): kernel matrix of the training data X (np.ndarray): training data sigma (float): length scale of the GAUSSIAN kernel - friction (np.ndarray): friction parameter of the physical model :math:`s(x)` in the paper + friction (np.ndarray): friction parameter of the physical model :math:`s(x)` in :footcite:t:`kostic2024learning` Shape: ``kernel_X``: :math:`(N, N)`, where :math:`N` is the number of training data. ``X``: :math:`(N,d)` where :math:`N` is the number of training data and `d` the dimensionality of the system. 
From 6d24f9b23de7c8351006c6c89340fd248b3fd6d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Devergne=20Timoth=C3=A9e?= Date: Tue, 18 Feb 2025 12:39:51 +0100 Subject: [PATCH 4/7] Changed name of reduced_rank function, to be consistent --- linear_operator_learning/kernel/regressors.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/linear_operator_learning/kernel/regressors.py b/linear_operator_learning/kernel/regressors.py index 541ab05..cac7c52 100644 --- a/linear_operator_learning/kernel/regressors.py +++ b/linear_operator_learning/kernel/regressors.py @@ -20,7 +20,7 @@ "reduced_rank", "nystroem_reduced_rank", "rand_reduced_rank", - "physics_informed_reduced_rank_regression", + "reduced_rank_regression_physics_informed", ] @@ -436,7 +436,7 @@ def rand_reduced_rank( return result -def physics_informed_reduced_rank_regression( +def reduced_rank_regression_physics_informed( kernel_X: np.ndarray, # kernel matrix of the training data dKernel_X: np.ndarray, # derivative of the kernel: dK_X_{i,j} = <\phi(x_i),d\phi(x_j)> (matrix N in the paper) dKernel_dX: np.ndarray, # derivative of the kernel dK_dX_{i,j} = From ee8572aec275182c7625a356d3fd08d5a50c976e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Devergne=20Timoth=C3=A9e?= Date: Tue, 18 Feb 2025 12:42:09 +0100 Subject: [PATCH 5/7] Updated docs --- docs/reference/kernel.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/kernel.rst b/docs/reference/kernel.rst index f40ea5a..80b8dbb 100644 --- a/docs/reference/kernel.rst +++ b/docs/reference/kernel.rst @@ -39,7 +39,7 @@ Reduced Rank .. autofunction:: linear_operator_learning.kernel.rand_reduced_rank -.. autofunction:: linear_operator_learning.kernel.physics_informed_reduced_rank_regression +.. autofunction:: linear_operator_learning.kernel.reduced_rank_regression_physics_informed .. 
_pcr: Principal Component Regression From 3a6d23fdf9b557b8295968d2a0e8957870fa3cc0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Devergne=20Timoth=C3=A9e?= Date: Tue, 4 Mar 2025 18:45:07 +0100 Subject: [PATCH 6/7] Restructured everything in a dynamics folder Added Kernels class --- docs/reference/kernel/dynamics.rst | 47 ++++ docs/reference/kernel/index.rst | 1 + docs/reference/kernel/kernel.rst | 6 + linear_operator_learning/kernel/__init__.py | 1 + .../kernel/dynamics/__init__.py | 4 + .../kernel/dynamics/regressors.py | 207 ++++++++++++++++++ .../kernel/dynamics/structs.py | 14 ++ linear_operator_learning/kernel/kernels.py | 84 +++++++ linear_operator_learning/kernel/regressors.py | 116 +--------- 9 files changed, 367 insertions(+), 113 deletions(-) create mode 100644 docs/reference/kernel/dynamics.rst create mode 100644 linear_operator_learning/kernel/dynamics/__init__.py create mode 100644 linear_operator_learning/kernel/dynamics/regressors.py create mode 100644 linear_operator_learning/kernel/dynamics/structs.py create mode 100644 linear_operator_learning/kernel/kernels.py diff --git a/docs/reference/kernel/dynamics.rst b/docs/reference/kernel/dynamics.rst new file mode 100644 index 0000000..65206bc --- /dev/null +++ b/docs/reference/kernel/dynamics.rst @@ -0,0 +1,47 @@ +.. _dynamics_reference: +============== +:code:`kernel.dynamics` +============== +.. module:: linear_operator_learning.kernel + +.. rst-class:: lead + + Kernel Methods for dynamics analysis + +Table of Contents +----------------- + +- :ref:`Regressors <_dynamics_kernel_regressors>` +- :ref:`Types <_dynamics_kernel_types>` + + + +.. _kernel_regressors: +Regressors +---------- + +Common +~~~~~~ + +.. autofunction:: linear_operator_learning.kernel.dynamics.regressors.predict +.. autofunction:: linear_operator_learning.kernel.dynamics.regressors.modes + + +.. _dynamics_rrr: +Reduced Rank +~~~~~~~~~~~~ +.. 
autofunction:: linear_operator_learning.kernel.dynamics.regressors.reduced_rank_regression_dirichlet + + + +.. _dynamics_kernel_types: + +Types +----- + +.. autoclass:: linear_operator_learning.kernel.dynamics.structs.ModeResult + :members: + + + +.. footbibliography:: \ No newline at end of file diff --git a/docs/reference/kernel/index.rst b/docs/reference/kernel/index.rst index 3ef83fb..c58b1e7 100644 --- a/docs/reference/kernel/index.rst +++ b/docs/reference/kernel/index.rst @@ -8,3 +8,4 @@ Algorithms and utilities to learn linear operators via kernel methods. .. toctree:: kernel + dynamics diff --git a/docs/reference/kernel/kernel.rst b/docs/reference/kernel/kernel.rst index 698f408..1b8090d 100644 --- a/docs/reference/kernel/kernel.rst +++ b/docs/reference/kernel/kernel.rst @@ -75,4 +75,10 @@ General Utilities .. autofunction:: linear_operator_learning.kernel.utils.sanitize_complex_conjugates +.. _kernels: +Custom Kernels +----------------- +.. autoclass:: linear_operator_learning.kernel.kernels.RBF_with_grad + :members: + :exclude-members: __init__, __new__ .. 
footbibliography:: \ No newline at end of file diff --git a/linear_operator_learning/kernel/__init__.py b/linear_operator_learning/kernel/__init__.py index 56c6d84..3837c65 100644 --- a/linear_operator_learning/kernel/__init__.py +++ b/linear_operator_learning/kernel/__init__.py @@ -2,3 +2,4 @@ from linear_operator_learning.kernel.regressors import * # noqa from linear_operator_learning.kernel.utils import * # noqa +from linear_operator_learning.kernel.kernels import * # noqa diff --git a/linear_operator_learning/kernel/dynamics/__init__.py b/linear_operator_learning/kernel/dynamics/__init__.py new file mode 100644 index 0000000..c31939c --- /dev/null +++ b/linear_operator_learning/kernel/dynamics/__init__.py @@ -0,0 +1,4 @@ +"""dynamics entry point.""" + +from .regressors import * # noqa: F403 +from .structs import * # noqa: F403 diff --git a/linear_operator_learning/kernel/dynamics/regressors.py b/linear_operator_learning/kernel/dynamics/regressors.py new file mode 100644 index 0000000..fd89893 --- /dev/null +++ b/linear_operator_learning/kernel/dynamics/regressors.py @@ -0,0 +1,207 @@ +"""Kernel-based regressors for linear operators based on dynamical trajectories.""" + +from math import sqrt +from typing import Callable, Literal, Union +from warnings import warn + +import numpy as np +import scipy.linalg +from numpy import ndarray +from scipy.sparse.linalg import eigs, eigsh + +from linear_operator_learning.kernel.dynamics.structs import ModeResult +from linear_operator_learning.kernel.linalg import ( + add_diagonal_, + stable_topk, + weighted_norm, +) +from linear_operator_learning.kernel.regressors import evaluate_eigenfunction +from linear_operator_learning.kernel.structs import EigResult, FitResult +from linear_operator_learning.kernel.utils import sanitize_complex_conjugates + +__all__ = ["reduced_rank_regression_dirichlet", "modes", "predict"] + + +def reduced_rank_regression_dirichlet( + kernel_X: ndarray, # kernel matrix of the training data + 
dKernel_X: ndarray, # derivative of the kernel: dK_X_{i,j} = <\phi(x_i),d\phi(x_j)> (matrix N in the paper) + dKernel_dX: ndarray, # derivative of the kernel dK_dX_{i,j} = + shift: float, # shift parameter of the resolvent + tikhonov_reg: float, # Tikhonov (ridge) regularization parameter, can be 0, + rank: int, +): # Rank of the estimator) + r"""Fits the physics informed Reduced Rank Estimator from :footcite:t:`kostic2024learning`. + + Args: + kernel_X (np.ndarray): kernel matrix of the training data + dKernel_X (np.ndarray): (matrix N in :footcite:t:`kostic2024learning`) derivative of the kernel: :math:`N_{i,(k-1)n+j} = \langle \phi(x_i),d_k\phi(x_j) \rangle` + dKernel_dX (np.ndarray): (matrix M in :footcite:t:`kostic2024learning`) derivative of the kernel :math:`M_{(l-1)n+i,(k-1)n+j} = \langle d_l\phi(x_i),d_k\phi(x_j) \rangle` + shift (float): shift parameter of the resolvent + tikhonov_reg (float): Tikhonov (ridge) regularization parameter + rank (int): Rank of the estimator + + Shape: + ``kernel_X``: :math:`(N, N)`, where :math:`N` is the number of training data. + + ``dkernel_X``: :math:`(N, (d+1)N)`, where :math:`N` is the number of training data and :math:`d` is the dimensionality of the input data. + + ``dkernel_dX``: :math:`((d+1)N, (d+1)N)`, where :math:`N` is the number of training data and :math:`d` is the dimensionality of the input data.
+ Returns: EigResult structure containing the eigenvalues and eigenvectors + """ + npts = kernel_X.shape[0] + sqrt_npts = np.sqrt(npts) + + dimension_derivative = dKernel_dX.shape[0] + + # We follow the notation of the paper + J = ( + kernel_X / sqrt_npts + - (dKernel_X / sqrt_npts) + @ np.linalg.inv( + dKernel_dX + tikhonov_reg * shift * sqrt_npts * np.eye(dimension_derivative) + ) + @ dKernel_X.T + ) + + sigmas_2, vectors = eigs( + J @ kernel_X / sqrt_npts, k=rank + 5, M=(J + tikhonov_reg * np.eye(J.shape[0])) * shift + ) + + values, stable_values_idxs = stable_topk(sigmas_2, rank, ignore_warnings=False) + + V = vectors[:, stable_values_idxs] + # Normalization step + V = V @ np.diag(np.sqrt(sqrt_npts) / np.sqrt(np.diag(V.T @ kernel_X @ V))) + + sigma = np.diag(sigmas_2[stable_values_idxs]) + + make_U = np.block( + [ + np.eye(npts) / np.sqrt(shift), + -dKernel_X + @ np.linalg.inv( + dKernel_dX + shift * tikhonov_reg * sqrt_npts * np.eye(dimension_derivative) + ), + ] + ).T + + U = make_U @ (kernel_X @ V / sqrt_npts - shift * V @ sigma) / (tikhonov_reg * shift) + + evals, vl, vr = scipy.linalg.eig(V.T @ V @ sigma, left=True) + + evals = sanitize_complex_conjugates(evals) # To be coupled with the LOL function + lambdas = shift - 1 / evals + r_perm = np.argsort(-lambdas) + vr = vr[:, r_perm] + l_perm = np.argsort(-lambdas.conj()) + vl = vl[:, l_perm] + evals = evals[r_perm] + + vl /= np.sqrt(evals) + result: EigResult = {"values": lambdas[r_perm], "left": (V @ vl) * sqrt_npts, "right": U @ vr} + return result + + +def modes( + eig_result: EigResult, + initial_conditions: ndarray, # Feature matrix of the shape [num_initial_conditions, features] or kernel matrix of the shape [num_initial_conditions, num_training_points] + obs_train: ndarray, # Observable to be predicted evaluated on the trajectory data, shape [num_training_points, obs_features] +) -> ModeResult: + r"""Computes the decomposition of an observable in the eigenmode basis. 
+ + Given the eigenfunctions obtained from a fit function, this function computes + the decomposition of a given observable in terms of these eigenmodes. The resulting + modes provide insight into the dominant temporal patterns governing the system. + + Args: + eig_result (EigResult): Eigen decomposition result containing eigenvalues and left/right eigenfunctions. + initial_conditions (np.ndarray): + kernel matrix of shape :math:`(N_{init}, N)`, where :math:`N_{init}` is the number + of initial conditions and :math:`N` is the number of training points. + obs_train (np.ndarray): Observable evaluated on the training trajectory data, + of shape :math:`(N, d_{obs})`, where :math:`N` is the number of training points + and :math:`d_{obs}` is the number of observable features. + + Shape: + - ``eig_result["left"]``: :math:`(N, r)`, where :math:`r` is the rank of the decomposition. + - ``eig_result["right"]``: :math:`(N, r)`, right eigenfunctions of the transition operator. + - ``eig_result["values"]``: :math:`(r,)`, complex eigenvalues of the transition operator. + - ``initial_conditions``: :math:`(N_{init}, N)`, kernel representation or :math:`(N_{init}, N*(d+1))` if using the dirichlet estimator. + - ``obs_train``: :math:`(N, d_{obs})`, observable evaluated on training data. + + Returns: + ModeResult: A dictionary containing: + - ``decay_rates`` (:math:`(r,)`): Real part of the eigenvalues, representing decay rates. + - ``frequencies`` (:math:`(r,)`): Imaginary part of the eigenvalues, representing oscillation frequencies. + - ``modes`` (:math:`(r, N_{init}, d_{obs})`): Observable decomposed in the eigenmode basis. 
+ """ + evals = eig_result["values"] + levecs = eig_result["left"] + npts = levecs.shape[ + 0 + ] # We use the eigenvector to be consistent with the dirichlet estimator that does not have the same shape #obs_train.shape[0] + if initial_conditions.ndim == 1: + initial_conditions = np.expand_dims(initial_conditions, axis=0) + conditioning = evaluate_eigenfunction( + eig_result, "right", initial_conditions + ).T # [rank, num_initial_conditions] + str = "abcdefgh" # Maximum number of feature dimensions is 8 + einsum_str = "".join([str[k] for k in range(obs_train.ndim - 1)]) # string for features + modes_ = np.einsum("nr,n" + einsum_str + "->r" + einsum_str, levecs.conj(), obs_train) / sqrt( + npts + ) # [rank, features] + modes_ = np.expand_dims(modes_, axis=1) + dims_to_add = modes_.ndim - conditioning.ndim + if dims_to_add > 0: + conditioning = np.expand_dims(conditioning, axis=tuple(range(-dims_to_add, 0))) + modes = conditioning * modes_ # [rank, num_init_cond, obs_features] + result: ModeResult = { + "decay_rates": -evals.real, + "frequencies": evals.imag / (2 * np.pi), + "modes": modes, + } + return result + + +def predict( + t: Union[float, ndarray], # time in the same units as dt + mode_result: ModeResult, +) -> ndarray: # shape [num_init_cond, features] or if num_t>1 [num_init_cond, num_time, features] + r"""Predicts the evolution of an observable using the mode decomposition. + + Given the modal decomposition of an observable, this function reconstructs + the time evolution by propagating the eigenmodes forward in time. + + Args: + t (Union[float, np.ndarray]): Time or array of time values in the same units as the spectral decomposition. + mode_result (ModeResult): Result of the mode decomposition, containing decay rates, frequencies, and modes. + + Shape: + - ``t``: :math:`(T,)` or scalar, where :math:`T` is the number of time steps. + - ``mode_result["decay_rates"]``: :math:`(r,)`, real decay rates of the modes. 
+ - ``mode_result["frequencies"]``: :math:`(r,)`, imaginary frequencies of the modes. + - ``mode_result["modes"]``: :math:`(r, N_{init}, d_{obs})`, mode coefficients. + - ``predictions``: :math:`(N_{init}, T, d_{obs})`, predicted observable values at times :math:`t`. + If :math:`T=1` or :math:`N_{init}=1`, unnecessary dimensions are removed. + + Returns: + np.ndarray: Predicted observable values at the given time steps. + If multiple time points are given, the shape is :math:`(N_{init}, T, d_{obs})`. + If only one time step or one initial condition is provided, the output is squeezed accordingly. + """ + if type(t) == float: # noqa: E721 + t = np.array([t]) + evals = -mode_result["decay_rates"] + 2 * np.pi * 1j * mode_result["frequencies"] + to_evals = np.exp(evals[:, None] * t[None, :]) # [rank,time_steps] + str = "abcdefgh" # Maximum number of feature dimensions is 8 + einsum_str = "".join( + [str[k] for k in range(mode_result["modes"].ndim - 2)] + ) # string for features + predictions = np.einsum( + "rs,rm" + einsum_str + "->ms" + einsum_str, to_evals, mode_result["modes"] + ) + if ( + predictions.shape[0] == 1 or predictions.shape[1] == 1 + ): # If only one time point or one initial condition is requested, remove unnecessary dims + predictions = np.squeeze(predictions) + return predictions.real diff --git a/linear_operator_learning/kernel/dynamics/structs.py b/linear_operator_learning/kernel/dynamics/structs.py new file mode 100644 index 0000000..3c8b6e0 --- /dev/null +++ b/linear_operator_learning/kernel/dynamics/structs.py @@ -0,0 +1,14 @@ +"""Structs used by the `kernel dynamics` algorithms.""" + +from typing import TypedDict + +import numpy as np +from numpy import ndarray + + +class ModeResult(TypedDict): + """Return type for modes decompositions of dynamics kernel regressors.""" + + decay_rates: ndarray + frequencies: ndarray + modes: ndarray diff --git a/linear_operator_learning/kernel/kernels.py b/linear_operator_learning/kernel/kernels.py new file mode 
100644 index 0000000..d439b12 --- /dev/null +++ b/linear_operator_learning/kernel/kernels.py @@ -0,0 +1,84 @@ +# linear_operator_learning/kernel/dynamics/kernels.py # noqa: D100 + +import numpy as np +from sklearn.gaussian_process.kernels import RBF # type: ignore + + +class RBF_with_grad(RBF): # noqa: D101 + def __init__(self, length_scale=1.0, length_scale_bounds=(1e-5, 1e5)): # noqa: D107 + # Init code here: + super().__init__(length_scale, length_scale_bounds) + + def grad(self, kernel_X, X, friction): + r"""Returns the matrix :math:`N_{i,(k-1)n+j}=<\phi(x_i),d_k\phi(x_j)>` (matrix :math:`N` :footcite:t:`kostic2024learning` + where :math:`i = 1,\dots n, k=1, \dots d, j=1,\dots n` + and :math:`N` is the number of training points and :math:`d` is the dimensionality of the system. + + Args: + kernel_X (np.ndarray): kernel matrix of the training data + X (np.ndarray): training data + sigma (float): length scale of the GAUSSIAN kernel + friction (np.ndarray): friction parameter of the physical model :math:`s(x)` in :footcite:t:`kostic2024learning` + Shape: + ``kernel_X``: :math:`(N, N)`, where :math:`N` is the number of training data. + + ``X``: :math:`(N,d)` where :math:`N` is the number of training data and :math:`d` the dimensionality of the system. + + ``friction``: :math:`d` where :math:`d` is the dimensionality of the system. + + Output: :math:`\langle \phi(x_i),d_k\phi(x_j) \rangle` of shape `N,Nd`, where :math:`N` is the number of training data and :math:`d` the dimension of the system.
+ """ # noqa: D205 + difference = X[:, np.newaxis, :] - X[np.newaxis, :, :] + n = difference.shape[0] + d = X.shape[1] + N = np.zeros((n, n * d)) + sigma = self.length_scale + for i in range(n): + for j in range(n): + for k in range(0, d): + N[i, k * n + j] = ( + np.sqrt(friction[k]) * difference[i, j, k] * kernel_X[i, j] / sigma**2 + ) + return N + + def grad2(self, kernel_X: np.ndarray, X: np.ndarray, friction: np.ndarray): + r"""Returns the matrix :math:`M_{(k-1)n + i,(l-1)n+j}=` (matrix :math:`M` in :footcite:t:`kostic2024learning`) + where :math:`i = 1,\dots n, k=1, \dots d, j=1,\dots n, l=1, \dots d` + and :math:`N` is the number of training points and :math:`d` is the dimensionality of the system. + + Args: + kernel_X (np.ndarray): kernel matrix of the training data + X (np.ndarray): training data + sigma (float): length scale of the GAUSSIAN kernel + friction (np.ndarray): friction parameter of the physical model :math:`s(x)` in :footcite:t:`kostic2024learning` + Shape: + ``kernel_X``: :math:`(N, N)`, where :math:`N` is the number of training data. + ``X``: :math:`(N,d)` where :math:`N` is the number of training data and `d` the dimensionality of the system. + ``friction``: :math:`d` where :math:`d` is the dimensionality of the system + Returns: :math:`M_{(k-1)n + i,(l-1)n+j}=` of shape `N,Nd`, where :math:`N` is the number of training data and :math:`d` the dimension of the system. 
+ """ # noqa: D205 + difference = X[:, np.newaxis, :] - X[np.newaxis, :, :] + + d = difference.shape[2] + n = difference.shape[0] + M = np.zeros((n * d, n * d)) + sigma = self.length_scale + for i in range(n): + for j in range(n): + for k in range(0, d): + for m in range(0, d): + if m == k: + M[(k) * n + i, (m) * n + j] = ( + friction[k] + * (1 / sigma**2 - difference[i, j, k] ** 2 / sigma**4) + * kernel_X[i, j] + ) + else: + M[(k) * n + i, (m) * n + j] = ( + np.sqrt(friction[k]) + * np.sqrt(friction[m]) + * (-difference[i, j, k] * difference[i, j, m] / sigma**4) + * kernel_X[i, j] + ) + + return M diff --git a/linear_operator_learning/kernel/regressors.py b/linear_operator_learning/kernel/regressors.py index 33f9aaa..5c710d1 100644 --- a/linear_operator_learning/kernel/regressors.py +++ b/linear_operator_learning/kernel/regressors.py @@ -21,13 +21,11 @@ "predict", "eig", "evaluate_eigenfunction", - "predict_physics_informed", "pcr", "nystroem_pcr", "reduced_rank", "nystroem_reduced_rank", "rand_reduced_rank", - "reduced_rank_regression_physics_informed", ] @@ -106,7 +104,9 @@ def evaluate_eigenfunction( Output: :math:`(N_0, R)` """ vr_or_vl = eig_result[which] - rsqrt_dim = (K_Xin_X_or_Y.shape[1]) ** (-0.5) + rsqrt_dim = ( + 1 / np.sqrt(eig_result["left"].shape[0]) + ) # To be consistent with the Dirichlet estimator, we get the dimension of the training set by the left eigenvector #(K_Xin_X_or_Y.shape[1]) ** (-0.5) return np.linalg.multi_dot([rsqrt_dim * K_Xin_X_or_Y, vr_or_vl]) @@ -147,47 +147,6 @@ def predict( return np.linalg.multi_dot([K_dot_U, M, V_dot_obs]) -def predict_physics_informed( - eig_result: FitResult, - kernel_obs: np.ndarray, - dKernel_obs: np.ndarray, - obs_train_X: np.ndarray, - shift: float, - time: float, -) -> np.ndarray: - r"""Predicts future states using kernel matrices and fitted results using a physics informed generator from :footcite:t:`kostic2024learning`. 
- - Args: - eig_result: EigResult object containing reduced rank regression results - kernel_obs (np.ndarray): kernel matrix of the initial conditions and the training set - dKernel_obs (np.ndarray): (matrix N in :footcite:t:`kostic2024learning`) derivative of the kernel between the initial condition and the training set: :math:`N_{i,(k-1)n+j} = \langle \phi(x_i),d_k\phi(x_j) \rangle` - obs_train_X (ndarray): Observable evaluated on output training data (or inducing points for Nystroem) - shift (float): shift parameter of the resolvent - time (float): time at which we want to estimate the prediction - - Shape: - ``kernel_obs``: :math:`(N, N)`, where :math:`N` is the number of training data. - - ``dkernel_obs``: :math:`(N, (d+1)N)`. where :math:`N` is the number of training data amd :math:`d` is the dimensionality of the input data. - - ``obs_train_X``: :math:`(N, *)`, where :math:`*` is the shape of the observable. - - Output: :math:`(N, *)`. - """ - ul = eig_result["left"] - ur = eig_result["right"] - evs = eig_result["values"] - npts = kernel_obs.shape[0] - h = (np.sqrt(shift) * (kernel_obs @ ur[:npts, :]) + (dKernel_obs @ ur[npts:, :])) / np.sqrt( - npts - ) - g = np.sqrt(shift) * obs_train_X @ ul - - pred = (np.exp(evs * time)[np.newaxis, :] * (g[np.newaxis, :] * h)).sum(axis=-1) - - return pred - - def pcr( kernel_X: ndarray, tikhonov_reg: float = 0.0, @@ -522,72 +481,3 @@ def rand_reduced_rank( svals = np.sqrt(values) result: FitResult = {"U": U, "V": V, "svals": svals} return result - - -def reduced_rank_regression_physics_informed( - kernel_X: np.ndarray, # kernel matrix of the training data - dKernel_X: np.ndarray, # derivative of the kernel: dK_X_{i,j} = <\phi(x_i),d\phi(x_j)> (matrix N in the paper) - dKernel_dX: np.ndarray, # derivative of the kernel dK_dX_{i,j} = - shift: float, # shift parameter of the resolvent - tikhonov_reg: float, # Tikhonov (ridge) regularization parameter, can be 0, - rank: int, -): # Rank of the estimator) - r"""Fits the 
physics informed Reduced Rank Estimator from :footcite:t:`kostic2024learning`. - - Args: - kernel_X (np.ndarray): kernel matrix of the training data - dKernel_X (np.ndarray): (matrix N in :footcite:t:`kostic2024learning`) derivative of the kernel: :math:`N_{i,(k-1)n+j} = \langle \phi(x_i),d_k\phi(x_j) \rangle` - dKernel_dX (np.ndarray): (matrix M in :footcite:t:`kostic2024learning`) derivative of the kernel :math:`M_{(l-1)n+i,(k-1)n+j} = \langle d_l\phi(x_i),d_k\phi(x_j) \rangle` - shift (float): shift parameter of the resolvent - tikhonov_reg (float): Tikhonov (ridge) regularization parameter - rank (int): Rank of the estimator - - Shape: - ``kernel_X``: :math:`(N, N)`, where :math:`N` is the number of training data. - - ``dkernel_X``: :math:`(N, (d+1)N)`. where :math:`N` is the number of training data amd :math:`d` is the dimensionality of the input data. - - ``dkernel_dX``: :math:`((d+1)N, (d+1)N)`. where :math:`N` is the number of training data amd :math:`d` is the dimensionality of the input data. 
- Returns: FitResult structure containing `U` and `V` matrices - """ - npts = kernel_X.shape[0] - sqrt_npts = np.sqrt(npts) - - dimension_derivative = dKernel_dX.shape[0] - - # We follow the notation of the paper - J = ( - kernel_X / sqrt_npts - - (dKernel_X / sqrt_npts) - @ np.linalg.inv( - dKernel_dX + tikhonov_reg * shift * sqrt_npts * np.eye(dimension_derivative) - ) - @ dKernel_X.T - ) - - sigmas_2, vectors = eigs( - J @ kernel_X / sqrt_npts, k=rank + 5, M=(J + tikhonov_reg * np.eye(J.shape[0])) * shift - ) - - values, stable_values_idxs = stable_topk(sigmas_2, rank, ignore_warnings=False) - - V = vectors[:, stable_values_idxs] - # Normalization step - V = V @ np.diag(np.sqrt(sqrt_npts) / np.sqrt(np.diag(V.T @ kernel_X @ V))) - - sigma = np.diag(sigmas_2[stable_values_idxs]) - - make_U = np.block( - [ - np.eye(npts) / np.sqrt(shift), - -dKernel_X - @ np.linalg.inv( - dKernel_dX + shift * tikhonov_reg * sqrt_npts * np.eye(dimension_derivative) - ), - ] - ).T - - U = make_U @ (kernel_X @ V / sqrt_npts - shift * V @ sigma) / (tikhonov_reg * shift) - result: FitResult = {"U": U.real, "V": V.real, "svals": np.sqrt(sigmas_2[stable_values_idxs])} - - return result From d2fd7ef20197e5133157f54d8a3dde003f5b3658 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Devergne=20Timoth=C3=A9e?= Date: Tue, 4 Mar 2025 19:01:39 +0100 Subject: [PATCH 7/7] Fixed docs --- linear_operator_learning/kernel/dynamics/regressors.py | 8 ++------ linear_operator_learning/kernel/kernels.py | 8 +++----- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/linear_operator_learning/kernel/dynamics/regressors.py b/linear_operator_learning/kernel/dynamics/regressors.py index fd89893..694be58 100644 --- a/linear_operator_learning/kernel/dynamics/regressors.py +++ b/linear_operator_learning/kernel/dynamics/regressors.py @@ -115,18 +115,14 @@ def modes( Args: eig_result (EigResult): Eigen decomposition result containing eigenvalues and left/right eigenfunctions. 
- initial_conditions (np.ndarray): - kernel matrix of shape :math:`(N_{init}, N)`, where :math:`N_{init}` is the number - of initial conditions and :math:`N` is the number of training points. + initial_conditions (np.ndarray): kernel matrix or block matrix of the form [[K_x,0][0,N]] when using the dirichlet generator obs_train (np.ndarray): Observable evaluated on the training trajectory data, - of shape :math:`(N, d_{obs})`, where :math:`N` is the number of training points - and :math:`d_{obs}` is the number of observable features. Shape: - ``eig_result["left"]``: :math:`(N, r)`, where :math:`r` is the rank of the decomposition. - ``eig_result["right"]``: :math:`(N, r)`, right eigenfunctions of the transition operator. - ``eig_result["values"]``: :math:`(r,)`, complex eigenvalues of the transition operator. - - ``initial_conditions``: :math:`(N_{init}, N)`, kernel representation or :math:`(N_{init}, N*(d+1))` if using the dirichlet estimator. + - ``initial_conditions``: :math:`(N_{init}, N)`, kernel representation or :math:`(N_{init}, N*d)` if using the dirichlet estimator. - ``obs_train``: :math:`(N, d_{obs})`, observable evaluated on training data. 
Returns: diff --git a/linear_operator_learning/kernel/kernels.py b/linear_operator_learning/kernel/kernels.py index d439b12..07585d8 100644 --- a/linear_operator_learning/kernel/kernels.py +++ b/linear_operator_learning/kernel/kernels.py @@ -10,14 +10,13 @@ def __init___(self, length_scale=1.0, length_scale_bounds=(1e-5, 1e5)): # noqa: super().__init__(length_scale, length_scale_bounds) def grad(self, kernel_X, X, friction): - r"""Returns the matrix :math:`N_{i,(k-1)n+j}=<\phi(x_i),d_k\phi(x_j)>` (matrix :math:`N` :footcite:t:`kostic2024learning` + r"""Returns the matrix :math:`N_{i,(k-1)n+j}= \langle \phi(x_i),d_k\phi(x_j) \rangle` (matrix :math:`N` :footcite:t:`kostic2024learning` where :math:`i = 1,\dots n, k=1, \dots d, j=1,\dots n` and :math:`N` is the number of training points and :math:`d` is the dimensionality of the system. Args: kernel_X (np.ndarray): kernel matrix of the training data X (np.ndarray): training data - sigma (float): length scale of the GAUSSIAN kernel friction (np.ndarray): friction parameter of the physical model :math:`s(x)` in :footcite:t:`kostic2024learning` Shape: ``kernel_X``: :math:`(N, N)`, where :math:`N` is the number of training data. @@ -42,20 +41,19 @@ def grad(self, kernel_X, X, friction): return N def grad2(self, kernel_X: np.ndarray, X: np.ndarray, friction: np.ndarray): - r"""Returns the matrix :math:`M_{(k-1)n + i,(l-1)n+j}=` (matrix :math:`M` in :footcite:t:`kostic2024learning`) + r"""Returns the matrix :math:`M_{(k-1)n + i,(l-1)n+j}= \langle d_k\phi(x_i),d_l\phi(x_j) \rangle` (matrix :math:`M` in :footcite:t:`kostic2024learning`) where :math:`i = 1,\dots n, k=1, \dots d, j=1,\dots n, l=1, \dots d` and :math:`N` is the number of training points and :math:`d` is the dimensionality of the system. 
Args: kernel_X (np.ndarray): kernel matrix of the training data X (np.ndarray): training data - sigma (float): length scale of the GAUSSIAN kernel friction (np.ndarray): friction parameter of the physical model :math:`s(x)` in :footcite:t:`kostic2024learning` Shape: ``kernel_X``: :math:`(N, N)`, where :math:`N` is the number of training data. ``X``: :math:`(N,d)` where :math:`N` is the number of training data and `d` the dimensionality of the system. ``friction``: :math:`d` where :math:`d` is the dimensionality of the system - Returns: :math:`M_{(k-1)n + i,(l-1)n+j}=` of shape `N,Nd`, where :math:`N` is the number of training data and :math:`d` the dimension of the system. + Returns: :math:`M_{(k-1)n + i,(l-1)n+j}= \langle d_k\phi(x_i),d_l\phi(x_j) \rangle` of shape `Nd,Nd`, where :math:`N` is the number of training data and :math:`d` the dimension of the system. """ # noqa: D205 difference = X[:, np.newaxis, :] - X[np.newaxis, :, :]