diff --git a/docs/bibliography.bib b/docs/bibliography.bib index 145ae74..92679ca 100644 --- a/docs/bibliography.bib +++ b/docs/bibliography.bib @@ -194,3 +194,10 @@ @misc{Turri2023 year = {2023}, copyright = {arXiv.org perpetual, non-exclusive license} } +@inproceedings{kostic2024learning, +title={Learning the Infinitesimal Generator of Stochastic Diffusion Processes}, +author={Vladimir R Kostic and H{\'e}l{\`e}ne Halconruy and Timoth{\'e}e Devergne and Karim Lounici and Massimiliano Pontil}, +booktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems}, +year={2024}, +url={https://openreview.net/forum?id=H7SaaqfCUi} +} diff --git a/docs/reference/kernel/dynamics.rst b/docs/reference/kernel/dynamics.rst new file mode 100644 index 0000000..65206bc --- /dev/null +++ b/docs/reference/kernel/dynamics.rst @@ -0,0 +1,47 @@ +.. _dynamics_reference: +======================= +:code:`kernel.dynamics` +======================= +.. module:: linear_operator_learning.kernel + +.. rst-class:: lead + + Kernel Methods for dynamics analysis + +Table of Contents +----------------- + +- :ref:`Regressors <dynamics_kernel_regressors>` +- :ref:`Types <dynamics_kernel_types>` + + + +.. _dynamics_kernel_regressors: +Regressors +---------- + +Common +~~~~~~ + +.. autofunction:: linear_operator_learning.kernel.dynamics.regressors.predict +.. autofunction:: linear_operator_learning.kernel.dynamics.regressors.modes + + +.. _dynamics_rrr: +Reduced Rank +~~~~~~~~~~~~ +.. autofunction:: linear_operator_learning.kernel.dynamics.regressors.reduced_rank_regression_dirichlet + + + +.. _dynamics_kernel_types: + +Types +----- + +.. autoclass:: linear_operator_learning.kernel.dynamics.structs.ModeResult + :members: + + + +.. 
footbibliography:: \ No newline at end of file diff --git a/docs/reference/kernel/index.rst b/docs/reference/kernel/index.rst index 3ef83fb..c58b1e7 100644 --- a/docs/reference/kernel/index.rst +++ b/docs/reference/kernel/index.rst @@ -8,3 +8,4 @@ Algorithms and utilities to learn linear operators via kernel methods. .. toctree:: kernel + dynamics diff --git a/docs/reference/kernel/kernel.rst b/docs/reference/kernel/kernel.rst index 698f408..1b8090d 100644 --- a/docs/reference/kernel/kernel.rst +++ b/docs/reference/kernel/kernel.rst @@ -75,4 +75,10 @@ General Utilities .. autofunction:: linear_operator_learning.kernel.utils.sanitize_complex_conjugates +.. _kernels: +Custom Kernels +----------------- +.. autoclass:: linear_operator_learning.kernel.kernels.RBF_with_grad + :members: + :exclude-members: __init__, __new__ .. footbibliography:: \ No newline at end of file diff --git a/linear_operator_learning/kernel/__init__.py b/linear_operator_learning/kernel/__init__.py index 636e05a..3837c65 100644 --- a/linear_operator_learning/kernel/__init__.py +++ b/linear_operator_learning/kernel/__init__.py @@ -1,3 +1,5 @@ """Kernel methods entry point.""" from linear_operator_learning.kernel.regressors import * # noqa +from linear_operator_learning.kernel.utils import * # noqa +from linear_operator_learning.kernel.kernels import * # noqa diff --git a/linear_operator_learning/kernel/dynamics/__init__.py b/linear_operator_learning/kernel/dynamics/__init__.py new file mode 100644 index 0000000..c31939c --- /dev/null +++ b/linear_operator_learning/kernel/dynamics/__init__.py @@ -0,0 +1,4 @@ +"""dynamics entry point.""" + +from .regressors import * # noqa: F403 +from .structs import * # noqa: F403 diff --git a/linear_operator_learning/kernel/dynamics/regressors.py b/linear_operator_learning/kernel/dynamics/regressors.py new file mode 100644 index 0000000..694be58 --- /dev/null +++ b/linear_operator_learning/kernel/dynamics/regressors.py @@ -0,0 +1,203 @@ +"""Kernel-based regressors 
for linear operators based on dynamical trajectories.""" + +from math import sqrt +from typing import Callable, Literal, Union +from warnings import warn + +import numpy as np +import scipy.linalg +from numpy import ndarray +from scipy.sparse.linalg import eigs, eigsh + +from linear_operator_learning.kernel.dynamics.structs import ModeResult +from linear_operator_learning.kernel.linalg import ( + add_diagonal_, + stable_topk, + weighted_norm, +) +from linear_operator_learning.kernel.regressors import evaluate_eigenfunction +from linear_operator_learning.kernel.structs import EigResult, FitResult +from linear_operator_learning.kernel.utils import sanitize_complex_conjugates + +__all__ = ["reduced_rank_regression_dirichlet", "modes", "predict"] + + +def reduced_rank_regression_dirichlet( + kernel_X: ndarray, # kernel matrix of the training data + dKernel_X: ndarray, # derivative of the kernel: dK_X_{i,j} = <\phi(x_i),d\phi(x_j)> (matrix N in the paper) + dKernel_dX: ndarray, # derivative of the kernel dK_dX_{i,j} = + shift: float, # shift parameter of the resolvent + tikhonov_reg: float, # Tikhonov (ridge) regularization parameter, can be 0, + rank: int, +): # Rank of the estimator) + r"""Fits the physics informed Reduced Rank Estimator from :footcite:t:`kostic2024learning`. + + Args: + kernel_X (np.ndarray): kernel matrix of the training data + dKernel_X (np.ndarray): (matrix N in :footcite:t:`kostic2024learning`) derivative of the kernel: :math:`N_{i,(k-1)n+j} = \langle \phi(x_i),d_k\phi(x_j) \rangle` + dKernel_dX (np.ndarray): (matrix M in :footcite:t:`kostic2024learning`) derivative of the kernel :math:`M_{(l-1)n+i,(k-1)n+j} = \langle d_l\phi(x_i),d_k\phi(x_j) \rangle` + shift (float): shift parameter of the resolvent + tikhonov_reg (float): Tikhonov (ridge) regularization parameter + rank (int): Rank of the estimator + + Shape: + ``kernel_X``: :math:`(N, N)`, where :math:`N` is the number of training data. + + ``dkernel_X``: :math:`(N, (d+1)N)`. 
where :math:`N` is the number of training data and :math:`d` is the dimensionality of the input data. + + ``dkernel_dX``: :math:`((d+1)N, (d+1)N)`. where :math:`N` is the number of training data and :math:`d` is the dimensionality of the input data. + Returns: EigResult structure containing the eigenvalues and eigenvectors + """ + npts = kernel_X.shape[0] + sqrt_npts = np.sqrt(npts) + + dimension_derivative = dKernel_dX.shape[0] + + # We follow the notation of the paper + J = ( + kernel_X / sqrt_npts + - (dKernel_X / sqrt_npts) + @ np.linalg.inv( + dKernel_dX + tikhonov_reg * shift * sqrt_npts * np.eye(dimension_derivative) + ) + @ dKernel_X.T + ) + + sigmas_2, vectors = eigs( + J @ kernel_X / sqrt_npts, k=rank + 5, M=(J + tikhonov_reg * np.eye(J.shape[0])) * shift + ) + + values, stable_values_idxs = stable_topk(sigmas_2, rank, ignore_warnings=False) + + V = vectors[:, stable_values_idxs] + # Normalization step + V = V @ np.diag(np.sqrt(sqrt_npts) / np.sqrt(np.diag(V.T @ kernel_X @ V))) + + sigma = np.diag(sigmas_2[stable_values_idxs]) + + make_U = np.block( + [ + np.eye(npts) / np.sqrt(shift), + -dKernel_X + @ np.linalg.inv( + dKernel_dX + shift * tikhonov_reg * sqrt_npts * np.eye(dimension_derivative) + ), + ] + ).T + + U = make_U @ (kernel_X @ V / sqrt_npts - shift * V @ sigma) / (tikhonov_reg * shift) + + evals, vl, vr = scipy.linalg.eig(V.T @ V @ sigma, left=True) + + evals = sanitize_complex_conjugates(evals) # To be coupled with the LOL function + lambdas = shift - 1 / evals + r_perm = np.argsort(-lambdas) + vr = vr[:, r_perm] + l_perm = np.argsort(-lambdas.conj()) + vl = vl[:, l_perm] + evals = evals[r_perm] + + vl /= np.sqrt(evals) + result: EigResult = {"values": lambdas[r_perm], "left": (V @ vl) * sqrt_npts, "right": U @ vr} + return result + + +def modes( + eig_result: EigResult, + initial_conditions: ndarray, # Feature matrix of the shape [num_initial_conditions, features] or kernel matrix of the shape [num_initial_conditions, num_training_points] + 
obs_train: ndarray, # Observable to be predicted evaluated on the trajectory data, shape [num_training_points, obs_features] +) -> ModeResult: + r"""Computes the decomposition of an observable in the eigenmode basis. + + Given the eigenfunctions obtained from a fit function, this function computes + the decomposition of a given observable in terms of these eigenmodes. The resulting + modes provide insight into the dominant temporal patterns governing the system. + + Args: + eig_result (EigResult): Eigen decomposition result containing eigenvalues and left/right eigenfunctions. + initial_conditions (np.ndarray): kernel matrix or block matrix of the form [[K_x,0][0,N]] when using the dirichlet generator + obs_train (np.ndarray): Observable evaluated on the training trajectory data, + + Shape: + - ``eig_result["left"]``: :math:`(N, r)`, where :math:`r` is the rank of the decomposition. + - ``eig_result["right"]``: :math:`(N, r)`, right eigenfunctions of the transition operator. + - ``eig_result["values"]``: :math:`(r,)`, complex eigenvalues of the transition operator. + - ``initial_conditions``: :math:`(N_{init}, N)`, kernel representation or :math:`(N_{init}, N*d)` if using the dirichlet estimator. + - ``obs_train``: :math:`(N, d_{obs})`, observable evaluated on training data. + + Returns: + ModeResult: A dictionary containing: + - ``decay_rates`` (:math:`(r,)`): Real part of the eigenvalues, representing decay rates. + - ``frequencies`` (:math:`(r,)`): Imaginary part of the eigenvalues, representing oscillation frequencies. + - ``modes`` (:math:`(r, N_{init}, d_{obs})`): Observable decomposed in the eigenmode basis. 
+ """ + evals = eig_result["values"] + levecs = eig_result["left"] + npts = levecs.shape[ + 0 + ] # We use the eigenvector to be consistent with the dirichlet estimator that does not have the same shape #obs_train.shape[0] + if initial_conditions.ndim == 1: + initial_conditions = np.expand_dims(initial_conditions, axis=0) + conditioning = evaluate_eigenfunction( + eig_result, "right", initial_conditions + ).T # [rank, num_initial_conditions] + str = "abcdefgh" # Maximum number of feature dimensions is 8 + einsum_str = "".join([str[k] for k in range(obs_train.ndim - 1)]) # string for features + modes_ = np.einsum("nr,n" + einsum_str + "->r" + einsum_str, levecs.conj(), obs_train) / sqrt( + npts + ) # [rank, features] + modes_ = np.expand_dims(modes_, axis=1) + dims_to_add = modes_.ndim - conditioning.ndim + if dims_to_add > 0: + conditioning = np.expand_dims(conditioning, axis=tuple(range(-dims_to_add, 0))) + modes = conditioning * modes_ # [rank, num_init_cond, obs_features] + result: ModeResult = { + "decay_rates": -evals.real, + "frequencies": evals.imag / (2 * np.pi), + "modes": modes, + } + return result + + +def predict( + t: Union[float, ndarray], # time in the same units as dt + mode_result: ModeResult, +) -> ndarray: # shape [num_init_cond, features] or if num_t>1 [num_init_cond, num_time, features] + r"""Predicts the evolution of an observable using the mode decomposition. + + Given the modal decomposition of an observable, this function reconstructs + the time evolution by propagating the eigenmodes forward in time. + + Args: + t (Union[float, np.ndarray]): Time or array of time values in the same units as the spectral decomposition. + mode_result (ModeResult): Result of the mode decomposition, containing decay rates, frequencies, and modes. + + Shape: + - ``t``: :math:`(T,)` or scalar, where :math:`T` is the number of time steps. + - ``mode_result["decay_rates"]``: :math:`(r,)`, real decay rates of the modes. 
+ - ``mode_result["frequencies"]``: :math:`(r,)`, imaginary frequencies of the modes. + - ``mode_result["modes"]``: :math:`(r, N_{init}, d_{obs})`, mode coefficients. + - ``predictions``: :math:`(N_{init}, T, d_{obs})`, predicted observable values at times :math:`t`. + If :math:`T=1` or :math:`N_{init}=1`, unnecessary dimensions are removed. + + Returns: + np.ndarray: Predicted observable values at the given time steps. + If multiple time points are given, the shape is :math:`(N_{init}, T, d_{obs})`. + If only one time step or one initial condition is provided, the output is squeezed accordingly. + """ + if type(t) == float: # noqa: E721 + t = np.array([t]) + evals = -mode_result["decay_rates"] + 2 * np.pi * 1j * mode_result["frequencies"] + to_evals = np.exp(evals[:, None] * t[None, :]) # [rank,time_steps] + str = "abcdefgh" # Maximum number of feature dimensions is 8 + einsum_str = "".join( + [str[k] for k in range(mode_result["modes"].ndim - 2)] + ) # string for features + predictions = np.einsum( + "rs,rm" + einsum_str + "->ms" + einsum_str, to_evals, mode_result["modes"] + ) + if ( + predictions.shape[0] == 1 or predictions.shape[1] == 1 + ): # If only one time point or one initial condition is requested, remove unnecessary dims + predictions = np.squeeze(predictions) + return predictions.real diff --git a/linear_operator_learning/kernel/dynamics/structs.py b/linear_operator_learning/kernel/dynamics/structs.py new file mode 100644 index 0000000..3c8b6e0 --- /dev/null +++ b/linear_operator_learning/kernel/dynamics/structs.py @@ -0,0 +1,14 @@ +"""Structs used by the `kernel dynamics` algorithms.""" + +from typing import TypedDict + +import numpy as np +from numpy import ndarray + + +class ModeResult(TypedDict): + """Return type for modes decompositions of dynamics kernel regressors.""" + + decay_rates: ndarray + frequencies: ndarray + modes: ndarray diff --git a/linear_operator_learning/kernel/kernels.py b/linear_operator_learning/kernel/kernels.py new file mode 
100644 index 0000000..07585d8 --- /dev/null +++ b/linear_operator_learning/kernel/kernels.py @@ -0,0 +1,82 @@ +# linear_operator_learning/kernel/dynamics/kernels.py # noqa: D100 + +import numpy as np +from sklearn.gaussian_process.kernels import RBF # type: ignore + + +class RBF_with_grad(RBF):  # noqa: D101 + def __init__(self, length_scale=1.0, length_scale_bounds=(1e-5, 1e5)): # noqa: D105 + # Init code here: + super().__init__(length_scale, length_scale_bounds) + + def grad(self, kernel_X, X, friction): + r"""Returns the matrix :math:`N_{i,(k-1)n+j}= \langle \phi(x_i),d_k\phi(x_j) \rangle` (matrix :math:`N` in :footcite:t:`kostic2024learning`) + where :math:`i = 1,\dots n, k=1, \dots d, j=1,\dots n` + and :math:`N` is the number of training points and :math:`d` is the dimensionality of the system. + + Args: + kernel_X (np.ndarray): kernel matrix of the training data + X (np.ndarray): training data + friction (np.ndarray): friction parameter of the physical model :math:`s(x)` in :footcite:t:`kostic2024learning` + Shape: + ``kernel_X``: :math:`(N, N)`, where :math:`N` is the number of training data. + + ``X``: :math:`(N,d)` where :math:`N` is the number of training data and :math:`d` the dimensionality of the system. + + ``friction``: :math:`d` where :math:`d` is the dimensionality of the system. + + Output: :math:`\langle \phi(x_i),d_k\phi(x_j) \rangle` of shape `N,Nd`, where :math:`N` is the number of training data and :math:`d` the dimension of the system. 
+ """ # noqa: D205 + difference = X[:, np.newaxis, :] - X[np.newaxis, :, :] + n = difference.shape[0] + d = X.shape[1] + N = np.zeros((n, n * d)) + sigma = self.length_scale + for i in range(n): + for j in range(n): + for k in range(0, d): + N[i, k * n + j] = ( + np.sqrt(friction[k]) * difference[i, j, k] * kernel_X[i, j] / sigma**2 + ) + return N + + def grad2(self, kernel_X: np.ndarray, X: np.ndarray, friction: np.ndarray): + r"""Returns the matrix :math:`M_{(k-1)n + i,(l-1)n+j}= \langle d_k\phi(x_i),d_l\phi(x_j) \rangle` (matrix :math:`M` in :footcite:t:`kostic2024learning`) + where :math:`i = 1,\dots n, k=1, \dots d, j=1,\dots n, l=1, \dots d` + and :math:`N` is the number of training points and :math:`d` is the dimensionality of the system. + + Args: + kernel_X (np.ndarray): kernel matrix of the training data + X (np.ndarray): training data + friction (np.ndarray): friction parameter of the physical model :math:`s(x)` in :footcite:t:`kostic2024learning` + Shape: + ``kernel_X``: :math:`(N, N)`, where :math:`N` is the number of training data. + ``X``: :math:`(N,d)` where :math:`N` is the number of training data and `d` the dimensionality of the system. + ``friction``: :math:`d` where :math:`d` is the dimensionality of the system + Returns: :math:`M_{(k-1)n + i,(l-1)n+j}= \langle d_k\phi(x_i),d_l\phi(x_j) \rangle` of shape `Nd,Nd`, where :math:`N` is the number of training data and :math:`d` the dimension of the system. 
+ """ # noqa: D205 + difference = X[:, np.newaxis, :] - X[np.newaxis, :, :] + + d = difference.shape[2] + n = difference.shape[0] + M = np.zeros((n * d, n * d)) + sigma = self.length_scale + for i in range(n): + for j in range(n): + for k in range(0, d): + for m in range(0, d): + if m == k: + M[(k) * n + i, (m) * n + j] = ( + friction[k] + * (1 / sigma**2 - difference[i, j, k] ** 2 / sigma**4) + * kernel_X[i, j] + ) + else: + M[(k) * n + i, (m) * n + j] = ( + np.sqrt(friction[k]) + * np.sqrt(friction[m]) + * (-difference[i, j, k] * difference[i, j, m] / sigma**4) + * kernel_X[i, j] + ) + + return M diff --git a/linear_operator_learning/kernel/regressors.py b/linear_operator_learning/kernel/regressors.py index cd3fc26..5c710d1 100644 --- a/linear_operator_learning/kernel/regressors.py +++ b/linear_operator_learning/kernel/regressors.py @@ -104,7 +104,9 @@ def evaluate_eigenfunction( Output: :math:`(N_0, R)` """ vr_or_vl = eig_result[which] - rsqrt_dim = (K_Xin_X_or_Y.shape[1]) ** (-0.5) + rsqrt_dim = ( + 1 / np.sqrt(eig_result["left"].shape[0]) + ) # To be consistent with the Dirichlet estimator, we get the dimension of the training set by the left eigenvector #(K_Xin_X_or_Y.shape[1]) ** (-0.5) return np.linalg.multi_dot([rsqrt_dim * K_Xin_X_or_Y, vr_or_vl]) diff --git a/linear_operator_learning/kernel/utils.py b/linear_operator_learning/kernel/utils.py index 2604aac..fe47189 100644 --- a/linear_operator_learning/kernel/utils.py +++ b/linear_operator_learning/kernel/utils.py @@ -6,6 +6,11 @@ from numpy import ndarray from scipy.spatial.distance import pdist +__all__ = [ + "return_phi_dphi", + "return_dphi_dphi", +] + def topk(vec: ndarray, k: int): """Get the top k values from a Numpy array. 
@@ -57,3 +62,84 @@ def _row_col_from_condensed_index(d, index): i = (-b - sqrt(b**2 - 8 * index)) // 2 j = index + i * (b + i + 2) // 2 + 1 return (int(i), int(j)) + + +def return_phi_dphi( + kernel_X: np.ndarray, + X: np.ndarray, + sigma: float, + friction: np.ndarray, +): + r"""Returns the matrix :math:`N_{i,(k-1)n+j}=<\phi(x_i),d_k\phi(x_j)>` (matrix :math:`N` :footcite:t:`kostic2024learning`) only for + a GAUSSIAN kernel + where :math:`i = 1,\dots n, k=1, \dots d, j=1,\dots n` + and :math:`N` is the number of training points and :math:`d` is the dimensionality of the system. + + Args: + kernel_X (np.ndarray): kernel matrix of the training data + X (np.ndarray): training data + sigma (float): length scale of the GAUSSIAN kernel + friction (np.ndarray): friction parameter of the physical model :math:`s(x)` in :footcite:t:`kostic2024learning` + Shape: + ``kernel_X``: :math:`(N, N)`, where :math:`N` is the number of training data. + + ``X``: :math:`(N,d)` where :math:`N` is the number of training data and :math:`d` the dimensionality of the system. + + ``friction``: :math:`d` where :math:`d` is the dimensionality of the system. + + Output: :math:`<\phi(x_i),d_k\phi(x_j)>` of shape `N,Nd`, where :math:`N` is the number of training data and :math:`d` the dimension of the system. 
+ """ # noqa: D205 + difference = X[:, np.newaxis, :] - X[np.newaxis, :, :] + n = difference.shape[0] + d = X.shape[1] + N = np.zeros((n, n * d)) + for i in range(n): + for j in range(n): + for k in range(0, d): + N[i, k * n + j] = ( + np.sqrt(friction[k]) * difference[i, j, k] * kernel_X[i, j] / sigma**2 + ) + return N + + +def return_dphi_dphi(kernel_X: np.ndarray, X: np.ndarray, sigma: float, friction: np.ndarray): + r"""Returns the matrix :math:`M_{(k-1)n + i,(l-1)n+j}=` (matrix :math:`M` in :footcite:t:`kostic2024learning`) only for + a GAUSSIAN kernel + where :math:`i = 1,\dots n, k=1, \dots d, j=1,\dots n, l=1, \dots d` + and :math:`N` is the number of training points and :math:`d` is the dimensionality of the system. + + Args: + kernel_X (np.ndarray): kernel matrix of the training data + X (np.ndarray): training data + sigma (float): length scale of the GAUSSIAN kernel + friction (np.ndarray): friction parameter of the physical model :math:`s(x)` in :footcite:t:`kostic2024learning` + Shape: + ``kernel_X``: :math:`(N, N)`, where :math:`N` is the number of training data. + ``X``: :math:`(N,d)` where :math:`N` is the number of training data and `d` the dimensionality of the system. + ``friction``: :math:`d` where :math:`d` is the dimensionality of the system + Returns: :math:`M_{(k-1)n + i,(l-1)n+j}=` of shape `N,Nd`, where :math:`N` is the number of training data and :math:`d` the dimension of the system. 
+ """ # noqa: D205 + difference = X[:, np.newaxis, :] - X[np.newaxis, :, :] + + d = difference.shape[2] + n = difference.shape[0] + M = np.zeros((n * d, n * d)) + for i in range(n): + for j in range(n): + for k in range(0, d): + for m in range(0, d): + if m == k: + M[(k) * n + i, (m) * n + j] = ( + friction[k] + * (1 / sigma**2 - difference[i, j, k] ** 2 / sigma**4) + * kernel_X[i, j] + ) + else: + M[(k) * n + i, (m) * n + j] = ( + np.sqrt(friction[k]) + * np.sqrt(friction[m]) + * (-difference[i, j, k] * difference[i, j, m] / sigma**4) + * kernel_X[i, j] + ) + + return M