diff --git a/.github/workflows/python-wheel.yml b/.github/workflows/python-wheel.yml
index 6aa277f6..c60605f2 100644
--- a/.github/workflows/python-wheel.yml
+++ b/.github/workflows/python-wheel.yml
@@ -4,6 +4,16 @@ on:
   push:
     branches:
       - main  # Rebuild wheels on every commit to main
+  workflow_dispatch:
+    inputs:
+      channel:
+        description: "Wheel channel to publish"
+        required: true
+        default: "staging"
+        type: choice
+        options:
+          - staging
+          - production
 
 permissions:
   contents: write  # Needed for GITHUB_TOKEN to push
@@ -13,6 +23,7 @@ jobs:
     runs-on: ubuntu-latest
     env:
       PYTHON_VERSION: 3.12
+      PUBLISH_CHANNEL: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.channel || 'production' }}
 
     steps:
       # Checkout the repository
@@ -38,7 +49,7 @@ jobs:
       - name: Upload built wheels (optional)
         uses: actions/upload-artifact@v4
         with:
-          name: wheels
+          name: wheels-${{ env.PUBLISH_CHANNEL }}
           path: ./dist/*.whl
 
       # Publish wheels to orphan `wheels` branch
@@ -47,6 +58,7 @@ jobs:
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
         run: |
+          set -eu
 
           # Abort if no wheels were built
           if [ -z "$(ls -A ./dist/*.whl 2>/dev/null)" ]; then
@@ -54,7 +66,16 @@ jobs:
             exit 0
           fi
 
-          # Prepare fresh working directory for orphan branch
+          # Map the channel to its directory on the wheels branch (anything but production is staging)
+          channel="${PUBLISH_CHANNEL}"
+          case "$channel" in
+            production) channel_dir="wheels" ;;
+            *) channel_dir="wheels-staging" ;;
+          esac
+          echo "Publishing channel: $channel"
+          echo "Target directory: $channel_dir"
+
+          # Prepare working directory for the published branch
           rm -rf wheels-branch
           mkdir wheels-branch
           cd wheels-branch
@@ -64,24 +85,36 @@ jobs:
           git remote add origin https://x-access-token:${GITHUB_TOKEN}@github.com/${{ github.repository }}.git
           git fetch origin wheels || true
 
-          # Create orphan branch (separate history)
-          git checkout --orphan wheels
-          git reset --hard
+          # Reuse the published branch when it already exists, so the other channel's directory is kept
+          if git ls-remote --exit-code --heads origin wheels >/dev/null 2>&1; then
+            git checkout -B wheels origin/wheels  # start from the remote branch fetched above
+          else
+            git checkout --orphan wheels  # nothing published yet: start a branch with no history
+            git reset --hard
+          fi
 
-          # Copy wheels from main repo build output
-          mkdir -p wheels
-          cp ../dist/*.whl wheels/
+          # Replace only the selected channel contents
+          mkdir -p "$channel_dir"
+          find "$channel_dir" -maxdepth 1 -type f -name '*.whl' -delete
+          cp ../dist/*.whl "$channel_dir"/
           echo "Wheels to publish:"
-          ls -lh wheels/
+          ls -lh "$channel_dir"/
 
           # Generate latest.txt (name of newest wheel)
-          latest_wheel=$(ls -1 wheels/*.whl | sort | tail -n 1)
-          echo "$(basename $latest_wheel)" > wheels/latest.txt
-          echo "Latest wheel: $(cat wheels/latest.txt)"
+          latest_wheel=$(ls -1 "$channel_dir"/*.whl | sort | tail -n 1)
+          echo "$(basename "$latest_wheel")" > "$channel_dir/latest.txt"
+          printf '%s\n' "$GITHUB_SHA" > "$channel_dir/commit.txt"  # runner-provided env var
+          printf '%s\n' "$GITHUB_REF_NAME" > "$channel_dir/ref.txt"  # env var: the branch name stays data, never script text
+          echo "$channel" > "$channel_dir/channel.txt"
+          echo "Latest wheel: $(cat "$channel_dir/latest.txt")"
 
           # Commit and push
           git config user.name "GitHub Actions"
           git config user.email "actions@github.com"
-          git add wheels
-          git commit -m "Update wheels for commit ${{ github.sha }}"
+          git add "$channel_dir"
+          if git diff --cached --quiet; then
+            echo "No changes to publish"
+            exit 0
+          fi
+          git commit -m "Update ${channel} wheels for commit ${{ github.sha }}"
           git push origin wheels --force
diff --git a/README-DEVELOPERS.md b/README-DEVELOPERS.md
index 7ba755fe..4fc9c7bf 100644
--- a/README-DEVELOPERS.md
+++ b/README-DEVELOPERS.md
@@ -128,3 +128,8 @@ if sys.platform == "emscripten":
 ```
 
 This code is automatically loaded into jupyter notebooks via changes implemented in https://github.com/ironArray/Caterva2/commit/882d9fa930e573fdbc65d62b8dc90722670b8e9a.
+
+For pre-release browser testing, a separate staging wheel channel is also available at
+`https://ironarray.github.io/Caterva2/wheels-staging/latest.txt`.  The staging
+publishing flow and the recommended notebook override are documented in
+`RELEASING.rst`.
diff --git a/RELEASING.rst b/RELEASING.rst
index b1748e7e..6447c5c0 100644
--- a/RELEASING.rst
+++ b/RELEASING.rst
@@ -44,6 +44,75 @@ And experiment a bit with uploading, browsing and downloading files.
 If the tests pass, you are ready to release.
 
 
+Staging wheel channel
+---------------------
+
+Before publishing a production wheel for all JupyterLite users, you can publish
+a staging wheel to a separate GitHub-hosted channel.  This is useful for testing
+changes that affect the browser-side wheel installation, such as new Pyodide
+functionality or notebook helpers, without changing the production
+``wheels/latest.txt`` pointer.
+
+The wheel publishing workflow supports two channels:
+
+- production: ``https://ironarray.github.io/Caterva2/wheels/``
+- staging: ``https://ironarray.github.io/Caterva2/wheels-staging/``
+
+Each channel gets its own ``latest.txt`` file:
+
+- production: ``https://ironarray.github.io/Caterva2/wheels/latest.txt``
+- staging: ``https://ironarray.github.io/Caterva2/wheels-staging/latest.txt``
+
+The staging channel is published by manually running the
+``Build and Publish Python Wheels for Caterva2`` workflow with
+``channel=staging``.
+
+To do a staging release:
+
+- Push the branch you want to test to GitHub.
+
+- Open the workflow page for
+  ``Build and Publish Python Wheels for Caterva2``.
+
+- Click ``Run workflow``.
+
+- Select the branch to build.
+
+- Select ``channel=staging``.
+
+- Run the workflow.
+
+After it finishes, the built wheel is available under ``wheels-staging/``;
+the production ``wheels/`` channel is left untouched.
+
+The workflow also publishes these helper files in the selected channel:
+
+- ``latest.txt``: latest wheel filename in that channel
+- ``commit.txt``: commit SHA used to build the wheel
+- ``ref.txt``: Git ref name used to build the wheel
+- ``channel.txt``: published channel name
+
+Testing a staging wheel from JupyterLite
+----------------------------------------
+
+For notebook testing, point the Pyodide install to the staging channel instead
+of the production one.  For example::
+
+  import sys
+  if sys.platform == "emscripten":
+      import requests
+      import micropip
+
+      caterva_latest_url = "https://ironarray.github.io/Caterva2/wheels-staging/latest.txt"
+      caterva_wheel_name = requests.get(caterva_latest_url).text.strip()
+      caterva_wheel_url = f"https://ironarray.github.io/Caterva2/wheels-staging/{caterva_wheel_name}"
+      await micropip.install(caterva_wheel_url)
+      print(f"Installed staging wheel: {caterva_wheel_name}")
+
+Use a fresh browser tab or kernel when testing a new staging wheel, so Pyodide
+does not reuse a previously installed package from the same session.
+
+
 Check documentation
 -------------------
 
diff --git a/TESTING-LLM.md b/TESTING-LLM.md
new file mode 100644
index 00000000..56fac327
--- /dev/null
+++ b/TESTING-LLM.md
@@ -0,0 +1,174 @@
+# Testing the Server-Side LLM Integration from JupyterLite
+
+This document describes how to test the new server-side LLM feature from a
+JupyterLite notebook, using a staging Caterva2 wheel first so the production
+wheel channel is not affected.
+
+## 1. Publish a staging Caterva2 wheel
+
+1. Push the branch you want to test to GitHub.
+
+2. Open the GitHub Actions workflow:
+   `Build and Publish Python Wheels for Caterva2`
+
+3. Click `Run workflow`.
+
+4. Select the branch you want to test.
+
+5. Select `channel=staging`.
+
+6. Run the workflow and wait for it to finish successfully.
+
+## 2. Verify the staging wheel was published
+
+Check the staging wheel channel:
+
+- `https://ironarray.github.io/Caterva2/wheels-staging/latest.txt`
+
+Optional metadata checks:
+
+- `https://ironarray.github.io/Caterva2/wheels-staging/commit.txt`
+- `https://ironarray.github.io/Caterva2/wheels-staging/ref.txt`
+- `https://ironarray.github.io/Caterva2/wheels-staging/channel.txt`
+
+Make sure the commit and ref match the branch you intended to test.
+
+## 3. Start the Caterva2 server with the new backend
+
+The notebook wheel only provides the client-side Python package.  The server
+must also be running the new backend code from the same branch.
+
+Before testing, ensure:
+
+- the Caterva2 server is started from this branch
+- LLM support is enabled in the server configuration
+- the desired provider is configured
+- the required provider API key is available in the server environment if using
+  a real provider such as Groq
+
+If you only want a lightweight backend smoke test, you can also configure the
+server to use the `mock` provider.
+
+## 4. Point the notebook to the staging wheel
+
+In your JupyterLite test notebook, replace the Caterva2 production wheel
+bootstrap with the staging URL.
+
+Example:
+
+```python
+import sys
+
+if sys.platform == "emscripten":
+    import requests
+    import micropip
+
+    caterva_latest_url = (
+        "https://ironarray.github.io/Caterva2/wheels-staging/latest.txt"
+    )
+    caterva_wheel_name = requests.get(caterva_latest_url).text.strip()
+    caterva_wheel_url = (
+        f"https://ironarray.github.io/Caterva2/wheels-staging/{caterva_wheel_name}"
+    )
+    await micropip.install(caterva_wheel_url)
+    print(f"Installed staging wheel: {caterva_wheel_name}")
+```
+
+## 5. Open a fresh JupyterLite session
+
+Use a fresh browser tab or a fresh notebook kernel before testing.  This avoids
+reusing a previously installed `caterva2` wheel from the same Pyodide session.
+
+## 6. Open the LLM test notebook
+
+Open:
+
+- `_caterva2/state/personal/cd46395a-3517-4c48-baba-186d14b0fd94/prova3.ipynb`
+
+This notebook contains helper code for:
+
+- creating a server-side LLM session
+- sending prompts with `ask(...)`
+- resetting the session
+- deleting the session
+
+## 7. Run the notebook cells
+
+1. Run the bootstrap cell and confirm the staging Caterva2 wheel installs.
+
+2. Run the LLM setup cell and confirm it prints an LLM session id.
+
+## 8. Run smoke-test prompts
+
+Use the helper functions from the notebook to test the main flow:
+
+```python
+ask("List the available roots")
+ask("List datasets under @public/dir1")
+ask("Show metadata for @public/ds-1d.b2nd")
+ask("Show stats for @public/ds-1d.b2nd", show_trace=True)
+```
+
+Check that:
+
+- the response text is returned
+- the trace output lists the expected tool calls
+- metadata and stats look correct
+
+## 9. Test session lifecycle
+
+From the notebook, test:
+
+```python
+reset_agent_session()
+ask("List the available roots")
+delete_agent_session()
+new_agent_session()
+```
+
+Confirm that:
+
+- reset keeps the session usable
+- delete removes the current session
+- a new session can be created afterward
+
+## 10. Test authentication behavior
+
+If login is enabled on the server:
+
+- test from an authenticated JupyterLite session
+- confirm the LLM session can be created and used
+- confirm anonymous access is rejected when `allow_public_access` is `false` in the `[server.llm]` config section
+
+## 11. Check server-side behavior
+
+While exercising the notebook, inspect the Caterva2 server logs and verify:
+
+- requests are reaching `/api/llm-agent/...`
+- the expected provider is being used
+- tool failures, auth failures, or provider errors are visible and readable
+
+## 12. After the staging test
+
+If the staging test passes:
+
+1. restore the notebook bootstrap to production URLs, unless you want to keep a
+   staging-only notebook
+2. publish the production wheel channel
+3. rerun the same notebook smoke tests against the production wheel
+
+## Quick checklist
+
+- branch pushed to GitHub
+- staging wheel published
+- staging wheel URLs verified
+- server started from the tested branch
+- LLM backend enabled on the server
+- provider config and API key verified
+- fresh JupyterLite session opened
+- notebook installs Caterva2 from `wheels-staging`
+- session creation works
+- prompts work
+- reset/delete/new session works
+- auth behavior is correct
+- server logs look good
diff --git a/caterva2-server.sample.toml b/caterva2-server.sample.toml
index fc1f1b16..6f69a2c4 100644
--- a/caterva2-server.sample.toml
+++ b/caterva2-server.sample.toml
@@ -19,3 +19,12 @@ urlbase = "http://localhost:8000"
 quota = "10G"
 maxusers = 5
 register = true  # allow users to register
+
+[server.llm]
+enabled = true
+# provider = "mock"  # alternative for smoke tests; needs no API key
+provider = "groq"
+model = "openai/gpt-oss-20b"
+# model = "openai/gpt-oss-120b"
+allow_public_access = false
+session_ttl_seconds = 1800
diff --git a/caterva2/client.py b/caterva2/client.py
index 126a2a66..8545833a 100644
--- a/caterva2/client.py
+++ b/caterva2/client.py
@@ -875,6 +875,19 @@ def _post(self, url, json=None, auth_cookie=None, timeout=5):
         response.raise_for_status()
         return response.json()
 
+    def _delete(self, url, auth_cookie=None, timeout=5):
+        """Issue an HTTP DELETE to ``url`` and return the JSON-decoded response body."""
+        cookie_header = {"Cookie": auth_cookie} if auth_cookie else None
+        try:
+            reply = self.httpx_client.delete(url, headers=cookie_header, timeout=timeout)
+        except httpx.ReadTimeout as exc:
+            raise TimeoutError(
+                f"Timeout after {timeout} seconds while trying to access {url}. "
+                f"Try increasing the timeout (currently {timeout} s) for Client instance for large datasets."
+            ) from exc
+        reply.raise_for_status()
+        return reply.json()
+
     def _xget(self, url, params=None, headers=None, timeout=5, auth_cookie=None):
         client = self.httpx_client
         # Only set Cookie header if auth_cookie is not None
@@ -1726,3 +1739,85 @@ def listusers(self, username=None):
         """
         url = f"{self.urlbase}/api/listusers/" + (f"?username={username}" if username else "")
         return self._get(url, auth_cookie=self.cookie)
+
+    def create_llm_session(self, name=None, root_hint=None, notebook_path=None):
+        """
+        Open a new LLM agent session on the server.
+
+        Arguments left as ``None`` are omitted from the request body.
+
+        Parameters
+        ----------
+        name : str, optional
+            Label to attach to the session.
+        root_hint : str, optional
+            Root hint to associate with the session.
+        notebook_path : str, optional
+            Notebook path, sent as client context.
+
+        Returns
+        -------
+        dict
+            Session metadata as returned by the server.
+        """
+        # Build the body from the supplied values only.
+        optional = (("name", name), ("root_hint", root_hint), ("notebook_path", notebook_path))
+        body = {key: value for key, value in optional if value is not None}
+        endpoint = f"{self.urlbase}/api/llm-agent/sessions"
+        reply = self._post(endpoint, body, auth_cookie=self.cookie, timeout=self.timeout)
+        return reply
+
+    def get_llm_session(self, session_id):
+        """
+        Fetch the metadata of an existing server-side LLM agent session.
+
+        ``session_id`` is the identifier returned by ``create_llm_session``;
+        the value produced by ``_get`` is passed through unchanged.
+        """
+        endpoint = f"{self.urlbase}/api/llm-agent/sessions/{session_id}"
+        return self._get(endpoint, auth_cookie=self.cookie, timeout=self.timeout)
+
+    def chat_llm(self, session_id, message, context=None):
+        """
+        Send one prompt to an LLM agent session and return the server's reply.
+
+        The request is a POST to the session's ``messages`` endpoint.
+
+        Parameters
+        ----------
+        session_id : str
+            Identifier obtained from ``create_llm_session``.
+        message : str
+            Prompt text for the agent.
+        context : dict, optional
+            Client context metadata; passed as ``None`` when omitted.
+
+        Returns
+        -------
+        dict
+            Assistant response payload.
+        """
+        endpoint = f"{self.urlbase}/api/llm-agent/sessions/{session_id}/messages"
+        body = {"message": message, "context": context}
+        reply = self._post(endpoint, body, auth_cookie=self.cookie, timeout=self.timeout)
+        return reply
+
+    def reset_llm_session(self, session_id):
+        """
+        Ask the server to reset an LLM agent session.
+
+        Sends a POST without a JSON payload to the session's ``reset`` endpoint
+        and returns whatever ``_post`` yields for the server's reply.
+        """
+        endpoint = f"{self.urlbase}/api/llm-agent/sessions/{session_id}/reset"
+        return self._post(endpoint, auth_cookie=self.cookie, timeout=self.timeout)
+
+    def delete_llm_session(self, session_id):
+        """
+        Ask the server to delete an LLM agent session.
+
+        Sends a DELETE to the session's endpoint and returns whatever
+        ``_delete`` yields for the server's reply.
+        """
+        endpoint = f"{self.urlbase}/api/llm-agent/sessions/{session_id}"
+        return self._delete(endpoint, auth_cookie=self.cookie, timeout=self.timeout)
diff --git a/caterva2/services/llm_agent/__init__.py b/caterva2/services/llm_agent/__init__.py
new file mode 100644
index 00000000..c02a26d1
--- /dev/null
+++ b/caterva2/services/llm_agent/__init__.py
@@ -0,0 +1 @@
+"""Server-side LLM agent integration for Caterva2."""
diff --git a/caterva2/services/llm_agent/config.py b/caterva2/services/llm_agent/config.py
new file mode 100644
index 00000000..1e932bcf
--- /dev/null
+++ b/caterva2/services/llm_agent/config.py
@@ -0,0 +1,51 @@
+"""Configuration helpers for the server-side LLM agent."""
+
+from __future__ import annotations
+
+import os
+
+from caterva2.services import settings
+
+SYSTEM_PROMPT = """You are a scientific dataset exploration assistant with access to a Caterva2 data server.
+
+The server stores N-dimensional compressed arrays (Blosc2/HDF5 format) organized as:
+- Roots: top-level data collections. Root names always start with '@' (e.g. '@public').
+- Datasets: individual arrays or files within a root, accessed as '@rootname/path/to/file'.
+
+PATH FORMAT RULES:
+- Always preserve the '@' prefix in root names.
+- Paths use '/' as separator.
+- When exploring, browse first, then inspect metadata, then compute statistics.
+
+AVAILABLE TOOLS:
+- list_roots
+- list_datasets
+- get_dataset_info
+- get_dataset_stats
+- get_slice
+
+RULES:
+1. Use tools only when needed.
+2. Be explicit about what came from the tool results.
+3. If a tool fails, explain the failure clearly.
+4. Stop after answering the user's request.
+5. When the user asks to inspect actual values or a subset of a dataset, use get_slice.
+6. For get_slice results with many elements (>100), present the summary first and offer to show full data if requested.
+"""
+
+
+def get_provider_name() -> str:
+    return settings.llm_provider
+
+
+def get_model_name() -> str:
+    return settings.llm_model
+
+
+def get_timeout() -> int:
+    return settings.llm_request_timeout
+
+
+def get_api_key() -> str | None:
+    envvar = settings.llm_api_key_envvar  # name of the env var holding the key; os.getenv(None) would raise
+    return (os.getenv(envvar) if envvar else None) or os.getenv("GROQ_API_KEY")
diff --git a/caterva2/services/llm_agent/core.py b/caterva2/services/llm_agent/core.py
new file mode 100644
index 00000000..a7b7a028
--- /dev/null
+++ b/caterva2/services/llm_agent/core.py
@@ -0,0 +1,153 @@
+"""Agent-loop orchestration for server-side LLM sessions: session helpers and chat turns."""
+
+from __future__ import annotations
+
+import json
+import uuid
+
+from caterva2.services import settings
+
+from . import config, providers, sessions, tools
+from .schemas import AssistantPayload, ChatResponse, TracePayload, TraceToolCall, UsagePayload
+
+
+def owner_for_user(user) -> str:
+    """Return the session-owner key for ``user``: its ``id`` as text, or ``"anonymous"`` when falsy."""
+    return str(user.id) if user else "anonymous"
+
+
+def create_session(*, user, metadata: dict | None = None):
+    """Register a new agent session for ``user`` using the configured model, TTL and session limit."""
+    return sessions.registry.create(
+        owner=owner_for_user(user),
+        model=config.get_model_name(),
+        ttl_seconds=settings.llm_session_ttl_seconds,
+        system_prompt=config.SYSTEM_PROMPT,
+        max_sessions=settings.llm_max_concurrent_sessions,
+        metadata=metadata,
+    )
+
+
+def get_session(session_id: str, *, user):
+    """Look up ``session_id`` in the registry, passing the owner key of ``user`` and the session TTL."""
+    return sessions.registry.get(session_id, owner_for_user(user), settings.llm_session_ttl_seconds)
+
+
+def reset_session(session_id: str, *, user):
+    """Reset ``session_id`` in the registry, passing the owner key of ``user`` and the session TTL."""
+    return sessions.registry.reset(session_id, owner_for_user(user), settings.llm_session_ttl_seconds)
+
+
+def delete_session(session_id: str, *, user):
+    """Delete ``session_id`` from the registry, passing the owner key of ``user``."""
+    return sessions.registry.delete(session_id, owner_for_user(user))
+
+
+def _trim_messages(messages: list[dict]) -> list[dict]:
+    """
+    Return the first message plus the most recent history window.
+
+    The window holds the last ``settings.llm_max_history_messages`` messages, but
+    is widened backwards when it would begin with ``tool`` results: those are only
+    valid right after the assistant message that requested them, so the window
+    must not cut a tool-call exchange in half.
+    """
+    if not messages:
+        return []
+    history = messages[1:]
+    # Start index of the plain last-N slice.
+    start = len(history) - len(history[-settings.llm_max_history_messages :])
+    while 0 < start < len(history) and history[start].get("role") == "tool":
+        start -= 1
+    return [messages[0], *history[start:]]
+
+
+def run_chat_turn(*, session_id: str, user, message: str) -> ChatResponse:
+    """
+    Run one user turn of the agent loop and return the assistant's final reply.
+
+    The user message is appended to the session history, then the provider is
+    called repeatedly: each response that requests tools has them executed and
+    their JSON results appended, until a response arrives without tool calls.
+
+    Raises
+    ------
+    ValueError
+        If ``message`` is longer than ``settings.llm_max_input_chars``.
+    RuntimeError
+        If the session's token budget is exceeded, or no final answer is
+        produced within ``settings.llm_max_iterations`` provider calls.
+    """
+    if len(message) > settings.llm_max_input_chars:
+        raise ValueError(f"Input too long: max {settings.llm_max_input_chars} chars")
+
+    session = get_session(session_id, user=user)
+    # The whole turn runs under the session's own lock.
+    with session.lock:
+        if session.total_tokens_used > settings.llm_max_total_tokens:
+            raise RuntimeError("Token budget exceeded for this session; reset it before continuing")
+
+        session.messages.append({"role": "user", "content": message})
+        provider = providers.get_provider()
+        # Usage summed over every provider call made during this turn.
+        request_usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
+        trace_tool_calls: list[TraceToolCall] = []
+
+        for iteration in range(1, settings.llm_max_iterations + 1):
+            response = provider.complete(
+                messages=_trim_messages(session.messages),
+                tools=tools.TOOLS,
+                tool_choice="auto",
+                temperature=0.2,
+                max_tokens=1024,
+            )
+            for key in request_usage:
+                request_usage[key] += int(response.usage.get(key, 0) or 0)
+            session.total_tokens_used += int(response.usage.get("total_tokens", 0) or 0)
+
+            # No tool requests: this response is the final answer for the turn.
+            if not response.tool_calls:
+                assistant_text = response.content or "[No response from provider]"
+                session.messages.append({"role": "assistant", "content": assistant_text})
+                return ChatResponse(
+                    session_id=session.session_id,
+                    message_id=str(uuid.uuid4()),
+                    assistant=AssistantPayload(text=assistant_text, artifacts=[]),
+                    usage=UsagePayload(
+                        provider=config.get_provider_name(),
+                        model=session.model,
+                        prompt_tokens=request_usage["prompt_tokens"],
+                        completion_tokens=request_usage["completion_tokens"],
+                        total_tokens=request_usage["total_tokens"],
+                        session_total_tokens=session.total_tokens_used,
+                    ),
+                    trace=TracePayload(iterations=iteration, tool_calls=trace_tool_calls),
+                )
+
+            # Record the assistant's tool request before the results that answer it.
+            session.messages.append(
+                {
+                    "role": "assistant",
+                    "content": response.content,
+                    "tool_calls": [tool_call.as_api_dict() for tool_call in response.tool_calls],
+                }
+            )
+
+            for tool_call in response.tool_calls:
+                result = tools.execute_tool(tool_call.name, tool_call.arguments, user=user)
+                trace_tool_calls.append(
+                    TraceToolCall(name=tool_call.name, arguments=tool_call.arguments, ok=result["ok"])
+                )
+                session.messages.append(
+                    {
+                        "role": "tool",
+                        "tool_call_id": tool_call.id,
+                        "name": tool_call.name,
+                        "content": json.dumps(result),
+                    }
+                )
+
+            if session.total_tokens_used > settings.llm_max_total_tokens:
+                raise RuntimeError("Token budget exceeded for this session; reset it before continuing")
+
+        raise RuntimeError("Max iterations reached; please rephrase your request")
diff --git a/caterva2/services/llm_agent/providers.py b/caterva2/services/llm_agent/providers.py
new file mode 100644
index 00000000..7f714bd1
--- /dev/null
+++ b/caterva2/services/llm_agent/providers.py
@@ -0,0 +1,160 @@
+from __future__ import annotations
+
+import json
+import re
+import uuid
+from dataclasses import dataclass
+
+import httpx
+
+from . import config
+
+
+@dataclass
+class NormalizedToolCall:
+    """
+    A tool invocation requested by the model, in provider-independent form:
+    the call ``id``, the tool ``name`` and its already-decoded ``arguments``.
+    """
+
+    id: str
+    name: str
+    arguments: dict
+
+    def as_api_dict(self) -> dict:
+        """Return the call as a ``function``-type tool-call dict with JSON-encoded arguments."""
+        function = {"name": self.name, "arguments": json.dumps(self.arguments)}
+        return {"id": self.id, "type": "function", "function": function}
+
+
+@dataclass
+class NormalizedResponse:
+    content: str
+    tool_calls: list[NormalizedToolCall]
+    usage: dict
+
+
+def _extract_path(text: str) -> str | None:
+    found = re.search(r"(@(?:public|shared|personal)(?:/[^\s,;]+)?)", text)  # first @root, plus sub-path if any
+    return None if found is None else found.group(1)
+
+
+def _extract_slice_spec(text: str) -> str | None:
+    match = re.search(
+        r"(?:(?:slice|values?)\s+(?:for|from)\s+.*?\s+)?((?:-?\d*:-?\d*:?-?\d*|-?\d+)(?:\s*,\s*(?:-?\d*:-?\d*:?-?\d*|-?\d+))+|(?:-?\d*:-?\d*:?-?\d*|-?\d+))",
+        text,
+    )
+    if not match:
+        return None
+    candidate = match.group(1).strip()
+    return candidate if any(ch.isdigit() for ch in candidate) else None
+
+
+class MockProvider:
+    name = "mock"
+
+    def complete(self, *, messages, tools, tool_choice, temperature, max_tokens) -> NormalizedResponse:
+        last_message = messages[-1]
+        if last_message["role"] == "tool":
+            tool_messages = []
+            for message in reversed(messages):
+                if message["role"] != "tool":
+                    break
+                tool_messages.append(message["content"])
+            tool_messages.reverse()
+            return NormalizedResponse(
+                content="\n".join(tool_messages),
+                tool_calls=[],
+                usage={"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
+            )
+
+        user_text = last_message.get("content", "")
+        lower = user_text.lower()
+        path = _extract_path(user_text)
+        tool_calls = []
+        if "roots" in lower:
+            tool_calls.append(NormalizedToolCall(str(uuid.uuid4()), "list_roots", {}))
+        elif any(keyword in lower for keyword in ("slice", "values", "rows", "elements")) and path:
+            arguments = {"path": path}
+            slice_spec = _extract_slice_spec(user_text)
+            if slice_spec:
+                arguments["slices"] = slice_spec
+            tool_calls.append(NormalizedToolCall(str(uuid.uuid4()), "get_slice", arguments))
+        elif "stats" in lower and path:
+            tool_calls.append(NormalizedToolCall(str(uuid.uuid4()), "get_dataset_stats", {"path": path}))
+        elif any(keyword in lower for keyword in ("info", "metadata")) and path:
+            tool_calls.append(NormalizedToolCall(str(uuid.uuid4()), "get_dataset_info", {"path": path}))
+        elif path:
+            tool_calls.append(NormalizedToolCall(str(uuid.uuid4()), "list_datasets", {"path": path}))
+        elif any(keyword in lower for keyword in ("list", "datasets")):
+            tool_calls.append(NormalizedToolCall(str(uuid.uuid4()), "list_roots", {}))
+
+        if tool_calls:
+            return NormalizedResponse(
+                content="",
+                tool_calls=tool_calls,
+                usage={"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
+            )
+
+        return NormalizedResponse(
+            content="No tool call was needed for that request.",
+            tool_calls=[],
+            usage={"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
+        )
+
+
+class GroqProvider:
+    """Provider backed by Groq's OpenAI-compatible chat-completions endpoint."""
+
+    name = "groq"
+    base_url = "https://api.groq.com/openai/v1/chat/completions"
+
+    def complete(self, *, messages, tools, tool_choice, temperature, max_tokens) -> NormalizedResponse:
+        """Request one completion and normalize the reply; RuntimeError if the API key is missing."""
+        api_key = config.get_api_key()
+        if not api_key:
+            raise RuntimeError("Missing provider API key for configured LLM provider")
+
+        payload = {
+            "model": config.get_model_name(),
+            "messages": messages,
+            "tools": tools,
+            "tool_choice": tool_choice,
+            "temperature": temperature,
+            "max_tokens": max_tokens,
+        }
+        headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
+        with httpx.Client(timeout=config.get_timeout()) as client:
+            response = client.post(self.base_url, json=payload, headers=headers)
+            response.raise_for_status()
+            data = response.json()
+
+        choice = data["choices"][0]["message"]
+        tool_calls = []
+        for item in choice.get("tool_calls") or []:
+            function = item.get("function") or {}
+            arguments = function.get("arguments") or "{}"
+            if isinstance(arguments, str):
+                try:
+                    arguments = json.loads(arguments)
+                except json.JSONDecodeError:
+                    arguments = {}  # malformed JSON from the model: fall back to no arguments
+            if not isinstance(arguments, dict):
+                arguments = {}  # JSON null, list, scalar: not usable as named arguments
+            call_id = item.get("id") or str(uuid.uuid4())  # also covers an explicit null id
+            tool_calls.append(NormalizedToolCall(call_id, function.get("name", ""), arguments))
+
+        return NormalizedResponse(
+            content=choice.get("content") or "",
+            tool_calls=tool_calls,
+            usage=data.get("usage") or {},
+        )
+
+
+def get_provider():
+    """Instantiate the provider selected in the configuration (name matched case-insensitively)."""
+    provider = config.get_provider_name().lower()
+    factory = {"mock": MockProvider, "groq": GroqProvider}.get(provider)
+    if factory is None:
+        raise RuntimeError(f"Unsupported LLM provider: {provider}")
+    return factory()
diff --git a/caterva2/services/llm_agent/schemas.py b/caterva2/services/llm_agent/schemas.py
new file mode 100644
index 00000000..3ed512a3
--- /dev/null
+++ b/caterva2/services/llm_agent/schemas.py
@@ -0,0 +1,86 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any
+
+from pydantic import BaseModel, ConfigDict, Field
+
+if TYPE_CHECKING:
+    from datetime import datetime
+
+
+class CreateSessionRequest(BaseModel):
+    name: str | None = None
+    root_hint: str | None = None
+    notebook_path: str | None = None
+
+
+class SessionMetadataResponse(BaseModel):
+    session_id: str
+    created_at: datetime
+    expires_at: datetime
+    model: str
+    owner: str
+    message_count: int
+
+
+class CreateSessionResponse(SessionMetadataResponse):
+    pass
+
+
+class ChatRequest(BaseModel):
+    message: str = Field(min_length=1)
+    context: dict[str, Any] | None = None
+
+
+class ArtifactPayload(BaseModel):
+    model_config = ConfigDict(extra="allow")
+
+    type: str
+
+
+class AssistantPayload(BaseModel):
+    text: str
+    artifacts: list[ArtifactPayload] = Field(default_factory=list)
+
+
+class UsagePayload(BaseModel):
+    provider: str
+    model: str
+    prompt_tokens: int = 0
+    completion_tokens: int = 0
+    total_tokens: int = 0
+    session_total_tokens: int = 0
+
+
+class TraceToolCall(BaseModel):
+    name: str
+    arguments: dict[str, Any] = Field(default_factory=dict)
+    ok: bool = True
+
+
+class TracePayload(BaseModel):
+    iterations: int
+    tool_calls: list[TraceToolCall] = Field(default_factory=list)
+
+
+class ChatResponse(BaseModel):
+    session_id: str
+    message_id: str
+    assistant: AssistantPayload
+    usage: UsagePayload
+    trace: TracePayload
+
+
+class ResetSessionResponse(BaseModel):
+    session_id: str
+    reset: bool
+    message_count: int
+
+
+class DeleteSessionResponse(BaseModel):
+    session_id: str
+    deleted: bool
+
+
+SessionMetadataResponse.model_rebuild(_types_namespace={"datetime": __import__("datetime").datetime})
+CreateSessionResponse.model_rebuild(_types_namespace={"datetime": __import__("datetime").datetime})
diff --git a/caterva2/services/llm_agent/sessions.py b/caterva2/services/llm_agent/sessions.py
new file mode 100644
index 00000000..55bf1285
--- /dev/null
+++ b/caterva2/services/llm_agent/sessions.py
@@ -0,0 +1,94 @@
+from __future__ import annotations
+
+import threading
+import uuid
+from dataclasses import dataclass, field
+from datetime import UTC, datetime, timedelta
+
+
+@dataclass
+class AgentSession:
+    session_id: str
+    owner: str
+    model: str
+    created_at: datetime
+    expires_at: datetime
+    messages: list[dict]
+    total_tokens_used: int = 0
+    metadata: dict = field(default_factory=dict)
+    lock: threading.RLock = field(default_factory=threading.RLock, repr=False)
+
+
+class SessionRegistry:
+    def __init__(self):
+        self._lock = threading.RLock()
+        self._sessions: dict[str, AgentSession] = {}
+
+    def _cleanup(self) -> None:
+        now = datetime.now(UTC)
+        expired = [sid for sid, session in self._sessions.items() if session.expires_at <= now]
+        for sid in expired:
+            self._sessions.pop(sid, None)
+
+    def create(
+        self,
+        *,
+        owner: str,
+        model: str,
+        ttl_seconds: int,
+        system_prompt: str,
+        max_sessions: int,
+        metadata: dict | None = None,
+    ) -> AgentSession:
+        with self._lock:
+            self._cleanup()
+            active_sessions = sum(1 for session in self._sessions.values() if session.owner == owner)
+            if active_sessions >= max_sessions:
+                raise RuntimeError("Maximum concurrent agent sessions reached")
+
+            now = datetime.now(UTC)
+            session_id = str(uuid.uuid4())
+            session = AgentSession(
+                session_id=session_id,
+                owner=owner,
+                model=model,
+                created_at=now,
+                expires_at=now + timedelta(seconds=ttl_seconds),
+                messages=[{"role": "system", "content": system_prompt}],
+                metadata=metadata or {},
+            )
+            self._sessions[session_id] = session
+            return session
+
+    def get(self, session_id: str, owner: str, ttl_seconds: int) -> AgentSession:
+        with self._lock:
+            self._cleanup()
+            session = self._sessions.get(session_id)
+            if session is None:
+                raise KeyError(session_id)
+            if session.owner != owner:
+                raise PermissionError(session_id)
+            session.expires_at = datetime.now(UTC) + timedelta(seconds=ttl_seconds)
+            return session
+
+    def reset(self, session_id: str, owner: str, ttl_seconds: int) -> AgentSession:
+        with self._lock:
+            session = self.get(session_id, owner, ttl_seconds)
+            system_prompt = session.messages[0]
+            session.messages = [system_prompt]
+            session.total_tokens_used = 0
+            return session
+
+    def delete(self, session_id: str, owner: str) -> bool:
+        with self._lock:
+            self._cleanup()
+            session = self._sessions.get(session_id)
+            if session is None:
+                raise KeyError(session_id)
+            if session.owner != owner:
+                raise PermissionError(session_id)
+            del self._sessions[session_id]
+            return True
+
+
+registry = SessionRegistry()
diff --git a/caterva2/services/llm_agent/tools.py b/caterva2/services/llm_agent/tools.py
new file mode 100644
index 00000000..9d68be35
--- /dev/null
+++ b/caterva2/services/llm_agent/tools.py
@@ -0,0 +1,315 @@
+from __future__ import annotations
+
+import json
+import pathlib
+from typing import Any
+
+import numpy as np
+
+from caterva2.services import server, srv_utils
+
+TOOLS = [
+    {
+        "type": "function",
+        "function": {
+            "name": "list_roots",
+            "description": "List the Caterva2 roots available to the current user.",
+            "parameters": {"type": "object", "properties": {}, "required": []},
+        },
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "list_datasets",
+            "description": "List datasets under a root or sub-path.",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "path": {"type": "string"},
+                    "limit": {"type": "integer"},
+                    "offset": {"type": "integer"},
+                },
+                "required": ["path"],
+            },
+        },
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "get_dataset_info",
+            "description": "Get metadata for a specific dataset.",
+            "parameters": {
+                "type": "object",
+                "properties": {"path": {"type": "string"}},
+                "required": ["path"],
+            },
+        },
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "get_dataset_stats",
+            "description": "Compute summary statistics for a dataset.",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "path": {"type": "string"},
+                    "stats": {"type": "array", "items": {"type": "string"}},
+                    "axis": {"type": "integer"},
+                },
+                "required": ["path"],
+            },
+        },
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "get_slice",
+            "description": (
+                "Retrieve a slice of data values from a dataset. "
+                "Use this when the user asks to inspect actual values rather than only metadata or statistics. "
+                "Limited to 10,000 elements maximum."
+            ),
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "path": {"type": "string"},
+                    "slices": {
+                        "type": "string",
+                        "description": (
+                            "Slice specification using Python syntax such as '0:10', "
+                            "'0:5, 0:3', ':, 0', or '0, :, 0:10'."
+                        ),
+                    },
+                },
+                "required": ["path"],
+            },
+        },
+    },
+]
+
+DEFAULT_STATS = ["min", "max", "mean", "std"]
+SUPPORTED_STATS = {"min", "max", "mean", "sum", "std", "var", "argmin", "argmax", "any", "all"}
+MAX_SLICE_ELEMENTS = 10_000
+SUMMARY_THRESHOLD = 100
+
+
+def _json_safe(value):  # Recursively convert value into JSON-serializable builtins.
+    if hasattr(value, "model_dump"):  # objects exposing model_dump() are dumped first, then converted
+        return _json_safe(value.model_dump())
+    if isinstance(value, dict):  # keys are stringified
+        return {str(key): _json_safe(item) for key, item in value.items()}
+    if isinstance(value, (list, tuple)):  # tuples come back as lists
+        return [_json_safe(item) for item in value]
+    if isinstance(value, np.ndarray):  # recurse: tolist() can leave bytes/datetime/object items
+        return _json_safe(value.tolist())
+    if isinstance(value, np.integer):
+        return int(value)
+    if isinstance(value, np.floating):
+        return float(value)
+    if isinstance(value, np.bool_):
+        return bool(value)
+    if isinstance(value, (str, int, float, bool)) or value is None:
+        return value
+    return str(value)  # last resort: anything else is represented by its str()
+
+
+def list_roots(*, user):
+    roots = [root for root, _ in server.filter_roots(["@public", "@shared", "@personal"], user)]
+    return {"roots": roots}
+
+
+def list_datasets(*, user, path: str, limit: int = 50, offset: int = 0):
+    directory = server.get_writable_path(pathlib.Path(path), user)
+    if directory.is_file():
+        full_paths = [path]
+    else:
+        datasets = [
+            str(relpath.with_suffix("") if relpath.suffix == ".b2" else relpath)
+            for _, relpath in srv_utils.walk_files(directory)
+        ]
+        datasets = sorted(datasets)
+        full_paths = [f"{path}/{name}" for name in datasets]
+    page = full_paths[offset : offset + limit]
+    return {
+        "path": path,
+        "datasets": page,
+        "total": len(full_paths),
+        "offset": offset,
+        "has_more": offset + limit < len(full_paths),
+    }
+
+
+def get_dataset_info(*, user, path: str):
+    abspath = server.get_abspath(pathlib.Path(path), user)
+    if abspath.is_dir():
+        raise FileNotFoundError(f"Dataset not found: {path}")
+    return {"path": path, "info": srv_utils.read_metadata(abspath)}
+
+
+def get_dataset_stats(*, user, path: str, stats: list[str] | None = None, axis: int | None = None):
+    stats = stats or DEFAULT_STATS
+    invalid_stats = set(stats) - SUPPORTED_STATS
+    if invalid_stats:
+        raise ValueError(f"Unsupported statistics: {sorted(invalid_stats)}")
+
+    dataset = server.open_b2(server.get_abspath(pathlib.Path(path), user), pathlib.Path(path))
+    if not hasattr(dataset, "shape"):
+        raise TypeError(f"Target is not a dataset: {path}")
+
+    result = {
+        "path": path,
+        "shape": list(dataset.shape),
+        "dtype": str(dataset.dtype),
+        "axis": axis,
+        "stats": {},
+    }
+    for stat_name in stats:
+        method = getattr(dataset, stat_name)
+        result["stats"][stat_name] = _json_safe(method(axis=axis))
+    return result
+
+
+def _parse_slice_string(slice_str: str, shape: tuple) -> tuple:
+    # Turn a comma-separated spec such as "0:5, :, 3" into a tuple of slice/int indices,
+    # one per part; raises ValueError on more parts than dimensions or on bad syntax.
+    tokens = [token.strip() for token in slice_str.split(",")]
+    if len(tokens) > len(shape):
+        raise ValueError(f"Too many dimensions in slice: got {len(tokens)}, dataset has {len(shape)}")
+
+    def _bound(text):
+        # An empty field (as in ":5" or "2:") leaves that side of the slice open.
+        return int(text) if text else None
+
+    indices = []
+    for token in tokens:
+        if token in ("", ":"):
+            # Whole axis.
+            indices.append(slice(None))
+        elif ":" not in token:
+            # Plain integer index.
+            indices.append(int(token))
+        else:
+            fields = token.split(":")
+            if len(fields) > 3:
+                raise ValueError(f"Invalid slice syntax: '{token}'")
+            indices.append(slice(*map(_bound, fields)))
+    return tuple(indices)
+
+
+def _compute_slice_size(slices: tuple, shape: tuple) -> int:
+    # Count the elements that indexing an array of `shape` with `slices` would select,
+    # using only the shape (no data is read).
+    count = 1
+
+    # zip() stops at the shorter sequence, so surplus indices are ignored.
+    for index, extent in zip(slices, shape):
+        # Integer indices select a single position (factor of 1); only slices scale the count.
+        if isinstance(index, slice):
+            # slice.indices() clamps start/stop/step to the axis length.
+            count *= len(range(*index.indices(extent)))
+
+    # Trailing axes with no index are taken in full.
+    for extent in shape[len(slices) :]:
+        count *= extent
+    return count
+
+
+def _default_slice_for_shape(shape: tuple, max_elements: int) -> tuple:
+    # Build a leading-corner slice tuple meant to stay within `max_elements` elements.
+    if len(shape) <= 1:
+        # 0-d yields () (it used to hit a ZeroDivisionError below); 1-d is capped at max_elements.
+        return tuple(slice(0, min(dim_size, max_elements)) for dim_size in shape)
+
+    # N-d: give every axis the same budget, the N-th root of max_elements (at least 1).
+    elements_per_dim = max(1, int(max_elements ** (1 / len(shape))))
+    return tuple(slice(0, min(dim_size, elements_per_dim)) for dim_size in shape)
+
+
+def _generate_preview(data, max_chars: int = 200) -> str:
+    # Text preview of `data` (via tolist() when available), cut to max_chars plus "..." if longer.
+    rendered = str(getattr(data, "tolist", lambda: data)())
+    suffix = "..." if len(rendered) > max_chars else ""
+    return rendered[:max_chars] + suffix
+
+
+def _compute_summary(data) -> dict[str, Any]:  # element count, preview and numeric min/max/mean
+    arr = np.asarray(data)
+    summary = {
+        "num_elements": int(arr.size),
+        "preview": _generate_preview(arr),
+    }
+    if arr.size and np.issubdtype(arr.dtype, np.number):  # reductions raise on empty arrays
+        summary["min"] = _json_safe(arr.min())
+        summary["max"] = _json_safe(arr.max())
+        summary["mean"] = _json_safe(arr.mean())
+    return summary
+
+
+def get_slice(*, user, path: str, slices: str | None = None):
+    dataset = server.open_b2(server.get_abspath(pathlib.Path(path), user), pathlib.Path(path))
+    if not hasattr(dataset, "shape"):
+        raise TypeError(f"Target is not a dataset: {path}")
+
+    shape = dataset.shape
+    if slices is None:
+        slice_tuple = _default_slice_for_shape(shape, MAX_SLICE_ELEMENTS)
+        slice_str_used = str(slice_tuple)
+    else:
+        slice_tuple = _parse_slice_string(slices, shape)
+        slice_str_used = slices
+
+    estimated_size = _compute_slice_size(slice_tuple, shape)
+    if estimated_size > MAX_SLICE_ELEMENTS:
+        raise ValueError(
+            f"Requested slice would return ~{estimated_size:,} elements, exceeding limit of "
+            f"{MAX_SLICE_ELEMENTS:,}. Please request a smaller slice."
+        )
+
+    data = dataset[slice_tuple]
+    result_shape = list(data.shape) if hasattr(data, "shape") else []
+    summary = _compute_summary(data)
+    result = {
+        "path": path,
+        "dataset_shape": list(shape),
+        "dtype": str(dataset.dtype),
+        "slice": slice_str_used,
+        "result_shape": result_shape,
+        "summary": summary,
+    }
+    if summary["num_elements"] <= SUMMARY_THRESHOLD:
+        result["data"] = _json_safe(data)
+    else:
+        result["_hint"] = (
+            f"Large result ({summary['num_elements']} elements). "
+            "Present the summary to the user and offer to show full data if requested."
+        )
+    return result
+
+
+TOOL_MAP = {
+    "list_roots": list_roots,
+    "list_datasets": list_datasets,
+    "get_dataset_info": get_dataset_info,
+    "get_dataset_stats": get_dataset_stats,
+    "get_slice": get_slice,
+}
+
+
+def execute_tool(tool_name: str, tool_args: dict[str, Any], *, user) -> dict[str, Any]:
+    tool_function = TOOL_MAP.get(tool_name)
+    if tool_function is None:
+        return {"ok": False, "data": None, "error": {"code": "UNKNOWN_TOOL", "message": tool_name}}
+    try:
+        result = tool_function(user=user, **tool_args)
+        return {"ok": True, "data": _json_safe(result), "error": None}
+    except Exception as exc:
+        return {
+            "ok": False,
+            "data": None,
+            "error": {"code": type(exc).__name__.upper(), "message": str(exc)},
+        }
+
+
+def serialize_tool_result(result: dict[str, Any]) -> str:
+    return json.dumps(result)
diff --git a/caterva2/services/server.py b/caterva2/services/server.py
index 4a30eb50..a048a8fd 100644
--- a/caterva2/services/server.py
+++ b/caterva2/services/server.py
@@ -55,6 +55,7 @@
 # Project
 from caterva2 import hdf5, models, utils
 from caterva2.services import db, schemas, settings, srv_utils, users
+from caterva2.services.llm_agent import schemas as llm_schemas
 
 BASE_DIR = pathlib.Path(__file__).resolve().parent
 
@@ -1373,6 +1374,121 @@ async def add_notebook(
     return path
 
 
+def require_llm_user(user):
+    if user is None and settings.login and not settings.llm_allow_public_access:
+        raise srv_utils.raise_unauthorized("LLM agent access requires authentication")
+    return user
+
+
+def require_llm_enabled():
+    if not settings.llm_enabled:
+        raise fastapi.HTTPException(status_code=503, detail="LLM agent is disabled")
+
+
+def session_to_metadata_response(session):
+    return llm_schemas.SessionMetadataResponse(
+        session_id=session.session_id,
+        created_at=session.created_at,
+        expires_at=session.expires_at,
+        model=session.model,
+        owner=session.owner,
+        message_count=len(session.messages),
+    )
+
+
+@app.post("/api/llm-agent/sessions", response_model=llm_schemas.CreateSessionResponse)
+async def create_llm_session(
+    payload: llm_schemas.CreateSessionRequest,
+    user: db.User = Depends(optional_user),
+):
+    require_llm_enabled()
+    user = require_llm_user(user)
+    from caterva2.services.llm_agent import core
+
+    session = core.create_session(user=user, metadata=payload.model_dump(exclude_none=True))
+    return llm_schemas.CreateSessionResponse(**session_to_metadata_response(session).model_dump())
+
+
+@app.get("/api/llm-agent/sessions/{session_id}", response_model=llm_schemas.SessionMetadataResponse)
+async def get_llm_session(
+    session_id: str,
+    user: db.User = Depends(optional_user),
+):
+    require_llm_enabled()
+    user = require_llm_user(user)
+    from caterva2.services.llm_agent import core
+
+    try:
+        session = core.get_session(session_id, user=user)
+    except KeyError as exc:
+        raise fastapi.HTTPException(status_code=404, detail="LLM session not found") from exc
+    except PermissionError as exc:
+        raise fastapi.HTTPException(status_code=403, detail="LLM session belongs to another user") from exc
+    return session_to_metadata_response(session)
+
+
+@app.post("/api/llm-agent/sessions/{session_id}/messages", response_model=llm_schemas.ChatResponse)
+def post_llm_message(
+    session_id: str,
+    payload: llm_schemas.ChatRequest,
+    user: db.User = Depends(optional_user),
+):
+    require_llm_enabled()
+    user = require_llm_user(user)
+    from caterva2.services.llm_agent import core
+    # Sync handler: run_chat_turn is called without await, so keep it off the event loop (threadpool).
+    try:
+        return core.run_chat_turn(session_id=session_id, user=user, message=payload.message)
+    except KeyError as exc:
+        raise fastapi.HTTPException(status_code=404, detail="LLM session not found") from exc
+    except PermissionError as exc:
+        raise fastapi.HTTPException(status_code=403, detail="LLM session belongs to another user") from exc
+    except ValueError as exc:
+        raise fastapi.HTTPException(status_code=400, detail=str(exc)) from exc
+    except RuntimeError as exc:
+        detail = str(exc)
+        status_code = 503 if "provider" in detail.lower() or "api key" in detail.lower() else 400
+        raise fastapi.HTTPException(status_code=status_code, detail=detail) from exc
+
+
+@app.post("/api/llm-agent/sessions/{session_id}/reset", response_model=llm_schemas.ResetSessionResponse)
+async def reset_llm_session(
+    session_id: str,
+    user: db.User = Depends(optional_user),
+):
+    require_llm_enabled()
+    user = require_llm_user(user)
+    from caterva2.services.llm_agent import core
+
+    try:
+        session = core.reset_session(session_id, user=user)
+    except KeyError as exc:
+        raise fastapi.HTTPException(status_code=404, detail="LLM session not found") from exc
+    except PermissionError as exc:
+        raise fastapi.HTTPException(status_code=403, detail="LLM session belongs to another user") from exc
+    return llm_schemas.ResetSessionResponse(
+        session_id=session.session_id, reset=True, message_count=len(session.messages)
+    )
+
+
+@app.delete("/api/llm-agent/sessions/{session_id}", response_model=llm_schemas.DeleteSessionResponse)
+async def delete_llm_session(
+    session_id: str,
+    user: db.User = Depends(optional_user),
+):
+    require_llm_enabled()
+    user = require_llm_user(user)
+    from caterva2.services.llm_agent import core
+
+    try:
+        core.delete_session(session_id, user=user)
+    except KeyError as exc:
+        raise fastapi.HTTPException(status_code=404, detail="LLM session not found") from exc
+    except PermissionError as exc:
+        raise fastapi.HTTPException(status_code=403, detail="LLM session belongs to another user") from exc
+    return llm_schemas.DeleteSessionResponse(session_id=session_id, deleted=True)
+
+
 #
 # HTML interface
 #
diff --git a/caterva2/services/settings.py b/caterva2/services/settings.py
index 21ed6c49..ca270316 100644
--- a/caterva2/services/settings.py
+++ b/caterva2/services/settings.py
@@ -37,6 +37,19 @@ def parse_size(size):
 quota = parse_size(conf.get(".quota"))
 maxusers = conf.get(".maxusers")
 
+llm_enabled = conf.get(".llm.enabled", False)
+llm_provider = conf.get(".llm.provider", "groq")
+llm_model = conf.get(".llm.model", "openai/gpt-oss-120b")
+llm_api_key_envvar = conf.get(".llm.api_key_envvar", "GROQ_API_KEY")
+llm_max_iterations = conf.get(".llm.max_iterations", 10)
+llm_max_history_messages = conf.get(".llm.max_history_messages", 20)
+llm_max_total_tokens = conf.get(".llm.max_total_tokens", 50000)
+llm_request_timeout = conf.get(".llm.request_timeout", 30)
+llm_session_ttl_seconds = conf.get(".llm.session_ttl_seconds", 1800)
+llm_allow_public_access = conf.get(".llm.allow_public_access", not login)
+llm_max_concurrent_sessions = conf.get(".llm.max_concurrent_sessions", 20)
+llm_max_input_chars = conf.get(".llm.max_input_chars", 5000)
+
 
 # Not strictly necessary but useful for documentation
 statedir = None
diff --git a/caterva2/tests/caterva2-server-login.toml b/caterva2/tests/caterva2-server-login.toml
index 6428fe89..967d8059 100644
--- a/caterva2/tests/caterva2-server-login.toml
+++ b/caterva2/tests/caterva2-server-login.toml
@@ -2,3 +2,9 @@
 login = true
 register = true
 maxusers = 5
+
+[server.llm]
+enabled = true
+provider = "mock"
+allow_public_access = false
+session_ttl_seconds = 300
diff --git a/caterva2/tests/caterva2-server-nologin.toml b/caterva2/tests/caterva2-server-nologin.toml
index 071b3e28..08c5b5ce 100644
--- a/caterva2/tests/caterva2-server-nologin.toml
+++ b/caterva2/tests/caterva2-server-nologin.toml
@@ -1,4 +1,10 @@
 [server]
 login = false
 register = false
 maxusers = 5
+
+[server.llm]
+enabled = true
+provider = "mock"
+allow_public_access = true
+session_ttl_seconds = 300
diff --git a/caterva2/tests/test_api.py b/caterva2/tests/test_api.py
index 6d87cbf6..88c11756 100644
--- a/caterva2/tests/test_api.py
+++ b/caterva2/tests/test_api.py
@@ -72,6 +72,78 @@ def test_get_root(client, auth_client):
         assert myshared.urlbase == auth_client.urlbase
 
 
+def test_llm_session_lifecycle(client, auth_client, fill_public):
+    active_client = auth_client if auth_client else client
+
+    session = active_client.create_llm_session(name="pytest")
+    session_id = session["session_id"]
+    assert session["message_count"] == 1
+
+    metadata = active_client.get_llm_session(session_id)
+    assert metadata["session_id"] == session_id
+    assert metadata["message_count"] == 1
+
+    roots_reply = active_client.chat_llm(session_id, "List the available roots")
+    assert roots_reply["session_id"] == session_id
+    assert "@public" in roots_reply["assistant"]["text"]
+
+    datasets_reply = active_client.chat_llm(session_id, "List datasets under @public/dir1")
+    assert "@public/dir1/ds-2d.b2nd" in datasets_reply["assistant"]["text"]
+    assert "@public/dir1/ds-3d.b2nd" in datasets_reply["assistant"]["text"]
+
+    info_reply = active_client.chat_llm(session_id, "Show metadata for @public/ds-1d.b2nd")
+    assert '"path": "@public/ds-1d.b2nd"' in info_reply["assistant"]["text"]
+    assert '"dtype"' in info_reply["assistant"]["text"]
+
+    stats_reply = active_client.chat_llm(session_id, "Show stats for @public/ds-1d.b2nd")
+    assert '"min"' in stats_reply["assistant"]["text"]
+    assert '"max"' in stats_reply["assistant"]["text"]
+
+    slice_reply = active_client.chat_llm(session_id, "Show values 0:5 for @public/ds-1d.b2nd")
+    assert "0" in slice_reply["assistant"]["text"]
+    assert "4" in slice_reply["assistant"]["text"]
+    assert any(call["name"] == "get_slice" for call in slice_reply["trace"]["tool_calls"])
+
+    reset = active_client.reset_llm_session(session_id)
+    assert reset["reset"] is True
+    assert reset["message_count"] == 1
+
+    delete = active_client.delete_llm_session(session_id)
+    assert delete == {"session_id": session_id, "deleted": True}
+
+
+def test_llm_requires_auth_when_login_enabled(auth_client, services):
+    # Anonymous session creation must be rejected with 401 on a login-enabled server.
+    if not auth_client:
+        pytest.skip("authentication support needed")
+
+    response = httpx.post(f"{services.get_urlbase()}/api/llm-agent/sessions", json={"name": "anon"})
+    assert response.status_code == 401
+
+
+def test_llm_session_ownership(auth_client, services, fill_auth):
+    if not auth_client:
+        return pytest.skip("authentication support needed")
+
+    session = auth_client.create_llm_session(name="owner-check")
+    username = "llm-owner-check@example.com"
+    password = "foobar11"
+    auth_client.adduser(username, password)
+    try:
+        other_client = cat2.Client(services.get_urlbase(), (username, password))
+
+        response = httpx.get(
+            f"{services.get_urlbase()}/api/llm-agent/sessions/{session['session_id']}",
+            headers={"Cookie": other_client.cookie},
+        )
+        assert response.status_code == 403
+    finally:
+        with contextlib.suppress(Exception):
+            auth_client.delete_llm_session(session["session_id"])
+        with contextlib.suppress(Exception):
+            auth_client.deluser(username)
+
+
 def test_get_file(client, fill_public):
     myfile = client.get("@public/README.md")
     assert myfile.name == "README.md"
diff --git a/plans/ss-llm-agent.md b/plans/ss-llm-agent.md
new file mode 100644
index 00000000..645f4523
--- /dev/null
+++ b/plans/ss-llm-agent.md
@@ -0,0 +1,890 @@
+# Server-Side LLM Agent Integration Plan
+
+## Objective
+
+Integrate `Caterva2-LLM-Agent` into Caterva2 as a server-side capability exposed through FastAPI, with JupyterLite notebooks acting only as thin clients.
+
+The browser-side notebook must not hold provider API keys or execute provider SDK logic directly. All LLM calls and tool execution should happen in the Caterva2 server process or in a closely related server-side module.
+
+## Implementation Status
+
+Phase 1 is implemented in this branch.
+
+Delivered:
+
+- vendored server-side agent package under `caterva2/services/llm_agent/`
+- FastAPI endpoints under `/api/llm-agent/...`
+- in-memory TTL-backed session registry with per-session locking
+- read-only tool set:
+  - `list_roots`
+  - `list_datasets`
+  - `get_dataset_info`
+  - `get_dataset_stats`
+  - `get_slice`
+- provider abstraction with:
+  - `mock` provider for tests and local verification
+  - `groq` provider for real server-side calls
+- `caterva2.Client` helpers:
+  - `create_llm_session()`
+  - `get_llm_session()`
+  - `chat_llm()`
+  - `reset_llm_session()`
+  - `delete_llm_session()`
+- JupyterLite notebook example wiring in
+  `_caterva2/state/personal/cd46395a-3517-4c48-baba-186d14b0fd94/prova3.ipynb`
+- staging wheel channel support in the GitHub wheel workflow
+- release and developer documentation for the staging wheel flow
+
+Not implemented yet:
+
+- persisted agent sessions
+- streaming responses
+- richer artifacts beyond plain response payloads
+- a dedicated widget chat UI for the server-side agent
+- additional data-access and mutation tools
+
+## Why Server-Side
+
+- JupyterLite notebooks run in Pyodide in the browser, not in the server conda env.
+- Provider API keys must stay server-side.
+- The current `Caterva2-LLM-Agent` codebase assumes local Python imports, local filesystem logging, and direct provider access.
+- Caterva2 already has the right server and notebook plumbing to expose a new API and consume it from notebooks.
+
+## End State
+
+At the end of this work:
+
+- Caterva2 exposes authenticated FastAPI endpoints for agent sessions and chat turns.
+- The server owns provider credentials and LLM client instantiation.
+- Agent tools use Caterva2 server internals or the Caterva2 client library from the server side.
+- JupyterLite notebooks call the Caterva2 agent API with `fetch` or Python `requests/httpx` from Pyodide.
+- A reusable notebook helper or widget UI can be injected or imported without shipping provider secrets to the browser.
+- `Caterva2-LLM-Agent` becomes either:
+  - a vendored server module inside Caterva2, or
+  - an installable library consumed by Caterva2 with a clean package API.
+
+## Non-Goals
+
+- Running the agent provider SDK in Pyodide.
+- Exposing provider keys to users.
+- Reproducing the current notebook-local `ask()` pattern as the primary architecture.
+- Full multi-agent orchestration.
+- Arbitrary code execution by the agent.
+
+## Recommended Architecture
+
+## High-Level Model
+
+1. JupyterLite notebook sends a message to Caterva2 over HTTPS.
+2. FastAPI endpoint authenticates the user and resolves the agent session.
+3. Server-side agent loop calls the provider and executes Caterva2 tools.
+4. Server returns a structured response payload:
+   - assistant text
+   - optional structured artifacts
+   - usage and trace metadata
+5. Notebook renders the payload.
+
+## Session Ownership
+
+- Agent sessions should be scoped to the authenticated user.
+- Each session should have a server-generated `session_id`.
+- Session state should live outside the browser.
+- Session state should survive multiple notebook cells and page reloads if desired.
+
+Recommended persistence model:
+
+- Phase 1: in-memory session registry with TTL.
+- Phase 2: persisted session state in SQLite or the existing Caterva2 DB layer.
+
+## Integration Strategy
+
+Use Caterva2 as the integration host and adapt `Caterva2-LLM-Agent` into a library-like core.
+
+Recommended decomposition:
+
+- `agent core`
+  - provider-agnostic loop
+  - conversation state
+  - tool dispatch
+  - response shaping
+- `provider adapter`
+  - Groq or future providers
+  - reads credentials from server config/env
+- `Caterva2 tool adapter`
+  - list roots
+  - list datasets
+  - dataset info
+  - dataset stats
+  - future dataset access tools
+- `FastAPI transport`
+  - session endpoints
+  - chat endpoint
+  - error and auth handling
+- `notebook client`
+  - minimal helper for session creation and chat calls
+  - optional `ipywidgets` UI
+
+## Packaging Decision
+
+Two acceptable paths:
+
+### Option A: Vendor into Caterva2
+
+Move the reusable logic into something like:
+
+- `caterva2/services/llm_agent/`
+
+Pros:
+
+- simplest deployment
+- direct control over imports and config
+- easier access to Caterva2 internals
+
+Cons:
+
+- duplicates project ownership boundaries
+
+### Option B: Keep `Caterva2-LLM-Agent` as a dependency
+
+Refactor `../Caterva2-LLM-Agent` into a proper package and consume it from Caterva2.
+
+Pros:
+
+- cleaner project separation
+- reusable outside Caterva2
+
+Cons:
+
+- requires packaging cleanup first
+- introduces version coordination
+
+Recommendation: start with Option A for fastest delivery, then extract a reusable package later if needed.
+
+## Required Refactor in `Caterva2-LLM-Agent`
+
+Even if vendored, the current code needs structural cleanup before integration.
+
+### 1. Fix package imports
+
+Current files use same-directory imports such as:
+
+- `from config import ...`
+- `from tools import ...`
+- `from agent import ...`
+
+These need to become package-relative or be reorganized into explicit modules.
+
+### 2. Separate CLI and notebook glue from core agent logic
+
+The current repository mixes:
+
+- provider config
+- core loop
+- CLI entrypoint
+- notebook UI code
+
+Refactor into:
+
+- `core.py`
+- `providers.py`
+- `tools.py`
+- `schemas.py`
+- `session.py`
+- `api_models.py`
+- `cli.py` or notebook examples outside the core
+
+### 3. Remove direct `.env` assumptions from core
+
+Provider configuration must be injected by Caterva2 server config, not loaded implicitly from `find_dotenv()`.
+
+### 4. Remove thread dependency in the core API surface
+
+Server-side threads are fine if desired, but the public agent core should not require `ThreadPoolExecutor`.
+The orchestration layer should decide whether tool execution is:
+
+- sequential
+- threaded
+- async
+
+### 5. Replace filesystem logging assumptions
+
+Current rotating log file behavior is not ideal as a hardcoded library default.
+
+Use Caterva2 logging configuration instead:
+
+- logger namespaced under `caterva2.llm_agent`
+- no automatic filesystem writes unless explicitly configured
+
+## Caterva2 Server Changes
+
+## New Module Layout
+
+Recommended new server modules:
+
+- `caterva2/services/llm_agent/__init__.py`
+- `caterva2/services/llm_agent/config.py`
+- `caterva2/services/llm_agent/providers.py`
+- `caterva2/services/llm_agent/core.py`
+- `caterva2/services/llm_agent/tools.py`
+- `caterva2/services/llm_agent/sessions.py`
+- `caterva2/services/llm_agent/schemas.py`
+
+Optional:
+
+- `caterva2/services/llm_agent/notebook_client.py`
+- `caterva2/services/llm_agent/render.py`
+
+Actual phase 1 implementation note:
+
+- API route definitions were added directly to `caterva2/services/server.py`
+  instead of introducing a separate `router.py` module.
+
+## Server Configuration
+
+Add config keys for the agent to Caterva2 settings.
+
+Suggested settings:
+
+- `llm.enabled`
+- `llm.provider`
+- `llm.model`
+- `llm.api_key_envvar`
+- `llm.max_iterations`
+- `llm.max_history_messages`
+- `llm.max_total_tokens`
+- `llm.request_timeout`
+- `llm.session_ttl_seconds`
+- `llm.allow_public_access`
+- `llm.max_concurrent_sessions`
+- `llm.max_input_chars`
+
+Environment variables:
+
+- `CATERVA2_LLM_ENABLED`
+- `CATERVA2_LLM_PROVIDER`
+- `CATERVA2_LLM_MODEL`
+- `CATERVA2_LLM_API_KEY`
+
+If Groq is the first provider:
+
+- `GROQ_API_KEY`
+
+Recommendation: normalize into Caterva2 settings and only read provider env vars in one place.
+
+## FastAPI API Design
+
+## Base Path
+
+Suggested base path:
+
+- `/api/llm-agent`
+
+## Endpoints
+
+### `POST /api/llm-agent/sessions`
+
+Create a new session.
+
+Request:
+
+```json
+{
+  "name": "optional notebook session label",
+  "root_hint": "@personal"
+}
+```
+
+Response:
+
+```json
+{
+  "session_id": "uuid",
+  "created_at": "timestamp",
+  "expires_at": "timestamp",
+  "model": "configured-model"
+}
+```
+
+### `GET /api/llm-agent/sessions/{session_id}`
+
+Return session metadata and status.
+
+### `DELETE /api/llm-agent/sessions/{session_id}`
+
+Delete the session and its state.
+
+### `POST /api/llm-agent/sessions/{session_id}/messages`
+
+Submit a user turn and get the assistant response.
+
+Request:
+
+```json
+{
+  "message": "show me the datasets under @public/examples",
+  "stream": false,
+  "context": {
+    "notebook_path": "@personal/user/prova3.ipynb"
+  }
+}
+```
+
+Response:
+
+```json
+{
+  "session_id": "uuid",
+  "message_id": "uuid",
+  "assistant": {
+    "text": "I found ...",
+    "artifacts": []
+  },
+  "usage": {
+    "provider": "groq",
+    "model": "openai/gpt-oss-120b",
+    "prompt_tokens": 0,
+    "completion_tokens": 0,
+    "total_tokens": 0
+  },
+  "trace": {
+    "iterations": 2,
+    "tool_calls": [
+      {"name": "list_roots"},
+      {"name": "list_datasets", "path": "@public/examples"}
+    ]
+  }
+}
+```
+
+### `POST /api/llm-agent/sessions/{session_id}/reset`
+
+Clear the conversation history but keep the session.
+
+### `GET /api/llm-agent/sessions/{session_id}/history`
+
+Optional debugging/admin endpoint.
+
+Disable or restrict in production if needed.
+
+## Streaming
+
+Do not start with streaming.
+
+Phase 1:
+
+- synchronous request-response
+
+Phase 2:
+
+- SSE endpoint such as `POST /api/llm-agent/sessions/{session_id}/messages/stream`
+
+Streaming is useful, but it adds complexity in:
+
+- provider abstraction
+- notebook client rendering
+- proxy and timeout behavior
+
+## Auth and Authorization
+
+The agent endpoints must follow Caterva2 auth rules.
+
+Rules:
+
+- authenticated users can create and own personal sessions
+- session access must be restricted to the owning user
+- public anonymous access should be disabled by default
+- if enabled for demo mode, tools must be read-only and scoped to public roots
+
+Authorization checks must cover:
+
+- session creation
+- session lookup
+- message submission
+- reset and delete
+
+## Session Storage
+
+## Phase 1: In-Memory
+
+Implement a process-local session registry:
+
+- key: `session_id`
+- value:
+  - owner user id
+  - created timestamp
+  - expiry timestamp
+  - message history
+  - token counters
+  - tool trace metadata
+
+Pros:
+
+- fastest to implement
+
+Cons:
+
+- sessions disappear on restart
+- not suitable for multi-process horizontal scaling
+
+## Phase 2: Persistent
+
+Store session records and message history in the Caterva2 DB.
+
+Suggested tables:
+
+- `llm_agent_sessions`
+- `llm_agent_messages`
+- `llm_agent_tool_calls`
+
+Minimal fields:
+
+- session id
+- owner id
+- model
+- created at
+- updated at
+- expires at
+- status
+- serialized message history
+
+Recommendation:
+
+- keep the persistence format simple first
+- do not prematurely normalize every provider-specific detail
+
+## Tool Layer Design
+
+## Initial Tool Set
+
+Mirror the existing agent toolset first:
+
+- `list_roots`
+- `list_datasets`
+- `get_dataset_info`
+- `get_dataset_stats`
+
+## Tool Implementation Choice
+
+Prefer direct server-side Caterva2 integration over making the server call itself over HTTP.
+
+Preferred order:
+
+1. call existing server-side service/helpers directly where practical
+2. fall back to `caterva2.Client` against local server URL only if necessary
+
+Benefits of direct calls:
+
+- avoids extra HTTP hops
+- simpler auth propagation
+- easier performance control
+
+## Tool Contracts
+
+Every tool should return JSON-serializable data only.
+
+Tool results should be normalized into:
+
+```json
+{
+  "ok": true,
+  "data": {},
+  "error": null
+}
+```
+
+or:
+
+```json
+{
+  "ok": false,
+  "data": null,
+  "error": {
+    "code": "DATASET_NOT_FOUND",
+    "message": "..."
+  }
+}
+```
+
+This is preferable to returning ad hoc dicts with optional `"error"` keys.
+
+## Future Tool Set
+
+Do not ship these in phase 1 unless required:
+
+- dataset slicing
+- filter/where operations
+- plotting helpers
+- upload/mutate operations
+- notebook file manipulation
+
+Phase 1 should stay read-only.
+
+## Provider Abstraction
+
+Hide provider SDK details behind a small interface.
+
+Suggested interface:
+
+```python
+class ChatProvider:
+    def complete(
+        self, *, model, messages, tools, tool_choice, temperature, max_tokens
+    ): ...
+```
+
+Provider adapters:
+
+- `GroqProvider`
+- later `OpenAIProvider`
+- later `LocalProvider`
+
+The rest of Caterva2 should not know about provider SDK response classes.
+
+Normalize provider outputs into internal models:
+
+- `AssistantMessage`
+- `ToolCall`
+- `UsageInfo`
+
+## Data Models
+
+Create explicit Pydantic models for API boundaries and internal response shaping.
+
+Suggested request/response models:
+
+- `CreateSessionRequest`
+- `CreateSessionResponse`
+- `ChatRequest`
+- `ChatResponse`
+- `AssistantPayload`
+- `Artifact`
+- `UsagePayload`
+- `TracePayload`
+- `ResetSessionResponse`
+- `SessionMetadata`
+
+Do not pass raw provider objects across module boundaries.
+
+## Notebook Client Design
+
+## Phase 1: Simple Helper
+
+Provide a notebook helper cell or small Python helper module that:
+
+1. creates a session
+2. sends user prompts
+3. renders `assistant.text`
+4. optionally renders artifacts
+
+This can live as:
+
+- a notebook example under `examples/`
+- a small importable helper in Caterva2
+
+Actual phase 1 implementation note:
+
+- the notebook path chosen for the first integration is
+  `_caterva2/state/personal/cd46395a-3517-4c48-baba-186d14b0fd94/prova3.ipynb`
+- the helper uses `caterva2.Client(None)` from Pyodide and talks to the new
+  server endpoints through the newly added client methods
+
+## Phase 2: Widget UI
+
+Adapt the existing `caterva2_agent.ipynb` UI into a client of the FastAPI API.
+
+Important changes:
+
+- notebook no longer imports provider SDK
+- notebook no longer imports agent core
+- notebook only talks to Caterva2 HTTP endpoints
+- only the session id is stored client-side; the message history stays on the server
+
+## Notebook Bootstrapping
+
+Possible approaches:
+
+- inject a helper cell into notebooks served through JupyterLite
+- provide a ready-made notebook template
+- expose a static JS or Python helper that notebooks can import
+
+Recommendation:
+
+- keep the existing bootstrap cell for `blosc2` and `caterva2`
+- add a documented optional helper cell for the LLM client
+- avoid silently injecting too much notebook UI logic into every notebook
+
+## Error Handling
+
+The API must distinguish:
+
+- user input errors
+- auth errors
+- session ownership errors
+- provider failures
+- tool failures
+- timeout and rate limit errors
+
+Suggested HTTP mapping:
+
+- `400` invalid request
+- `401` unauthenticated
+- `403` forbidden session access
+- `404` session not found
+- `409` session state conflict
+- `422` validation error
+- `429` rate limit / concurrency limit
+- `502` provider upstream failure
+- `504` provider timeout
+
+Response bodies should include stable machine-readable codes.
+
+## Security Constraints
+
+Mandatory constraints for phase 1:
+
+- read-only tools only
+- no arbitrary Python execution
+- no shell access
+- no filesystem browsing outside Caterva2 permissions
+- no provider key exposure in any notebook payload
+- no session access across users
+- no unbounded prompt size or conversation growth
+
+Prompt and session guards:
+
+- max input chars
+- max iterations
+- max history length
+- max tool calls per turn
+- request timeout
+- per-user concurrency cap
+
+## Observability
+
+Add namespaced logging and basic metrics.
+
+Log fields:
+
+- user id
+- session id
+- request id
+- provider
+- model
+- latency
+- token usage
+- tool names
+- error code
+
+Avoid logging:
+
+- API keys
+- full sensitive prompts by default
+- raw dataset content unless explicitly needed for debug mode
+
+Recommended metrics:
+
+- sessions created
+- active sessions
+- chat requests
+- request latency
+- provider errors
+- tool errors
+- tokens used
+
+## Testing Plan
+
+## Unit Tests
+
+Add tests for:
+
+- provider adapter normalization
+- session lifecycle
+- tool dispatch
+- tool result schema
+- auth guards
+- prompt size and iteration limits
+- error mapping
+
+## API Tests
+
+Add FastAPI tests for:
+
+- create session
+- submit message
+- reset session
+- delete session
+- forbidden access by another user
+- anonymous access behavior
+- provider failure handling
+
+## Notebook Integration Tests
+
+Minimal integration coverage:
+
+- notebook helper can create a session
+- notebook helper can submit a prompt and render response
+
+This can be tested outside a full browser first by exercising the API and helper code.
+
+## Manual Validation
+
+Manual end-to-end checks:
+
+1. open JupyterLite notebook
+2. create agent session
+3. ask for roots
+4. ask for dataset listing
+5. ask for dataset metadata
+6. reset session
+7. verify another user cannot access the session
+
+Actual verification performed during implementation:
+
+- full local pytest run was confirmed green outside the sandbox
+- within the sandbox:
+  - the new modules were compiled with `py_compile`
+  - the agent core and tool flow were exercised inside the `blosc2` conda env
+    using the `mock` provider
+  - direct subprocess-backed HTTP pytest validation was blocked by local bind
+    restrictions in the sandbox environment
+
+## Implementation Phases
+
+## Phase 0: Refactor Preparation
+
+- decide vendored vs dependency path
+- isolate reusable code from `Caterva2-LLM-Agent`
+- define internal response models
+- define provider adapter interface
+
+Deliverable:
+
+- clean server-usable agent core module
+
+## Phase 1: Basic Server API
+
+- add server config
+- implement in-memory session registry
+- implement provider adapter
+- implement read-only tools
+- add the `create session`, `message`, `reset`, and `delete` endpoints
+- add API tests
+
+Deliverable:
+
+- FastAPI-backed agent usable from scripts or curl
+
+Status:
+
+- implemented
+
+## Phase 2: Notebook Client
+
+- implement simple notebook helper
+- create example notebook
+- document notebook usage
+- optionally adapt existing widget UI
+
+Deliverable:
+
+- working JupyterLite notebook integration with server-side chat
+
+Status:
+
+- partially implemented
+- a simple notebook helper flow is in place
+- the richer widget-based chat UI remains future work
+
+## Phase 3: Hardening
+
+- persistent session storage
+- structured logging and metrics
+- rate limiting and concurrency controls
+- timeout and retry policy tuning
+- stricter tool result schemas
+
+Deliverable:
+
+- production-capable service behavior
+
+Status:
+
+- not implemented
+
+## Phase 4: Extended Capabilities
+
+- streaming responses
+- richer artifacts
+- plotting support
+- dataset slice tools
+- admin/debug endpoints
+
+Deliverable:
+
+- improved UX, still within the same server-side model
+
+Status:
+
+- not implemented
+
+## Concrete File Targets in Caterva2
+
+Likely files to touch:
+
+- `caterva2/services/settings.py`
+- `caterva2/services/server.py`
+- `caterva2/services/db.py`
+- `caterva2/services/schemas.py`
+- `caterva2/services/llm_agent/*`
+- `caterva2/tests/test_api.py`
+- `caterva2/tests/services.py`
+- `examples/` or `_caterva2/state/public/` for notebook examples
+
+Optional:
+
+- `README-DEVELOPERS.md`
+- `README.md`
+- release notes
+
+## Key Design Choices to Resolve Early
+
+These should be decided before implementation starts:
+
+1. Vendor `Caterva2-LLM-Agent` into Caterva2 now, or keep it as an external dependency.
+2. Keep sessions in memory first, or invest immediately in DB persistence.
+3. Start with plain request-response only, or include streaming in the first API.
+4. Restrict phase 1 to read-only tools, which is strongly recommended.
+5. Have the tool layer use server internals directly, or go through `caterva2.Client`.
+
+Recommended answers:
+
+1. Vendor now.
+2. In-memory first.
+3. No streaming in phase 1.
+4. Read-only tools only.
+5. Prefer direct server internals where practical.
+
+## Acceptance Criteria
+
+The first complete milestone is done when:
+
+- an authenticated user can create an agent session through FastAPI
+- a JupyterLite notebook can send a prompt to that session
+- the server calls the provider using server-side credentials
+- the agent can list roots and datasets and return structured answers
+- session reset and delete work
+- session ownership is enforced
+- no provider secret appears in notebook code or responses
+- tests cover the main API paths and auth boundaries
+
+## Suggested First Slice
+
+If implementing incrementally, the highest-value first slice is:
+
+1. vendor or copy the minimal agent core into `caterva2/services/llm_agent/`
+2. implement `POST /api/llm-agent/sessions`
+3. implement `POST /api/llm-agent/sessions/{id}/messages`
+4. support only `list_roots` and `list_datasets`
+5. create a simple notebook helper cell that calls those endpoints
+
+This is enough to validate the architecture before adding more tools and UI polish.
diff --git a/root-example/test-llm.ipynb b/root-example/test-llm.ipynb
new file mode 100644
index 00000000..5ab39c58
--- /dev/null
+++ b/root-example/test-llm.ipynb
@@ -0,0 +1,137 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b32d85c3",
+   "metadata": {
+    "caterva2_pyodide_bootstrap": true,
+    "trusted": true
+   },
+   "outputs": [],
+   "source": [
+    "# Install blosc2 and caterva2 in Pyodide environments (automatically added)\n",
+    "import sys\n",
+    "\n",
+    "if sys.platform == \"emscripten\":\n",
+    "    import micropip\n",
+    "    import requests\n",
+    "\n",
+    "    # Install latest blosc2\n",
+    "    blosc_latest_url = \"https://blosc.github.io/python-blosc2/wheels/latest.txt\"\n",
+    "    blosc_wheel_name = requests.get(blosc_latest_url).text.strip()\n",
+    "    blosc_wheel_url = f\"https://blosc.github.io/python-blosc2/wheels/{blosc_wheel_name}\"\n",
+    "    await micropip.install(blosc_wheel_url)\n",
+    "    print(f\"Installed {blosc_wheel_name} successfully!\")\n",
+    "\n",
+    "    # Install latest caterva2\n",
+    "    caterva_latest_url = \"https://ironarray.github.io/Caterva2/wheels/latest.txt\"\n",
+    "    caterva_wheel_name = requests.get(caterva_latest_url).text.strip()\n",
+    "    caterva_wheel_url = f\"https://ironarray.github.io/Caterva2/wheels/{caterva_wheel_name}\"\n",
+    "    await micropip.install(caterva_wheel_url)\n",
+    "    print(f\"Installed {caterva_wheel_name} successfully!\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e5380365-9013-42c3-9ee9-4be367c49bd6",
+   "metadata": {
+    "trusted": true
+   },
+   "outputs": [],
+   "source": [
+    "import json\n",
+    "\n",
+    "import caterva2 as cat2\n",
+    "\n",
+    "# In JupyterLite/Pyodide, Caterva2 can use the same origin as the notebook.\n",
+    "client = cat2.Client(None)\n",
+    "\n",
+    "session = None\n",
+    "session_id = None\n",
+    "\n",
+    "\n",
+    "def new_agent_session(name=\"prova3\", notebook_path=\"prova3.ipynb\"):\n",
+    "    global session, session_id\n",
+    "    session = client.create_llm_session(name=name, notebook_path=notebook_path)\n",
+    "    session_id = session[\"session_id\"]\n",
+    "    print(f\"LLM session ready: {session_id}\")\n",
+    "    return session\n",
+    "\n",
+    "\n",
+    "def ask(message, *, show_trace=False):\n",
+    "    if not session_id:\n",
+    "        raise RuntimeError(\"No LLM session yet. Run new_agent_session() first.\")\n",
+    "\n",
+    "    response = client.chat_llm(session_id, message)\n",
+    "    print(response[\"assistant\"][\"text\"])\n",
+    "\n",
+    "    if show_trace:\n",
+    "        print(\"\\nTRACE:\")\n",
+    "        print(json.dumps(response[\"trace\"], indent=2))\n",
+    "\n",
+    "    return response\n",
+    "\n",
+    "\n",
+    "def reset_agent_session():\n",
+    "    if not session_id:\n",
+    "        raise RuntimeError(\"No LLM session yet. Run new_agent_session() first.\")\n",
+    "    result = client.reset_llm_session(session_id)\n",
+    "    print(f\"LLM session reset: {result['session_id']}\")\n",
+    "    return result\n",
+    "\n",
+    "\n",
+    "def delete_agent_session():\n",
+    "    global session, session_id\n",
+    "    if not session_id:\n",
+    "        raise RuntimeError(\"No LLM session yet. Run new_agent_session() first.\")\n",
+    "    result = client.delete_llm_session(session_id)\n",
+    "    print(f\"LLM session deleted: {result['session_id']}\")\n",
+    "    session = None\n",
+    "    session_id = None\n",
+    "    return result\n",
+    "\n",
+    "\n",
+    "new_agent_session()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f9170e13-8b9e-461b-b8b0-edc40e6d9014",
+   "metadata": {
+    "trusted": true
+   },
+   "outputs": [],
+   "source": [
+    "# Example prompts\n",
+    "# ask(\"List the available roots\")\n",
+    "# ask(\"List datasets under @public/dir1\")\n",
+    "# ask(\"Show metadata for @public/ds-1d.b2nd\")\n",
+    "# ask(\"Show stats for @public/ds-1d.b2nd\", show_trace=True)\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python (Pyodide)",
+   "language": "python",
+   "name": "python"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "python",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}