From 67d4f454f23d22773e40a71a28a987d313c137b1 Mon Sep 17 00:00:00 2001
From: tsuiusi <ryantsui786@gmail.com>
Date: Tue, 10 Mar 2026 15:04:54 +0100
Subject: [PATCH 01/10] lmcache docker

---
 docker/Dockerfile.lmcache | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)
 create mode 100644 docker/Dockerfile.lmcache

diff --git a/docker/Dockerfile.lmcache b/docker/Dockerfile.lmcache
new file mode 100644
index 0000000..63146de
--- /dev/null
+++ b/docker/Dockerfile.lmcache
@@ -0,0 +1,17 @@
+ARG LMCACHE_VERSION=latest
+FROM lmcache/vllm-openai:${LMCACHE_VERSION}
+
+COPY . /opt/contextpilot
+WORKDIR /opt/contextpilot
+
+RUN pip install --no-cache-dir . && \
+    python3 -m contextpilot.install_hook
+
+ENV CONTEXTPILOT_INDEX_URL=http://localhost:8765
+EXPOSE 8000 8765
+
+# Copy the launcher already executable; --chmod (BuildKit) avoids an extra chmod layer.
+COPY --chmod=755 docker/entrypoint-vllm.sh /entrypoint.sh
+
+ENTRYPOINT ["/entrypoint.sh"]
+CMD ["Qwen/Qwen2.5-7B-Instruct", "--enable-prefix-caching", "--kv-transfer-config", "{\"kv_connector\":\"LMCacheConnectorV1\",\"kv_role\":\"kv_both\"}"]

From 2e4d57dd6b52d766c8cbab0134fd5d228e1d50ec Mon Sep 17 00:00:00 2001
From: tsuiusi <ryantsui786@gmail.com>
Date: Tue, 10 Mar 2026 15:24:05 +0100
Subject: [PATCH 02/10] lmcache support

---
 docker/Dockerfile.lmcache | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docker/Dockerfile.lmcache b/docker/Dockerfile.lmcache
index 63146de..0038b76 100644
--- a/docker/Dockerfile.lmcache
+++ b/docker/Dockerfile.lmcache
@@ -4,7 +4,7 @@ FROM lmcache/vllm-openai:${LMCACHE_VERSION}
 COPY . /opt/contextpilot
 WORKDIR /opt/contextpilot
 
-RUN pip install --no-cache-dir . && \
+RUN pip install --no-cache-dir --break-system-packages . && \
     python3 -m contextpilot.install_hook
 
 ENV CONTEXTPILOT_INDEX_URL=http://localhost:8765

From 266a130185b4b15cf6d87baef9e71445ddd5843b Mon Sep 17 00:00:00 2001
From: tsuiusi <ryantsui786@gmail.com>
Date: Tue, 10 Mar 2026 15:59:08 +0100
Subject: [PATCH 03/10] fix

---
 docker/Dockerfile.lmcache | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docker/Dockerfile.lmcache b/docker/Dockerfile.lmcache
index 0038b76..0bc04b4 100644
--- a/docker/Dockerfile.lmcache
+++ b/docker/Dockerfile.lmcache
@@ -4,8 +4,8 @@ FROM lmcache/vllm-openai:${LMCACHE_VERSION}
 COPY . /opt/contextpilot
 WORKDIR /opt/contextpilot
 
-RUN pip install --no-cache-dir --break-system-packages . && \
-    python3 -m contextpilot.install_hook
+RUN /opt/venv/bin/pip install --no-cache-dir . && \
+    /opt/venv/bin/python3 -m contextpilot.install_hook
 
 ENV CONTEXTPILOT_INDEX_URL=http://localhost:8765
 EXPOSE 8000 8765

From dab83ec98dc7148e03f5989b961fd024759a393c Mon Sep 17 00:00:00 2001
From: tsuiusi <ryantsui786@gmail.com>
Date: Tue, 10 Mar 2026 16:05:28 +0100
Subject: [PATCH 04/10] Use venv python3 -m pip in LMCache Dockerfile

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 docker/Dockerfile.lmcache | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docker/Dockerfile.lmcache b/docker/Dockerfile.lmcache
index 0bc04b4..bdfe428 100644
--- a/docker/Dockerfile.lmcache
+++ b/docker/Dockerfile.lmcache
@@ -4,7 +4,7 @@ FROM lmcache/vllm-openai:${LMCACHE_VERSION}
 COPY . /opt/contextpilot
 WORKDIR /opt/contextpilot
 
-RUN /opt/venv/bin/pip install --no-cache-dir . && \
+RUN /opt/venv/bin/python3 -m pip install --no-cache-dir . && \
     /opt/venv/bin/python3 -m contextpilot.install_hook
 
 ENV CONTEXTPILOT_INDEX_URL=http://localhost:8765

From 88ca651a8276d95c09f273338e75350be6f939e8 Mon Sep 17 00:00:00 2001
From: tsuiusi <ryantsui786@gmail.com>
Date: Tue, 10 Mar 2026 16:19:22 +0100
Subject: [PATCH 05/10] Use system pip + direct script for LMCache Dockerfile

The lmcache/vllm-openai image has no pip in its venv.
System pip works but python3 -m contextpilot.install_hook
triggers the full package __init__.py import chain.
Run install_hook.py directly as a script to avoid this.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 docker/Dockerfile.lmcache | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docker/Dockerfile.lmcache b/docker/Dockerfile.lmcache
index bdfe428..5a513e5 100644
--- a/docker/Dockerfile.lmcache
+++ b/docker/Dockerfile.lmcache
@@ -4,8 +4,8 @@ FROM lmcache/vllm-openai:${LMCACHE_VERSION}
 COPY . /opt/contextpilot
 WORKDIR /opt/contextpilot
 
-RUN /opt/venv/bin/python3 -m pip install --no-cache-dir . && \
-    /opt/venv/bin/python3 -m contextpilot.install_hook
+RUN pip install --no-cache-dir --break-system-packages . && \
+    python3 contextpilot/install_hook.py
 
 ENV CONTEXTPILOT_INDEX_URL=http://localhost:8765
 EXPOSE 8000 8765

From e07c323ce7f1bfe01185f1be9ed713468190ff2f Mon Sep 17 00:00:00 2001
From: tsuiusi <ryantsui786@gmail.com>
Date: Tue, 10 Mar 2026 20:30:20 +0100
Subject: [PATCH 06/10] Install contextpilot into venv, not system Python

The lmcache image runs python3 from /opt/venv but had no pip there.
Use ensurepip to bootstrap pip into the venv, then install everything
into the same environment that runs at runtime.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 docker/Dockerfile.lmcache | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/docker/Dockerfile.lmcache b/docker/Dockerfile.lmcache
index 5a513e5..5ebef07 100644
--- a/docker/Dockerfile.lmcache
+++ b/docker/Dockerfile.lmcache
@@ -4,8 +4,9 @@ FROM lmcache/vllm-openai:${LMCACHE_VERSION}
 COPY . /opt/contextpilot
 WORKDIR /opt/contextpilot
 
-RUN pip install --no-cache-dir --break-system-packages . && \
-    python3 contextpilot/install_hook.py
+RUN /opt/venv/bin/python3 -m ensurepip && \
+    /opt/venv/bin/python3 -m pip install --no-cache-dir . && \
+    /opt/venv/bin/python3 contextpilot/install_hook.py
 
 ENV CONTEXTPILOT_INDEX_URL=http://localhost:8765
 EXPOSE 8000 8765

From 2421feccf87f577c0da2b09d748b8a12f260d2b9 Mon Sep 17 00:00:00 2001
From: tsuiusi <ryantsui786@gmail.com>
Date: Tue, 10 Mar 2026 20:50:59 +0100
Subject: [PATCH 07/10] docs

---
 docs/getting_started/docker.md | 39 ++++++++++++++++++++++++++++++++--
 1 file changed, 37 insertions(+), 2 deletions(-)

diff --git a/docs/getting_started/docker.md b/docs/getting_started/docker.md
index f48d0c3..289359b 100644
--- a/docs/getting_started/docker.md
+++ b/docs/getting_started/docker.md
@@ -40,8 +40,9 @@ Single container with both the engine and ContextPilot server.
 ### Build
 
 ```bash
-docker build -t contextpilot-sglang -f docker/Dockerfile.sglang .
-docker build -t contextpilot-vllm   -f docker/Dockerfile.vllm .
+docker build -t contextpilot-sglang  -f docker/Dockerfile.sglang .
+docker build -t contextpilot-vllm    -f docker/Dockerfile.vllm .
+docker build -t contextpilot-lmcache -f docker/Dockerfile.lmcache .
 ```
 
 Pin a specific engine version:
@@ -75,6 +76,40 @@ docker run --gpus all --ipc=host \
 
 Everything after the image name is passed to the engine. Defaults are `Qwen/Qwen3.5-2B` for both images.
 
+**vLLM + LMCache (KV cache CPU offloading):**
+
+[LMCache](https://github.com/LMCache/LMCache) offloads KV cache to CPU/disk so evicted prefixes can be restored without recomputation. ContextPilot works with LMCache out of the box — the `BlockPool` hook is unaffected.
+
+```bash
+docker build -t contextpilot-lmcache -f docker/Dockerfile.lmcache .
+```
+
+Pin a specific LMCache image tag with `LMCACHE_VERSION` (the default is `latest`; replace `latest` below with a released tag):
+
+```bash
+docker build -t contextpilot-lmcache -f docker/Dockerfile.lmcache --build-arg LMCACHE_VERSION=latest .
+```
+
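+Run (this image defaults to `Qwen/Qwen2.5-7B-Instruct` with prefix caching and the LMCache connector enabled):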
+
+```bash
+docker run --gpus all --ipc=host \
+  -p 8000:8000 -p 8765:8765 \
+  -e HUGGING_FACE_HUB_TOKEN=$HF_TOKEN \
+  contextpilot-lmcache
+```
+
+Override the model or LMCache config:
+
+```bash
+docker run --gpus all --ipc=host \
+  -p 8000:8000 -p 8765:8765 \
+  -e HUGGING_FACE_HUB_TOKEN=$HF_TOKEN \
+  contextpilot-lmcache \
+  Qwen/Qwen3-4B --enable-prefix-caching \
+  --kv-transfer-config '{"kv_connector":"LMCacheConnectorV1","kv_role":"kv_both"}'
+```
+
 ## GPU Selection
 
 ```bash

From df76f6c41edae01310f132df360602ff9321be74 Mon Sep 17 00:00:00 2001
From: tsuiusi <ryantsui786@gmail.com>
Date: Tue, 10 Mar 2026 23:25:54 +0100
Subject: [PATCH 08/10] docs

---
 docs/getting_started/quickstart.md | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/docs/getting_started/quickstart.md b/docs/getting_started/quickstart.md
index 3365b08..3a2a33d 100644
--- a/docs/getting_started/quickstart.md
+++ b/docs/getting_started/quickstart.md
@@ -175,6 +175,19 @@ python -m vllm.entrypoints.openai.api_server \
     --enable-prefix-caching
 ```
 
+**vLLM + LMCache (optional KV cache CPU offloading):**
+
+[LMCache](https://github.com/LMCache/LMCache) offloads evicted KV cache to CPU/disk so prefixes can be restored without recomputation. Just install it and add the `--kv-transfer-config` flag — ContextPilot works with LMCache out of the box.
+
+```bash
+pip install lmcache
+python -m vllm.entrypoints.openai.api_server \
+    --model Qwen/Qwen3-4B \
+    --port 30000 \
+    --enable-prefix-caching \
+    --kv-transfer-config '{"kv_connector":"LMCacheConnectorV1","kv_role":"kv_both"}'
+```
+
 > **Note:** For eviction sync, prefix with `CONTEXTPILOT_INDEX_URL=http://localhost:8765`. This lets the inference engine notify ContextPilot when KV cache entries are evicted.
 
 ## Step 2: Start ContextPilot

From 1b698cdb53f73a390eb3fb4eebf92b4b7e46b917 Mon Sep 17 00:00:00 2001
From: tsuiusi <ryantsui786@gmail.com>
Date: Tue, 10 Mar 2026 23:29:14 +0100
Subject: [PATCH 09/10] docs

---
 README.md | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/README.md b/README.md
index 69c8521..0f15843 100644
--- a/README.md
+++ b/README.md
@@ -131,6 +131,22 @@ xcode-select --install    # one-time: provides clang++ to compile the native hoo
 
 More [detailed installation instructions](https://efficientcontext.github.io/contextpilot-docs/getting_started/installation) are available in the docs.
 
+---
+
+### LMCache (Optional KV Cache CPU Offloading)
+
+[LMCache](https://github.com/LMCache/LMCache) offloads evicted KV cache to CPU/disk so prefixes can be restored without recomputation. ContextPilot works with LMCache out of the box — just install it and add one flag:
+
+```bash
+pip install lmcache
+vllm serve Qwen/Qwen3-4B --enable-prefix-caching \
+    --kv-transfer-config '{"kv_connector":"LMCacheConnectorV1","kv_role":"kv_both"}'
+```
+
+Docker images are also available — see the [Quick Start](https://efficientcontext.github.io/contextpilot-docs/getting_started/quickstart) and [Docker guide](https://efficientcontext.github.io/contextpilot-docs/getting_started/docker) for details.
+
+---
+
 Docker images are also available for both all-in-one and standalone deployment. See the [Docker guide](https://efficientcontext.github.io/contextpilot-docs/getting_started/docker).
 
 ## Getting Started

From 4e2bab88d4f580b7711a4486836d1511453e070a Mon Sep 17 00:00:00 2001
From: tsuiusi <ryantsui786@gmail.com>
Date: Tue, 10 Mar 2026 23:37:08 +0100
Subject: [PATCH 10/10] removed unnecessary docs

---
 README.md | 16 ----------------
 1 file changed, 16 deletions(-)

diff --git a/README.md b/README.md
index 0f15843..69c8521 100644
--- a/README.md
+++ b/README.md
@@ -131,22 +131,6 @@ xcode-select --install    # one-time: provides clang++ to compile the native hoo
 
 More [detailed installation instructions](https://efficientcontext.github.io/contextpilot-docs/getting_started/installation) are available in the docs.
 
----
-
-### LMCache (Optional KV Cache CPU Offloading)
-
-[LMCache](https://github.com/LMCache/LMCache) offloads evicted KV cache to CPU/disk so prefixes can be restored without recomputation. ContextPilot works with LMCache out of the box — just install it and add one flag:
-
-```bash
-pip install lmcache
-vllm serve Qwen/Qwen3-4B --enable-prefix-caching \
-    --kv-transfer-config '{"kv_connector":"LMCacheConnectorV1","kv_role":"kv_both"}'
-```
-
-Docker images are also available — see the [Quick Start](https://efficientcontext.github.io/contextpilot-docs/getting_started/quickstart) and [Docker guide](https://efficientcontext.github.io/contextpilot-docs/getting_started/docker) for details.
-
----
-
 Docker images are also available for both all-in-one and standalone deployment. See the [Docker guide](https://efficientcontext.github.io/contextpilot-docs/getting_started/docker).
 
 ## Getting Started