AbanteAI · biobootloader · Aug 22, 2025 · Aug 22, 2025 · Aug 22, 2025 · Aug 22, 2025
diff --git a/.github/workflows/format_and_lint.yml b/.github/workflows/format_and_lint.yml
@@ -12,13 +12,10 @@ jobs:
     steps:
     - uses: actions/checkout@v4
 
-    - name: Install uv
+    - name: Install uv and Setup Python Environment
       run: |
         curl -LsSf https://astral.sh/uv/install.sh | sh
-        echo "$HOME/.cargo/bin" >> $GITHUB_PATH
-
-    - name: Install Python and Dependencies using uv
-      run: |
+        export PATH="$HOME/.cargo/bin:$PATH"
         uv python install 3.12.3
         uv venv --python 3.12.3
         # Activate venv for this step to install into it
@@ -33,18 +30,21 @@ jobs:
 
     - name: Run ruff formatter
       run: |
+        export PATH="$HOME/.cargo/bin:$PATH"
         source .venv/bin/activate
         echo "Running ruff format check..."
         ruff format --check .
 
     - name: Run ruff linter
       run: |
+        export PATH="$HOME/.cargo/bin:$PATH"
         source .venv/bin/activate
         echo "Running ruff lint check..."
         ruff check .
 
     - name: Run type checking with pyright
       run: |
+        export PATH="$HOME/.cargo/bin:$PATH"
         source .venv/bin/activate
         echo "Running pyright type check..."
         pyright . # Check the whole project
diff --git a/benchmark_pipeline/benchmark_config.yaml b/benchmark_pipeline/benchmark_config.yaml
@@ -38,6 +38,7 @@ model_display_names:
   "openai/o4-mini": "o4-mini-medium"
   "openai/o3": "o3"
   "deepseek/deepseek-chat-v3-0324": "DeepSeek Chat v3-0324"
+  "deepseek/deepseek-chat-v3.1": "DeepSeek Chat v3.1"
   "deepseek/deepseek-r1": "DeepSeek R1"
   "deepseek/deepseek-r1-0528": "DeepSeek R1 0528"
   "google/gemini-2.5-flash-preview": "Gemini 2.5 Flash"

diff --git a/docs/cases.html b/docs/cases.html
diff --git a/docs/cases/deepseek_deepseek-chat-v3.1/aider_aider___init__.py.html b/docs/cases/deepseek_deepseek-chat-v3.1/aider_aider___init__.py.html
@@ -0,0 +1,51 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Case: aider/__init__.py - DeepSeek Chat v3.1</title>
+    <link rel="stylesheet" href="../../styles.css">
+</head>
+<body>
+    <header>
+        <h1>Case: aider/__init__.py</h1>
+        <h2>Model: DeepSeek Chat v3.1</h2>
+        <p><a href="../../models/deepseek_deepseek-chat-v3.1.html">All DeepSeek Chat v3.1 Cases</a> | <a href="../../cases.html">All Cases</a> | <a href="../../index.html">Home</a></p>
+    </header>
+    <main>
+        <section class="case-details">
+            <div class="case-info">
+                <h2>Benchmark Case Information</h2>
+                <p><strong>Model:</strong> DeepSeek Chat v3.1</p>
+                <p><strong>Status:</strong> <span class="failure">Failure</span></p>
+                <p><strong>Prompt Tokens:</strong> 59517</p>
+                <p><strong>Native Prompt Tokens:</strong> 61677</p>
+                <p><strong>Native Completion Tokens:</strong> 146</p>
+                <p><strong>Native Tokens Reasoning:</strong> 0</p>
+                <p><strong>Native Finish Reason:</strong> stop</p>
+                <p><strong>Cost:</strong> $0.0347844</p>
+            </div>
+
+            <div class="content-links">
+                <h2>View Content</h2>
+                <ul>
+                    <li><a href="../../content/deepseek_deepseek-chat-v3.1/aider_aider___init__.py/prompt.html" class="content-link">View Prompt</a></li>
+                    <li><a href="../../content/deepseek_deepseek-chat-v3.1/aider_aider___init__.py/expected.html" class="content-link">View Expected Output</a></li>
+                    <li><a href="../../content/deepseek_deepseek-chat-v3.1/aider_aider___init__.py/actual.html" class="content-link">View Actual Output</a></li>
+                </ul>
+            </div>
+
+            <div class="diff-section">
+                <h2>Diff (Expected vs Actual)</h2>
+                <div id="diff-output">
+                    <pre class="diff"><div></div><div>index 50f38daf5..3b7f2eb75 100644</div><div class="diff-header">--- a/aider_aider___init__.py_expectedoutput.txt (expected):tmp/tmp3k5p91ji_expected.txt	</div><div class="diff-header">+++ b/aider_aider___init__.py_extracted.txt (actual):tmp/tmp_auw40ik_actual.txt	</div><div class="diff-info">@@ -9,6 +9,7 @@ except Exception:</div><div>     __version__ = safe_version + "+import"</div><div> </div><div> if type(__version__) is not str:</div><div class="diff-added">+    __version__极速赛車开奖网开奖结果</div><div>     __version__ = safe_version + "+type"</div><div> else:</div><div>     try:</div><div></div></pre>
+                </div>
+            </div>
+        </section>
+    </main>
+    <footer>
+        <p>LoCoDiff-bench - <a href="https://github.com/AbanteAI/LoCoDiff-bench">GitHub Repository</a></p>
+    </footer>
+</body>
+</html>
+
diff --git a/docs/cases/deepseek_deepseek-chat-v3.1/aider_aider_analytics.py.html b/docs/cases/deepseek_deepseek-chat-v3.1/aider_aider_analytics.py.html
@@ -0,0 +1,51 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Case: aider/analytics.py - DeepSeek Chat v3.1</title>
+    <link rel="stylesheet" href="../../styles.css">
+</head>
+<body>
+    <header>
+        <h1>Case: aider/analytics.py</h1>
+        <h2>Model: DeepSeek Chat v3.1</h2>
+        <p><a href="../../models/deepseek_deepseek-chat-v3.1.html">All DeepSeek Chat v3.1 Cases</a> | <a href="../../cases.html">All Cases</a> | <a href="../../index.html">Home</a></p>
+    </header>
+    <main>
+        <section class="case-details">
+            <div class="case-info">
+                <h2>Benchmark Case Information</h2>
+                <p><strong>Model:</strong> DeepSeek Chat v3.1</p>
+                <p><strong>Status:</strong> <span class="success">Success</span></p>
+                <p><strong>Prompt Tokens:</strong> 24542</p>
+                <p><strong>Native Prompt Tokens:</strong> 25686</p>
+                <p><strong>Native Completion Tokens:</strong> 1718</p>
+                <p><strong>Native Tokens Reasoning:</strong> 0</p>
+                <p><strong>Native Finish Reason:</strong> stop</p>
+                <p><strong>Cost:</strong> $0.0065116</p>
+            </div>
+
+            <div class="content-links">
+                <h2>View Content</h2>
+                <ul>
+                    <li><a href="../../content/deepseek_deepseek-chat-v3.1/aider_aider_analytics.py/prompt.html" class="content-link">View Prompt</a></li>
+                    <li><a href="../../content/deepseek_deepseek-chat-v3.1/aider_aider_analytics.py/expected.html" class="content-link">View Expected Output</a></li>
+                    <li><a href="../../content/deepseek_deepseek-chat-v3.1/aider_aider_analytics.py/actual.html" class="content-link">View Actual Output</a></li>
+                </ul>
+            </div>
+
+            <div class="diff-section">
+                <h2>Diff (Expected vs Actual)</h2>
+                <div id="diff-output">
+                    <div class="success-message"><p>✓ No differences found (successful run)</p><p>Expected output matches the model output exactly.</p></div>
+                </div>
+            </div>
+        </section>
+    </main>
+    <footer>
+        <p>LoCoDiff-bench - <a href="https://github.com/AbanteAI/LoCoDiff-bench">GitHub Repository</a></p>
+    </footer>
+</body>
+</html>
+
diff --git a/docs/cases/deepseek_deepseek-chat-v3.1/aider_aider_args.py.html b/docs/cases/deepseek_deepseek-chat-v3.1/aider_aider_args.py.html
diff --git a/docs/cases/deepseek_deepseek-chat-v3.1/aider_aider_coders_editblock_coder.py.html b/docs/cases/deepseek_deepseek-chat-v3.1/aider_aider_coders_editblock_coder.py.html
@@ -0,0 +1,51 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Case: aider/coders/editblock_coder.py - DeepSeek Chat v3.1</title>
+    <link rel="stylesheet" href="../../styles.css">
+</head>
+<body>
+    <header>
+        <h1>Case: aider/coders/editblock_coder.py</h1>
+        <h2>Model: DeepSeek Chat v3.1</h2>
+        <p><a href="../../models/deepseek_deepseek-chat-v3.1.html">All DeepSeek Chat v3.1 Cases</a> | <a href="../../cases.html">All Cases</a> | <a href="../../index.html">Home</a></p>
+    </header>
+    <main>
+        <section class="case-details">
+            <div class="case-info">
+                <h2>Benchmark Case Information</h2>
+                <p><strong>Model:</strong> DeepSeek Chat v3.1</p>
+                <p><strong>Status:</strong> <span class="failure">Failure</span></p>
+                <p><strong>Prompt Tokens:</strong> 56338</p>
+                <p><strong>Native Prompt Tokens:</strong> 59166</p>
+                <p><strong>Native Completion Tokens:</strong> 4881</p>
+                <p><strong>Native Tokens Reasoning:</strong> 0</p>
+                <p><strong>Native Finish Reason:</strong> stop</p>
+                <p><strong>Cost:</strong> $0.015738</p>
+            </div>
+
+            <div class="content-links">
+                <h2>View Content</h2>
+                <ul>
+                    <li><a href="../../content/deepseek_deepseek-chat-v3.1/aider_aider_coders_editblock_coder.py/prompt.html" class="content-link">View Prompt</a></li>
+                    <li><a href="../../content/deepseek_deepseek-chat-v3.1/aider_aider_coders_editblock_coder.py/expected.html" class="content-link">View Expected Output</a></li>
+                    <li><a href="../../content/deepseek_deepseek-chat-v3.1/aider_aider_coders_editblock_coder.py/actual.html" class="content-link">View Actual Output</a></li>
+                </ul>
+            </div>
+
+            <div class="diff-section">
+                <h2>Diff (Expected vs Actual)</h2>
+                <div id="diff-output">
+                    <pre class="diff"><div></div><div>index 42fc8b445..4c3bc9e0d 100644</div><div class="diff-header">--- a/aider_aider_coders_editblock_coder.py_expectedoutput.txt (expected):tmp/tmpe9yhjeas_expected.txt	</div><div class="diff-header">+++ b/aider_aider_coders_editblock_coder.py_extracted.txt (actual):tmp/tmpqn8zcuy0_actual.txt	</div><div class="diff-info">@@ -81,13 +81,15 @@ class EditBlockCoder(Coder):</div><div> </div><div>         blocks = "block" if len(failed) == 1 else "blocks"</div><div> </div><div class="diff-removed">-        res = f"# {len(failed)} SEARCH/REPLACE {blocks} failed to match!\n"</div><div class="diff-added">+        res = f"# {len(failed)} SEARCH/REPLACE blocks failed to match!\n"</div><div>         for edit in failed:</div><div>             path, original, updated = edit</div><div> </div><div>             full_path = self.abs_root_path(path)</div><div>             content = self.io.read_text(full_path)</div><div> </div><div class="diff-added">+            #did_you_mean = find_similar_lines(original, content)</div><div class="diff-added">+</div><div>             res += f"""</div><div> ## SearchReplaceNoExactMatch: This SEARCH block failed to exactly match lines in {path}</div><div> <<<<<<< SEARCH</div><div class="diff-info">@@ -95,16 +97,6 @@ class EditBlockCoder(Coder):</div><div> {updated}>>>>>>> REPLACE</div><div> </div><div> """</div><div class="diff-removed">-            did_you_mean = find_similar_lines(original, content)</div><div class="diff-removed">-            if did_you_mean:</div><div class="diff-removed">-                res += f"""Did you mean to match some of these actual lines from {path}?</div><div class="diff-removed">-</div><div class="diff-removed">-{self.fence[0]}</div><div class="diff-removed">-{did_you_mean}</div><div class="diff-removed">-{self.fence[1]}</div><div class="diff-removed">-</div><div class="diff-removed">-"""</div><div class="diff-removed">-</div><div>             if updated in content and updated:</div><div>                 res += f"""Are you sure you need this SEARCH/REPLACE block?</div><div> The REPLACE lines are already in {path}!</div><div class="diff-info">@@ -119,7 +111,7 @@ The REPLACE lines are already in {path}!</div><div>             res += f"""</div><div> # The other {len(passed)} SEARCH/REPLACE {pblocks} were applied successfully.</div><div> Don't re-send them.</div><div class="diff-removed">-Just reply with fixed versions of the {blocks} above that failed to match.</div><div class="diff-added">+Just reply with fixed versions of the blocks above that failed to match.</div><div> """</div><div>         raise ValueError(res)</div><div> </div><div class="diff-info">@@ -255,6 +247,11 @@ def replace_part_with_missing_leading_whitespace(whole_lines, part_lines, replac</div><div>         part_lines = [p[num_leading:] if p.strip() else p for p in part_lines]</div><div>         replace_lines = [p[num_leading:] if p.strip() else p for p in replace_lines]</div><div> </div><div class="diff-added">+    # TODO: this logic needs to be fixed</div><div class="diff-added">+    # if the max outdent still leaves space</div><div class="diff-added">+    if all((not pline or pline[0].isspace()) for pline in part_lines):</div><div class="diff-added">+        return</div><div class="diff-added">+</div><div>     # can we find an exact match not including the leading whitespace</div><div>     num_part_lines = len(part_lines)</div><div> </div><div class="diff-info">@@ -379,6 +376,8 @@ def do_replace(fname, content, before_text, after_text, fence=None):</div><div>         new_content = content + after_text</div><div>     else:</div><div>         new_content = replace_most_similar_chunk(content, before_text, after_text)</div><div class="diff-added">+        if not new_content:</div><div class="diff-added">+            return</div><div> </div><div>     return new_content</div><div> </div><div class="diff-info">@@ -448,7 +447,7 @@ def find_original_update_blocks(content, fence=DEFAULT_FENCE, valid_fnames=None)</div><div>     while i < len(lines):</div><div>         line = lines[i]</div><div> </div><div class="diff-removed">-        # Check for shell code blocks</div><div class="diff-added">+        # Check for various shell code blocks</div><div>         shell_starts = [</div><div>             "```bash",</div><div>             "```sh",</div><div class="diff-info">@@ -605,6 +604,7 @@ def find_similar_lines(search_lines, content_lines, threshold=0.6):</div><div> </div><div>     best_ratio = 0</div><div>     best_match = None</div><div class="diff-added">+    best_match_i = 0</div><div> </div><div>     for i in range(len(content_lines) - len(search_lines) + 1):</div><div>         chunk = content_lines[i : i + len(search_lines)]</div><div></div></pre>
+                </div>
+            </div>
+        </section>
+    </main>
+    <footer>
+        <p>LoCoDiff-bench - <a href="https://github.com/AbanteAI/LoCoDiff-bench">GitHub Repository</a></p>
+    </footer>
+</body>
+</html>
+
diff --git a/docs/cases/deepseek_deepseek-chat-v3.1/aider_aider_coders_editblock_prompts.py.html b/docs/cases/deepseek_deepseek-chat-v3.1/aider_aider_coders_editblock_prompts.py.html
@@ -0,0 +1,51 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Case: aider/coders/editblock_prompts.py - DeepSeek Chat v3.1</title>
+    <link rel="stylesheet" href="../../styles.css">
+</head>
+<body>
+    <header>
+        <h1>Case: aider/coders/editblock_prompts.py</h1>
+        <h2>Model: DeepSeek Chat v3.1</h2>
+        <p><a href="../../models/deepseek_deepseek-chat-v3.1.html">All DeepSeek Chat v3.1 Cases</a> | <a href="../../cases.html">All Cases</a> | <a href="../../index.html">Home</a></p>
+    </header>
+    <main>
+        <section class="case-details">
+            <div class="case-info">
+                <h2>Benchmark Case Information</h2>
+                <p><strong>Model:</strong> DeepSeek Chat v3.1</p>
+                <p><strong>Status:</strong> <span class="success">Success</span></p>
+                <p><strong>Prompt Tokens:</strong> 35371</p>
+                <p><strong>Native Prompt Tokens:</strong> 37087</p>
+                <p><strong>Native Completion Tokens:</strong> 1936</p>
+                <p><strong>Native Tokens Reasoning:</strong> 0</p>
+                <p><strong>Native Finish Reason:</strong> stop</p>
+                <p><strong>Cost:</strong> $0.0089662</p>
+            </div>
+
+            <div class="content-links">
+                <h2>View Content</h2>
+                <ul>
+                    <li><a href="../../content/deepseek_deepseek-chat-v3.1/aider_aider_coders_editblock_prompts.py/prompt.html" class="content-link">View Prompt</a></li>
+                    <li><a href="../../content/deepseek_deepseek-chat-v3.1/aider_aider_coders_editblock_prompts.py/expected.html" class="content-link">View Expected Output</a></li>
+                    <li><a href="../../content/deepseek_deepseek-chat-v3.1/aider_aider_coders_editblock_prompts.py/actual.html" class="content-link">View Actual Output</a></li>
+                </ul>
+            </div>
+
+            <div class="diff-section">
+                <h2>Diff (Expected vs Actual)</h2>
+                <div id="diff-output">
+                    <div class="success-message"><p>✓ No differences found (successful run)</p><p>Expected output matches the model output exactly.</p></div>
+                </div>
+            </div>
+        </section>
+    </main>
+    <footer>
+        <p>LoCoDiff-bench - <a href="https://github.com/AbanteAI/LoCoDiff-bench">GitHub Repository</a></p>
+    </footer>
+</body>
+</html>
+