Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 83 additions & 0 deletions app/Commands/VectorizeCodeCommand.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
<?php

declare(strict_types=1);

namespace App\Commands;

use App\Services\CodeIndexerService;
use App\Services\SymbolIndexService;
use LaravelZero\Framework\Commands\Command;

use function Laravel\Prompts\error;
use function Laravel\Prompts\info;
use function Laravel\Prompts\note;

/**
 * Console command that sends tree-sitter symbols from a local JSON index
 * to the code indexer so they can be vectorized for semantic search.
 *
 * The index file is looked up at
 * "$HOME/.code-index/<repo with '/' replaced by '-'>.json" ("/tmp" is used
 * when HOME is not set). The command exits with FAILURE when the index is
 * missing, the collection cannot be prepared, nothing was available to
 * vectorize, or every symbol failed; otherwise it exits with SUCCESS.
 */
class VectorizeCodeCommand extends Command
{
    /** Emit a progress note at most once per this many processed symbols. */
    private const PROGRESS_INTERVAL = 100;

    protected $signature = 'vectorize-code
{repo : Repository identifier (e.g. local/pstrax-laravel)}
{--kind=* : Symbol kinds to include (e.g. class, method, function)}
{--language= : Filter by language (e.g. php, typescript)}';

    protected $description = 'Vectorize tree-sitter symbols into Qdrant for semantic code search';

    /**
     * Run the vectorization for the requested repository.
     *
     * @return int self::SUCCESS or self::FAILURE (see class description)
     */
    public function handle(SymbolIndexService $symbolIndex, CodeIndexerService $codeIndexer): int
    {
        $repo = $this->argument('repo');
        if (! is_string($repo) || trim($repo) === '') {
            error('Repository argument is required.');

            return self::FAILURE;
        }

        // Read HOME once; an unset or empty value falls back to /tmp so the
        // path never silently becomes root-relative.
        $home = getenv('HOME');
        if (! is_string($home) || $home === '') {
            $home = '/tmp';
        }
        $indexPath = "{$home}/.code-index/".str_replace('/', '-', $repo).'.json';

        if (! file_exists($indexPath)) {
            error("Index not found at {$indexPath}. Run index-code first.");

            return self::FAILURE;
        }

        if (! $codeIndexer->ensureCollection()) {
            error('Failed to create/verify Qdrant code collection.');

            return self::FAILURE;
        }

        // Drop blank --kind values so they cannot displace the indexer's default kinds.
        /** @var array<string> $kinds */
        $kinds = array_values(array_filter(
            (array) $this->option('kind'),
            static fn (mixed $kind): bool => is_string($kind) && $kind !== '',
        ));

        // "--language=" with no value would otherwise become a filter that matches nothing.
        $language = $this->option('language');
        $language = is_string($language) && $language !== '' ? $language : null;

        $label = $repo;
        if ($kinds !== []) {
            $label .= ' ('.implode(', ', $kinds).')';
        }
        if ($language !== null) {
            $label .= " [{$language}]";
        }

        info("Vectorizing symbols from {$label}");

        $lastReport = 0;
        $result = $codeIndexer->vectorizeFromIndex(
            $indexPath,
            $repo,
            $symbolIndex,
            $kinds,
            $language,
            // Throttle output: report every PROGRESS_INTERVAL symbols and once at the end.
            function (int $success, int $failed, int $total) use (&$lastReport): void {
                $done = $success + $failed;
                if ($done - $lastReport >= self::PROGRESS_INTERVAL || $done === $total) {
                    $lastReport = $done;
                    note("{$done}/{$total} processed ({$success} ok, {$failed} failed)");
                }
            },
        );

        // The indexer reports a zero total both when nothing matched the filters
        // and when the index could not be read or parsed; neither is a successful run.
        if ($result['total'] === 0) {
            error("No symbols to vectorize for {$label}: the index is unreadable, malformed, or nothing matched the filters.");

            return self::FAILURE;
        }

        info("Done: {$result['success']}/{$result['total']} symbols vectorized, {$result['failed']} failed");

        if ($result['success'] === 0) {
            error('Every symbol failed to vectorize.');

            return self::FAILURE;
        }

        return self::SUCCESS;
    }
}
2 changes: 2 additions & 0 deletions app/Mcp/Servers/KnowledgeServer.php
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
use App\Mcp\Tools\CorrectTool;
use App\Mcp\Tools\RecallTool;
use App\Mcp\Tools\RememberTool;
use App\Mcp\Tools\SearchCodeTool;
use App\Mcp\Tools\StatsTool;
use Laravel\Mcp\Server;
use Laravel\Mcp\Server\Attributes\Instructions;
Expand All @@ -25,6 +26,7 @@ class KnowledgeServer extends Server
CorrectTool::class,
ContextTool::class,
StatsTool::class,
SearchCodeTool::class,
];

protected array $resources = [];
Expand Down
85 changes: 85 additions & 0 deletions app/Mcp/Tools/SearchCodeTool.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
<?php

declare(strict_types=1);

namespace App\Mcp\Tools;

use App\Services\CodeIndexerService;
use Illuminate\Contracts\JsonSchema\JsonSchema;
use Laravel\Mcp\Request;
use Laravel\Mcp\Response;
use Laravel\Mcp\Server\Attributes\Description;
use Laravel\Mcp\Server\Tool;
use Laravel\Mcp\Server\Tools\Annotations\IsIdempotent;
use Laravel\Mcp\Server\Tools\Annotations\IsReadOnly;

/**
 * MCP tool that runs a natural-language search through CodeIndexerService
 * and returns the matches as a JSON text response.
 *
 * Response shape: {"results": [...], "meta": {"query": string, "total": int}}.
 */
#[Description('Semantic code search across indexed repositories. Search by natural language to find classes, methods, functions, and their source code.')]
#[IsReadOnly]
#[IsIdempotent]
class SearchCodeTool extends Tool
{
    /** Minimum number of characters (after trimming) a query must have. */
    private const MIN_QUERY_LENGTH = 2;

    /** Result count used when the caller sends no integer limit. */
    private const DEFAULT_LIMIT = 10;

    /** Upper bound applied to any caller-supplied limit. */
    private const MAX_LIMIT = 20;

    public function __construct(
        private readonly CodeIndexerService $codeIndexer,
    ) {}

    /**
     * Validate the request, run the search and format the hits.
     *
     * - "query" must be a string of at least two characters once trimmed.
     * - "limit" is clamped to 1..20; anything that is not an integer falls back to 10.
     * - "repo" and "language" are passed on as filters only when they are non-empty strings.
     */
    public function handle(Request $request): Response
    {
        $query = $request->get('query');
        $query = is_string($query) ? trim($query) : '';

        // Count characters, not bytes, and ignore surrounding whitespace so a
        // blank query cannot slip through.
        if (mb_strlen($query) < self::MIN_QUERY_LENGTH) {
            return Response::error('A search query of at least 2 characters is required.');
        }

        // Clamp on both sides: zero or negative limits must not reach the search.
        $limit = $request->get('limit');
        $limit = is_int($limit)
            ? max(1, min($limit, self::MAX_LIMIT))
            : self::DEFAULT_LIMIT;

        $filters = [];
        foreach (['repo', 'language'] as $key) {
            $value = $request->get($key);
            if (is_string($value) && $value !== '') {
                $filters[$key] = $value;
            }
        }

        $results = $this->codeIndexer->search($query, $limit, $filters);

        // An empty result set maps to an empty list and a total of 0, so no
        // separate branch is needed for it.
        $formatted = array_map(static fn (array $r): array => [
            'filepath' => $r['filepath'],
            'repo' => $r['repo'],
            'language' => $r['language'],
            'symbol_name' => $r['symbol_name'] ?? null,
            'symbol_kind' => $r['symbol_kind'] ?? null,
            'line' => $r['start_line'],
            'score' => round((float) $r['score'], 3),
            'content' => $r['content'],
        ], $results);

        return Response::text(json_encode([
            'results' => $formatted,
            'meta' => [
                'query' => $query,
                'total' => count($formatted),
            ],
        ], JSON_THROW_ON_ERROR));
    }

    /**
     * Describe the tool's input parameters.
     *
     * @return array<string, mixed>
     */
    public function schema(JsonSchema $schema): array
    {
        return [
            'query' => $schema->string()
                ->description('Natural language query (e.g., "rate limiting middleware", "database migration logic")')
                ->required(),
            'repo' => $schema->string()
                ->description('Filter to a specific repo (e.g., "local/pstrax-laravel").'),
            'language' => $schema->string()
                ->description('Filter by language (php, typescript, javascript, python).'),
            'limit' => $schema->integer()
                ->description('Max results (default 10, max 20).')
                ->default(self::DEFAULT_LIMIT),
        ];
    }
}
156 changes: 154 additions & 2 deletions app/Services/CodeIndexerService.php
Original file line number Diff line number Diff line change
Expand Up @@ -204,12 +204,164 @@ public function search(string $query, int $limit = 10, array $filters = []): arr
'content' => $payload['content'] ?? '',
'score' => $result['score'] ?? 0.0,
'functions' => $payload['functions'] ?? [],
'start_line' => $payload['start_line'] ?? 1,
'end_line' => $payload['end_line'] ?? 1,
'symbol_name' => $payload['symbol_name'] ?? null,
'symbol_kind' => $payload['symbol_kind'] ?? null,
'signature' => $payload['signature'] ?? null,
'start_line' => $payload['start_line'] ?? $payload['line'] ?? 1,
'end_line' => $payload['end_line'] ?? $payload['line'] ?? 1,
];
}, $results);
}

/**
 * Embed one tree-sitter symbol and upsert it as a single point.
 *
 * The point id is the md5 of "repo:filepath:symbolName:line". The embedding
 * is generated from the full $text, while the stored "content" payload is
 * cut to the first 4000 characters.
 *
 * @return array{success: bool, error?: string}
 */
public function indexSymbol(
    string $text,
    string $filepath,
    string $repo,
    string $language,
    string $symbolName,
    string $symbolKind,
    int $line,
    string $signature,
): array {
    $embedding = $this->embeddingService->generate($text);

    // Nothing to store when the embedding service returned no vector.
    if ($embedding === []) {
        return ['success' => false, 'error' => 'Empty embedding'];
    }

    $payload = [
        'filepath' => $filepath,
        'repo' => $repo,
        'language' => $language,
        'symbol_name' => $symbolName,
        'symbol_kind' => $symbolKind,
        'line' => $line,
        'signature' => $signature,
        'content' => mb_substr($text, 0, 4000),
        'indexed_at' => now()->toIso8601String(),
    ];

    $point = [
        'id' => md5(implode(':', [$repo, $filepath, $symbolName, $line])),
        'vector' => $embedding,
        'payload' => $payload,
    ];

    $upsert = $this->connector->send(new UpsertPoints(self::COLLECTION_NAME, [$point]));

    if (! $upsert->successful()) {
        return ['success' => false, 'error' => 'Upsert failed'];
    }

    return ['success' => true];
}

/**
 * Batch-vectorize symbols from a tree-sitter index file.
 *
 * Reads the JSON index at $indexPath, keeps the entries under "symbols" whose
 * kind is allowed (and, when $language is given, whose file extension maps to
 * that language via detectLanguage()), and sends each one to indexSymbol().
 * A symbol whose descriptive text is blank is counted as failed without being
 * indexed. An unreadable file, invalid JSON or a missing/non-array "symbols"
 * key yields an all-zero result.
 *
 * @param array<string> $kinds Symbol kinds to include (empty = all structural kinds)
 * @param (callable(int $success, int $failed, int $total): void)|null $onProgress
 *        Invoked once after every processed symbol, including skipped ones.
 * @return array{success: int, failed: int, total: int}
 */
public function vectorizeFromIndex(
    string $indexPath,
    string $repo,
    SymbolIndexService $symbolIndex,
    array $kinds = [],
    ?string $language = null,
    ?callable $onProgress = null,
): array {
    $nothing = ['success' => 0, 'failed' => 0, 'total' => 0];

    // Guard first so an unreadable path produces the documented empty result
    // instead of a PHP warning from file_get_contents().
    if (! is_file($indexPath) || ! is_readable($indexPath)) {
        return $nothing;
    }

    $content = file_get_contents($indexPath);
    if ($content === false) {
        return $nothing;
    }

    $index = json_decode($content, true);
    // "symbols" must actually be an array; isset() alone would let a scalar through.
    if (! is_array($index) || ! is_array($index['symbols'] ?? null)) {
        return $nothing;
    }

    $allowedKinds = $kinds !== [] ? $kinds : ['class', 'method', 'function', 'interface', 'trait', 'enum'];

    // Select matching symbols and resolve each file's language once, so the
    // indexing loop below does not have to repeat the extension lookup.
    $selected = [];
    foreach ($index['symbols'] as $symbol) {
        if (! is_array($symbol) || ! in_array($symbol['kind'] ?? '', $allowedKinds, true)) {
            continue;
        }

        $file = is_string($symbol['file'] ?? null) ? $symbol['file'] : '';
        $symbolLanguage = $this->detectLanguage(strtolower(pathinfo($file, PATHINFO_EXTENSION)));

        if ($language !== null && $symbolLanguage !== $language) {
            continue;
        }

        $selected[] = [$symbol, $file, $symbolLanguage];
    }

    $total = count($selected);
    $success = 0;
    $failed = 0;

    foreach ($selected as [$symbol, $file, $symbolLanguage]) {
        $text = $this->buildSymbolText($symbol);

        if (trim($text) === '') {
            // Nothing meaningful to embed for this symbol.
            $failed++;
        } else {
            // Append the symbol's source when the symbol index can provide it.
            $source = $symbolIndex->getSymbolSource($symbol['id'] ?? '', $repo);
            if ($source !== null) {
                $text .= "\n".$source;
            }

            $result = $this->indexSymbol(
                text: $text,
                filepath: $file,
                repo: $repo,
                language: $symbolLanguage,
                symbolName: $symbol['name'] ?? '',
                symbolKind: $symbol['kind'] ?? '',
                line: (int) ($symbol['line'] ?? 0),
                signature: $symbol['signature'] ?? '',
            );

            if ($result['success']) {
                $success++;
            } else {
                $failed++;
            }
        }

        // Report after every symbol, skipped or not, so the caller always
        // sees the final "success + failed === total" tick.
        if ($onProgress !== null) {
            $onProgress($success, $failed, $total);
        }
    }

    return ['success' => $success, 'failed' => $failed, 'total' => $total];
}

/**
 * Assemble the descriptive text for a tree-sitter symbol.
 *
 * Joins, one per line: "<kind> <name>", the signature, the summary, the
 * docstring and "file: <path>". Falsy parts are removed by array_filter();
 * the heading always contains at least the separating space, so it is
 * never removed.
 *
 * @param array<string, mixed> $symbol
 */
private function buildSymbolText(array $symbol): string
{
    $heading = ($symbol['kind'] ?? '').' '.($symbol['name'] ?? '');
    $location = isset($symbol['file']) ? 'file: '.$symbol['file'] : '';

    $parts = [
        $heading,
        $symbol['signature'] ?? '',
        $symbol['summary'] ?? '',
        $symbol['docstring'] ?? '',
        $location,
    ];

    return implode("\n", array_filter($parts));
}

/**
* Chunk content into smaller pieces.
*
Expand Down
5 changes: 3 additions & 2 deletions app/Services/SymbolIndexService.php
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@ public function indexFolder(string $path, bool $incremental = false): array
$incrementalFlag = $incremental ? 'True' : 'False';

$script = <<<PYTHON
import sys
import os, sys
os.environ['JCODEMUNCH_MAX_INDEX_FILES'] = '10000'
sys.path.insert(0, '/tmp/jcodemunch-inspect')
from jcodemunch_mcp.tools.index_folder import index_folder
import json
Expand All @@ -47,7 +48,7 @@ public function indexFolder(string $path, bool $incremental = false): array
print(json.dumps(result))
PYTHON;

$result = Process::timeout(120)->run(['python3', '-c', $script]);
$result = Process::timeout(600)->run(['/opt/homebrew/opt/python@3.12/bin/python3.12', '-c', $script]);
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Hardcoded Python path breaks portability.

The path /opt/homebrew/opt/python@3.12/bin/python3.12 is specific to macOS with Homebrew on Apple Silicon. This will fail on:

  • Intel Macs (/usr/local/opt/...)
  • Linux systems
  • Any system using pyenv, asdf, or system Python
🔧 Suggested fix: Use configurable or discoverable path
-        $result = Process::timeout(600)->run(['/opt/homebrew/opt/python@3.12/bin/python3.12', '-c', $script]);
+        $pythonPath = config('services.jcodemunch.python_path', 'python3');
+        $result = Process::timeout(600)->run([$pythonPath, '-c', $script]);

Then add to config/services.php:

'jcodemunch' => [
    'python_path' => env('JCODEMUNCH_PYTHON_PATH', 'python3'),
],
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@app/Services/SymbolIndexService.php` at line 51, the hardcoded Python binary
in the Process run call (Process::timeout(600)->run([...])) breaks portability;
replace the literal '/opt/homebrew/opt/python@3.12/bin/python3.12' with a
configurable path pulled from config('services.jcodemunch.python_path',
'python3') (or env('JCODEMUNCH_PYTHON_PATH', 'python3') if you prefer), add the
suggested 'jcodemunch' => ['python_path' => env('JCODEMUNCH_PYTHON_PATH',
'python3')] entry to config/services.php, and update the code that constructs
the Process (the array passed to run in SymbolIndexService.php) to use that
config value so systems fall back to 'python3' if no env override is provided.


if (! $result->successful()) {
return ['success' => false, 'error' => $result->errorOutput()];
Expand Down
Loading
Loading