diff --git a/app/Commands/VectorizeCodeCommand.php b/app/Commands/VectorizeCodeCommand.php new file mode 100644 index 0000000..a221591 --- /dev/null +++ b/app/Commands/VectorizeCodeCommand.php @@ -0,0 +1,83 @@ +argument('repo'); + if (! is_string($repo)) { + error('Repository argument is required.'); + + return self::FAILURE; + } + + $home = getenv('HOME') !== false ? (string) getenv('HOME') : '/tmp'; + $indexPath = "{$home}/.code-index/".str_replace('/', '-', $repo).'.json'; + + if (! file_exists($indexPath)) { + error("Index not found at {$indexPath}. Run index-code first."); + + return self::FAILURE; + } + + if (! $codeIndexer->ensureCollection()) { + error('Failed to create/verify Qdrant code collection.'); + + return self::FAILURE; + } + + /** @var array $kinds */ + $kinds = $this->option('kind'); + $language = $this->option('language'); + $language = is_string($language) ? $language : null; + + $label = $repo; + if ($kinds !== []) { + $label .= ' ('.implode(', ', $kinds).')'; + } + if ($language !== null) { + $label .= " [{$language}]"; + } + + info("Vectorizing symbols from {$label}"); + + $lastReport = 0; + $result = $codeIndexer->vectorizeFromIndex( + $indexPath, + $repo, + $symbolIndex, + $kinds, + $language, + function (int $success, int $failed, int $total) use (&$lastReport): void { + $done = $success + $failed; + if ($done - $lastReport >= 100 || $done === $total) { + $lastReport = $done; + note("{$done}/{$total} processed ({$success} ok, {$failed} failed)"); + } + }, + ); + + info("Done: {$result['success']}/{$result['total']} symbols vectorized, {$result['failed']} failed"); + + return self::SUCCESS; + } +} diff --git a/app/Mcp/Servers/KnowledgeServer.php b/app/Mcp/Servers/KnowledgeServer.php index 93da666..748bc7f 100644 --- a/app/Mcp/Servers/KnowledgeServer.php +++ b/app/Mcp/Servers/KnowledgeServer.php @@ -8,6 +8,7 @@ use App\Mcp\Tools\CorrectTool; use App\Mcp\Tools\RecallTool; use App\Mcp\Tools\RememberTool; +use App\Mcp\Tools\SearchCodeTool; use 
App\Mcp\Tools\StatsTool;
 use Laravel\Mcp\Server;
 use Laravel\Mcp\Server\Attributes\Instructions;
@@ -25,6 +26,7 @@ class KnowledgeServer extends Server
         CorrectTool::class,
         ContextTool::class,
         StatsTool::class,
+        SearchCodeTool::class,
     ];
 
     protected array $resources = [];
diff --git a/app/Mcp/Tools/SearchCodeTool.php b/app/Mcp/Tools/SearchCodeTool.php
new file mode 100644
index 0000000..e4ca98a
--- /dev/null
+++ b/app/Mcp/Tools/SearchCodeTool.php
@@ -0,0 +1,89 @@
+get('query');
+
+        // Trim first so a whitespace-only query is rejected; count characters, not bytes.
+        $query = is_string($query) ? trim($query) : '';
+        if (mb_strlen($query) < 2) {
+            return Response::error('A search query of at least 2 characters is required.');
+        }
+
+        // Clamp to 1..20: without the lower bound a zero or negative limit reaches the search call.
+        $limit = $request->get('limit');
+        $limit = is_int($limit) ? max(1, min($limit, 20)) : 10;
+
+        // Keep only the filters that were supplied as non-empty strings.
+        $filters = [];
+        foreach (['repo', 'language'] as $key) {
+            $value = $request->get($key);
+            if (is_string($value) && $value !== '') {
+                $filters[$key] = $value;
+            }
+        }
+
+        $results = $this->codeIndexer->search($query, $limit, $filters);
+
+        // An empty result set maps to [] and total 0, so no separate empty branch is needed.
+        $formatted = array_map(static fn (array $r): array => [
+            'filepath' => $r['filepath'],
+            'repo' => $r['repo'],
+            'language' => $r['language'],
+            'symbol_name' => $r['symbol_name'] ?? null,
+            'symbol_kind' => $r['symbol_kind'] ?? null,
+            'line' => $r['start_line'],
+            'score' => round($r['score'], 3),
+            'content' => $r['content'],
+        ], $results);
+
+        return Response::text(json_encode([
+            'results' => $formatted,
+            'meta' => ['query' => $query, 'total' => count($formatted)],
+        ], JSON_THROW_ON_ERROR));
+    }
+
+    /**
+     * Describe the tool input: a required query plus optional repo, language and limit.
+     *
+     * @return array<string, mixed>
+     */
+    public function schema(JsonSchema $schema): array
+    {
+        return [
+            'query' => $schema->string()
+                ->description('Natural language query (e.g., "rate limiting middleware", "database migration logic")')
+                ->required(),
+            'repo' => $schema->string()
+                ->description('Filter to a specific repo (e.g., "local/pstrax-laravel").'),
+            'language' => $schema->string()
+                ->description('Filter by language (php, typescript, javascript, python).'),
+            'limit' => $schema->integer()
+                ->description('Max results (default 10, max 20).')
+                ->default(10),
+        ];
+    }
+}
diff --git a/app/Services/CodeIndexerService.php b/app/Services/CodeIndexerService.php
index 8af0823..3262090 100644
--- a/app/Services/CodeIndexerService.php
+++ b/app/Services/CodeIndexerService.php
@@ -204,12 +204,164 @@ public function search(string $query, int $limit = 10, array $filters = []): arr
                 'content' => $payload['content'] ?? '',
                 'score' => $result['score'] ?? 0.0,
                 'functions' => $payload['functions'] ?? [],
-                'start_line' => $payload['start_line'] ?? 1,
-                'end_line' => $payload['end_line'] ?? 1,
+                'symbol_name' => $payload['symbol_name'] ?? null,
+                'symbol_kind' => $payload['symbol_kind'] ?? null,
+                'signature' => $payload['signature'] ?? null,
+                'start_line' => $payload['start_line'] ?? $payload['line'] ?? 1,
+                'end_line' => $payload['end_line'] ?? $payload['line'] ?? 1,
             ];
         }, $results);
     }
 
+    /**
+     * Index a single tree-sitter symbol into Qdrant.
+     *
+     * The point id is md5("repo:filepath:symbolName:line"), so it is stable per symbol.
+     *
+     * @param  string  $text  Text to embed; its first 4000 characters are stored as payload content.
+     * @return array{success: bool, error?: string}  'error' is present only on failure.
+     */
+    public function indexSymbol(
+        string $text,
+        string $filepath,
+        string $repo,
+        string $language,
+        string $symbolName,
+        string $symbolKind,
+        int $line,
+        string $signature,
+    ): array {
+        $vector = $this->embeddingService->generate($text);
+        if ($vector === []) {
+            return ['success' => false, 'error' => 'Empty embedding'];
+        }
+
+        $point = [
+            'id' => md5("{$repo}:{$filepath}:{$symbolName}:{$line}"),
+            'vector' => $vector,
+            'payload' => [
+                'filepath' => $filepath,
+                'repo' => $repo,
+                'language' => $language,
+                'symbol_name' => $symbolName,
+                'symbol_kind' => $symbolKind,
+                'line' => $line,
+                'signature' => $signature,
+                'content' => mb_substr($text, 0, 4000),
+                'indexed_at' => now()->toIso8601String(),
+            ],
+        ];
+
+        $response = $this->connector->send(new UpsertPoints(self::COLLECTION_NAME, [$point]));
+
+        return $response->successful()
+            ? ['success' => true]
+            : ['success' => false, 'error' => 'Upsert failed'];
+    }
+
+    /**
+     * Batch-vectorize symbols from a tree-sitter index file.
+     *
+     * Embeds every symbol that passes the kind/language filters; skipped symbols still report progress.
+     *
+     * @param  array<int, string>  $kinds  Symbol kinds to include (empty = all structural kinds)
+     * @param  (callable(int $success, int $failed, int $total): void)|null  $onProgress
+     * @return array{success: int, failed: int, total: int}  All zeros if the index is unreadable or malformed.
+     */
+    public function vectorizeFromIndex(
+        string $indexPath,
+        string $repo,
+        SymbolIndexService $symbolIndex,
+        array $kinds = [],
+        ?string $language = null,
+        ?callable $onProgress = null,
+    ): array {
+        $content = file_get_contents($indexPath);
+        if ($content === false) {
+            return ['success' => 0, 'failed' => 0, 'total' => 0];
+        }
+
+        $index = json_decode($content, true);
+        if (! is_array($index) || ! is_array($index['symbols'] ?? null)) {
+            return ['success' => 0, 'failed' => 0, 'total' => 0];
+        }
+
+        $allowedKinds = $kinds !== [] ? $kinds : ['class', 'method', 'function', 'interface', 'trait', 'enum'];
+
+        // Resolve each symbol's file and language once; non-array entries are dropped.
+        $symbols = [];
+        foreach ($index['symbols'] as $symbol) {
+            if (! is_array($symbol) || ! in_array($symbol['kind'] ?? '', $allowedKinds, true)) {
+                continue;
+            }
+            $file = is_string($symbol['file'] ?? null) ? $symbol['file'] : '';
+            $symbolLanguage = $this->detectLanguage(strtolower(pathinfo($file, PATHINFO_EXTENSION)));
+            if ($language === null || $symbolLanguage === $language) {
+                $symbols[] = [$symbol, $file, $symbolLanguage];
+            }
+        }
+
+        $total = count($symbols);
+        $success = 0;
+        $failed = 0;
+
+        foreach ($symbols as [$symbol, $file, $symbolLanguage]) {
+            $text = $this->buildSymbolText($symbol);
+            if (trim($text) === '') {
+                $failed++;
+            } else {
+                $source = $symbolIndex->getSymbolSource($symbol['id'] ?? '', $repo);
+                if ($source !== null) {
+                    $text .= "\n".$source;
+                }
+                $result = $this->indexSymbol(
+                    text: $text,
+                    filepath: $file,
+                    repo: $repo,
+                    language: $symbolLanguage,
+                    symbolName: $symbol['name'] ?? '',
+                    symbolKind: $symbol['kind'] ?? '',
+                    line: (int) ($symbol['line'] ?? 0),
+                    signature: $symbol['signature'] ?? '',
+                );
+                $result['success'] ? $success++ : $failed++;
+            }
+
+            // Report skipped symbols too, so the final tick (done === total) is never missed.
+            if ($onProgress !== null) {
+                $onProgress($success, $failed, $total);
+            }
+        }
+
+        return ['success' => $success, 'failed' => $failed, 'total' => $total];
+    }
+
+    /**
+     * Build searchable text from a tree-sitter symbol.
+     *
+     * Joins "kind name", signature, summary, docstring and "file: path" with
+     * newlines, skipping parts that are missing or blank.
+     *
+     * @param  array<string, mixed>  $symbol
+     */
+    private function buildSymbolText(array $symbol): string
+    {
+        $file = is_string($symbol['file'] ?? null) ? trim($symbol['file']) : '';
+        $parts = [
+            ($symbol['kind'] ?? '').' '.($symbol['name'] ?? ''),
+            $symbol['signature'] ?? '',
+            $symbol['summary'] ?? '',
+            $symbol['docstring'] ?? '',
+            $file !== '' ? 'file: '.$file : '',
+        ];
+
+        // Filter on !== '' explicitly: a bare array_filter() would also drop a literal "0".
+        return implode("\n", array_filter(
+            array_map(static fn (mixed $part): string => is_scalar($part) ? trim((string) $part) : '', $parts),
+            static fn (string $part): bool => $part !== '',
+        ));
+    }
+
     /**
      * Chunk content into smaller pieces.
* diff --git a/app/Services/SymbolIndexService.php b/app/Services/SymbolIndexService.php index 5c38b22..63e769f 100644 --- a/app/Services/SymbolIndexService.php +++ b/app/Services/SymbolIndexService.php @@ -34,7 +34,8 @@ public function indexFolder(string $path, bool $incremental = false): array $incrementalFlag = $incremental ? 'True' : 'False'; $script = <<run(['python3', '-c', $script]); + $result = Process::timeout(600)->run(['/opt/homebrew/opt/python@3.12/bin/python3.12', '-c', $script]); if (! $result->successful()) { return ['success' => false, 'error' => $result->errorOutput()]; diff --git a/tests/Feature/Commands/VectorizeCodeCommandTest.php b/tests/Feature/Commands/VectorizeCodeCommandTest.php new file mode 100644 index 0000000..845dadd --- /dev/null +++ b/tests/Feature/Commands/VectorizeCodeCommandTest.php @@ -0,0 +1,113 @@ +codeIndexerMock = Mockery::mock(CodeIndexerService::class); + $this->symbolIndexMock = Mockery::mock(SymbolIndexService::class); + $this->app->instance(CodeIndexerService::class, $this->codeIndexerMock); + $this->app->instance(SymbolIndexService::class, $this->symbolIndexMock); +}); + +afterEach(function (): void { + Mockery::close(); +}); + +describe('vectorize-code command', function (): void { + it('fails when index file does not exist', function (): void { + $this->codeIndexerMock->shouldNotReceive('ensureCollection'); + + $this->artisan('vectorize-code', ['repo' => 'local/nonexistent']) + ->assertFailed(); + }); + + it('fails when collection creation fails', function (): void { + $home = getenv('HOME') !== false ? 
(string) getenv('HOME') : '/tmp'; + $indexPath = "{$home}/.code-index/local-test-vectorize.json"; + + // Create temporary index file + @mkdir(dirname($indexPath), 0755, true); + file_put_contents($indexPath, json_encode(['symbols' => []])); + + $this->codeIndexerMock->shouldReceive('ensureCollection') + ->once() + ->andReturn(false); + + $this->artisan('vectorize-code', ['repo' => 'local/test-vectorize']) + ->assertFailed(); + + @unlink($indexPath); + }); + + it('successfully vectorizes symbols', function (): void { + $home = getenv('HOME') !== false ? (string) getenv('HOME') : '/tmp'; + $indexPath = "{$home}/.code-index/local-test-vectorize.json"; + + @mkdir(dirname($indexPath), 0755, true); + file_put_contents($indexPath, json_encode(['symbols' => []])); + + $this->codeIndexerMock->shouldReceive('ensureCollection') + ->once() + ->andReturn(true); + + $this->codeIndexerMock->shouldReceive('vectorizeFromIndex') + ->once() + ->andReturn(['success' => 5, 'failed' => 1, 'total' => 6]); + + $this->artisan('vectorize-code', ['repo' => 'local/test-vectorize']) + ->assertSuccessful(); + + @unlink($indexPath); + }); + + it('passes kind filters', function (): void { + $home = getenv('HOME') !== false ? 
(string) getenv('HOME') : '/tmp'; + $indexPath = "{$home}/.code-index/local-test-vectorize.json"; + + @mkdir(dirname($indexPath), 0755, true); + file_put_contents($indexPath, json_encode(['symbols' => []])); + + $this->codeIndexerMock->shouldReceive('ensureCollection')->once()->andReturn(true); + + $this->codeIndexerMock->shouldReceive('vectorizeFromIndex') + ->withArgs(function (string $path, string $repo, $si, array $kinds) { + return $kinds === ['class', 'method']; + }) + ->once() + ->andReturn(['success' => 3, 'failed' => 0, 'total' => 3]); + + $this->artisan('vectorize-code', [ + 'repo' => 'local/test-vectorize', + '--kind' => ['class', 'method'], + ])->assertSuccessful(); + + @unlink($indexPath); + }); + + it('passes language filter', function (): void { + $home = getenv('HOME') !== false ? (string) getenv('HOME') : '/tmp'; + $indexPath = "{$home}/.code-index/local-test-vectorize.json"; + + @mkdir(dirname($indexPath), 0755, true); + file_put_contents($indexPath, json_encode(['symbols' => []])); + + $this->codeIndexerMock->shouldReceive('ensureCollection')->once()->andReturn(true); + + $this->codeIndexerMock->shouldReceive('vectorizeFromIndex') + ->withArgs(function (string $path, string $repo, $si, array $kinds, ?string $language) { + return $language === 'php'; + }) + ->once() + ->andReturn(['success' => 2, 'failed' => 0, 'total' => 2]); + + $this->artisan('vectorize-code', [ + 'repo' => 'local/test-vectorize', + '--language' => 'php', + ])->assertSuccessful(); + + @unlink($indexPath); + }); +}); diff --git a/tests/Unit/Mcp/Tools/SearchCodeToolTest.php b/tests/Unit/Mcp/Tools/SearchCodeToolTest.php new file mode 100644 index 0000000..ecc7137 --- /dev/null +++ b/tests/Unit/Mcp/Tools/SearchCodeToolTest.php @@ -0,0 +1,148 @@ +group('mcp-tools'); + +beforeEach(function (): void { + $this->codeIndexer = Mockery::mock(CodeIndexerService::class); + $this->tool = new SearchCodeTool($this->codeIndexer); +}); + +describe('search code tool', function (): void { + 
it('returns error when query is missing', function (): void { + $request = new Request([]); + + $response = $this->tool->handle($request); + + expect($response->isError())->toBeTrue(); + }); + + it('returns error when query is too short', function (): void { + $request = new Request(['query' => 'a']); + + $response = $this->tool->handle($request); + + expect($response->isError())->toBeTrue(); + }); + + it('returns empty results when nothing found', function (): void { + $this->codeIndexer->shouldReceive('search') + ->once() + ->andReturn([]); + + $request = new Request(['query' => 'authentication middleware']); + $response = $this->tool->handle($request); + + expect($response->isError())->toBeFalse(); + + $data = json_decode((string) $response->content(), true); + expect($data['results'])->toBeEmpty() + ->and($data['meta']['total'])->toBe(0) + ->and($data['meta']['query'])->toBe('authentication middleware'); + }); + + it('returns formatted results', function (): void { + $this->codeIndexer->shouldReceive('search') + ->once() + ->andReturn([ + [ + 'filepath' => '/app/Http/Middleware/Auth.php', + 'repo' => 'local/pstrax-laravel', + 'language' => 'php', + 'content' => 'class Auth extends Middleware {}', + 'score' => 0.92, + 'functions' => ['handle'], + 'symbol_name' => 'Auth', + 'symbol_kind' => 'class', + 'signature' => 'class Auth extends Middleware', + 'start_line' => 5, + 'end_line' => 30, + ], + ]); + + $request = new Request(['query' => 'authentication middleware']); + $response = $this->tool->handle($request); + + $data = json_decode((string) $response->content(), true); + expect($data['results'])->toHaveCount(1) + ->and($data['results'][0]['filepath'])->toBe('/app/Http/Middleware/Auth.php') + ->and($data['results'][0]['symbol_name'])->toBe('Auth') + ->and($data['results'][0]['symbol_kind'])->toBe('class') + ->and($data['results'][0]['score'])->toBe(0.92) + ->and($data['results'][0]['line'])->toBe(5) + ->and($data['meta']['total'])->toBe(1); + }); + + 
it('passes repo filter to search', function (): void { + $this->codeIndexer->shouldReceive('search') + ->withArgs(function (string $query, int $limit, array $filters): bool { + return $query === 'test' && $filters === ['repo' => 'local/pstrax-laravel']; + }) + ->once() + ->andReturn([]); + + $request = new Request(['query' => 'test', 'repo' => 'local/pstrax-laravel']); + $this->tool->handle($request); + }); + + it('passes language filter to search', function (): void { + $this->codeIndexer->shouldReceive('search') + ->withArgs(function (string $query, int $limit, array $filters): bool { + return $filters === ['language' => 'php']; + }) + ->once() + ->andReturn([]); + + $request = new Request(['query' => 'test', 'language' => 'php']); + $this->tool->handle($request); + }); + + it('respects limit parameter', function (): void { + $this->codeIndexer->shouldReceive('search') + ->withArgs(function (string $query, int $limit): bool { + return $limit === 5; + }) + ->once() + ->andReturn([]); + + $request = new Request(['query' => 'test', 'limit' => 5]); + $this->tool->handle($request); + }); + + it('caps limit at 20', function (): void { + $this->codeIndexer->shouldReceive('search') + ->withArgs(function (string $query, int $limit): bool { + return $limit === 20; + }) + ->once() + ->andReturn([]); + + $request = new Request(['query' => 'test', 'limit' => 50]); + $this->tool->handle($request); + }); + + it('defaults limit to 10', function (): void { + $this->codeIndexer->shouldReceive('search') + ->withArgs(function (string $query, int $limit): bool { + return $limit === 10; + }) + ->once() + ->andReturn([]); + + $request = new Request(['query' => 'test']); + $this->tool->handle($request); + }); + + it('returns non-integer query as error', function (): void { + $request = new Request(['query' => 123]); + + $response = $this->tool->handle($request); + + expect($response->isError())->toBeTrue(); + }); +}); diff --git a/tests/Unit/Services/CodeIndexerServiceTest.php 
b/tests/Unit/Services/CodeIndexerServiceTest.php index 114e549..f8c1f80 100644 --- a/tests/Unit/Services/CodeIndexerServiceTest.php +++ b/tests/Unit/Services/CodeIndexerServiceTest.php @@ -858,6 +858,365 @@ function JsxComponent() { }); }); +describe('indexSymbol', function (): void { + it('successfully indexes a symbol', function (): void { + $this->mockEmbedding->shouldReceive('generate') + ->once() + ->andReturn(array_fill(0, 1024, 0.1)); + + $upsertResponse = createCodeMockResponse(true); + $this->mockConnector->shouldReceive('send') + ->with(Mockery::type(UpsertPoints::class)) + ->once() + ->andReturn($upsertResponse); + + $result = $this->service->indexSymbol( + text: 'class UserController extends Controller', + filepath: 'app/Http/Controllers/UserController.php', + repo: 'local/pstrax', + language: 'php', + symbolName: 'UserController', + symbolKind: 'class', + line: 10, + signature: 'class UserController extends Controller', + ); + + expect($result)->toMatchArray(['success' => true]); + }); + + it('returns error when embedding is empty', function (): void { + $this->mockEmbedding->shouldReceive('generate') + ->once() + ->andReturn([]); + + $result = $this->service->indexSymbol( + text: 'class Foo', + filepath: 'Foo.php', + repo: 'local/test', + language: 'php', + symbolName: 'Foo', + symbolKind: 'class', + line: 1, + signature: 'class Foo', + ); + + expect($result)->toMatchArray(['success' => false, 'error' => 'Empty embedding']); + }); + + it('returns error when upsert fails', function (): void { + $this->mockEmbedding->shouldReceive('generate') + ->once() + ->andReturn(array_fill(0, 1024, 0.1)); + + $upsertResponse = createCodeMockResponse(false, 500); + $this->mockConnector->shouldReceive('send') + ->with(Mockery::type(UpsertPoints::class)) + ->once() + ->andReturn($upsertResponse); + + $result = $this->service->indexSymbol( + text: 'class Foo', + filepath: 'Foo.php', + repo: 'local/test', + language: 'php', + symbolName: 'Foo', + symbolKind: 'class', + 
line: 1, + signature: 'class Foo', + ); + + expect($result)->toMatchArray(['success' => false, 'error' => 'Upsert failed']); + }); + + it('truncates content to 4000 chars', function (): void { + $longText = str_repeat('x', 5000); + + $this->mockEmbedding->shouldReceive('generate') + ->once() + ->andReturn(array_fill(0, 1024, 0.1)); + + $upsertResponse = createCodeMockResponse(true); + $this->mockConnector->shouldReceive('send') + ->with(Mockery::on(function ($request) { + // Verify the upsert request has truncated content + return $request instanceof UpsertPoints; + })) + ->once() + ->andReturn($upsertResponse); + + $result = $this->service->indexSymbol( + text: $longText, + filepath: 'Foo.php', + repo: 'local/test', + language: 'php', + symbolName: 'Foo', + symbolKind: 'class', + line: 1, + signature: 'class Foo', + ); + + expect($result['success'])->toBeTrue(); + }); +}); + +describe('vectorizeFromIndex', function (): void { + it('returns zeros for non-existent file', function (): void { + $tempFile = tempnam(sys_get_temp_dir(), 'idx_'); + unlink($tempFile); // Ensure it doesn't exist + + $symbolIndex = Mockery::mock(\App\Services\SymbolIndexService::class); + + // Suppress the E_WARNING from file_get_contents on non-existent file + $result = @$this->service->vectorizeFromIndex( + $tempFile, + 'local/test', + $symbolIndex, + ); + + expect($result)->toMatchArray(['success' => 0, 'failed' => 0, 'total' => 0]); + }); + + it('returns zeros for invalid JSON', function (): void { + $tempFile = tempnam(sys_get_temp_dir(), 'idx_'); + file_put_contents($tempFile, 'not json'); + + $symbolIndex = Mockery::mock(\App\Services\SymbolIndexService::class); + + $result = $this->service->vectorizeFromIndex($tempFile, 'local/test', $symbolIndex); + + expect($result)->toMatchArray(['success' => 0, 'failed' => 0, 'total' => 0]); + + unlink($tempFile); + }); + + it('returns zeros for JSON without symbols key', function (): void { + $tempFile = tempnam(sys_get_temp_dir(), 'idx_'); + 
file_put_contents($tempFile, json_encode(['no_symbols' => true])); + + $symbolIndex = Mockery::mock(\App\Services\SymbolIndexService::class); + + $result = $this->service->vectorizeFromIndex($tempFile, 'local/test', $symbolIndex); + + expect($result)->toMatchArray(['success' => 0, 'failed' => 0, 'total' => 0]); + + unlink($tempFile); + }); + + it('processes symbols from valid index', function (): void { + $indexData = [ + 'symbols' => [ + [ + 'id' => 'sym-1', + 'kind' => 'class', + 'name' => 'UserController', + 'file' => 'app/Controllers/UserController.php', + 'line' => 10, + 'signature' => 'class UserController', + 'summary' => 'Handles user actions', + ], + ], + ]; + $tempFile = tempnam(sys_get_temp_dir(), 'idx_'); + file_put_contents($tempFile, json_encode($indexData)); + + $symbolIndex = Mockery::mock(\App\Services\SymbolIndexService::class); + $symbolIndex->shouldReceive('getSymbolSource') + ->with('sym-1', 'local/test') + ->once() + ->andReturn('class UserController { }'); + + $this->mockEmbedding->shouldReceive('generate') + ->once() + ->andReturn(array_fill(0, 1024, 0.1)); + + $upsertResponse = createCodeMockResponse(true); + $this->mockConnector->shouldReceive('send') + ->with(Mockery::type(UpsertPoints::class)) + ->once() + ->andReturn($upsertResponse); + + $result = $this->service->vectorizeFromIndex($tempFile, 'local/test', $symbolIndex); + + expect($result)->toMatchArray(['success' => 1, 'failed' => 0, 'total' => 1]); + + unlink($tempFile); + }); + + it('filters by kind', function (): void { + $indexData = [ + 'symbols' => [ + ['id' => 'sym-1', 'kind' => 'class', 'name' => 'Foo', 'file' => 'Foo.php', 'line' => 1, 'signature' => 'class Foo'], + ['id' => 'sym-2', 'kind' => 'function', 'name' => 'bar', 'file' => 'helpers.php', 'line' => 1, 'signature' => 'function bar()'], + ], + ]; + $tempFile = tempnam(sys_get_temp_dir(), 'idx_'); + file_put_contents($tempFile, json_encode($indexData)); + + $symbolIndex = 
Mockery::mock(\App\Services\SymbolIndexService::class); + $symbolIndex->shouldReceive('getSymbolSource')->once()->andReturnNull(); + + $this->mockEmbedding->shouldReceive('generate')->once()->andReturn(array_fill(0, 1024, 0.1)); + + $upsertResponse = createCodeMockResponse(true); + $this->mockConnector->shouldReceive('send') + ->with(Mockery::type(UpsertPoints::class)) + ->once() + ->andReturn($upsertResponse); + + $result = $this->service->vectorizeFromIndex($tempFile, 'local/test', $symbolIndex, ['class']); + + expect($result['total'])->toBe(1) + ->and($result['success'])->toBe(1); + + unlink($tempFile); + }); + + it('filters by language', function (): void { + $indexData = [ + 'symbols' => [ + ['id' => 'sym-1', 'kind' => 'class', 'name' => 'Foo', 'file' => 'Foo.php', 'line' => 1, 'signature' => 'class Foo'], + ['id' => 'sym-2', 'kind' => 'class', 'name' => 'Bar', 'file' => 'Bar.ts', 'line' => 1, 'signature' => 'class Bar'], + ], + ]; + $tempFile = tempnam(sys_get_temp_dir(), 'idx_'); + file_put_contents($tempFile, json_encode($indexData)); + + $symbolIndex = Mockery::mock(\App\Services\SymbolIndexService::class); + $symbolIndex->shouldReceive('getSymbolSource')->once()->andReturnNull(); + + $this->mockEmbedding->shouldReceive('generate')->once()->andReturn(array_fill(0, 1024, 0.1)); + + $upsertResponse = createCodeMockResponse(true); + $this->mockConnector->shouldReceive('send') + ->with(Mockery::type(UpsertPoints::class)) + ->once() + ->andReturn($upsertResponse); + + $result = $this->service->vectorizeFromIndex($tempFile, 'local/test', $symbolIndex, [], 'php'); + + expect($result['total'])->toBe(1) + ->and($result['success'])->toBe(1); + + unlink($tempFile); + }); + + it('calls progress callback', function (): void { + $indexData = [ + 'symbols' => [ + ['id' => 'sym-1', 'kind' => 'class', 'name' => 'Foo', 'file' => 'Foo.php', 'line' => 1, 'signature' => 'class Foo'], + ], + ]; + $tempFile = tempnam(sys_get_temp_dir(), 'idx_'); + file_put_contents($tempFile, 
json_encode($indexData)); + + $symbolIndex = Mockery::mock(\App\Services\SymbolIndexService::class); + $symbolIndex->shouldReceive('getSymbolSource')->once()->andReturnNull(); + + $this->mockEmbedding->shouldReceive('generate')->once()->andReturn(array_fill(0, 1024, 0.1)); + + $upsertResponse = createCodeMockResponse(true); + $this->mockConnector->shouldReceive('send') + ->with(Mockery::type(UpsertPoints::class)) + ->once() + ->andReturn($upsertResponse); + + $progressCalled = false; + $result = $this->service->vectorizeFromIndex( + $tempFile, + 'local/test', + $symbolIndex, + [], + null, + function (int $success, int $failed, int $total) use (&$progressCalled): void { + $progressCalled = true; + expect($total)->toBe(1); + }, + ); + + expect($progressCalled)->toBeTrue(); + + unlink($tempFile); + }); + + it('counts failed symbols with empty text', function (): void { + $indexData = [ + 'symbols' => [ + ['id' => 'sym-1', 'kind' => 'class', 'name' => '', 'file' => '', 'line' => 0, 'signature' => '', 'summary' => '', 'docstring' => ''], + ], + ]; + $tempFile = tempnam(sys_get_temp_dir(), 'idx_'); + file_put_contents($tempFile, json_encode($indexData)); + + $symbolIndex = Mockery::mock(\App\Services\SymbolIndexService::class); + + // buildSymbolText produces "class \n\n\nfile: " which has content, so it won't fail on empty text. + // Instead, simulate an embedding failure. 
+ $symbolIndex->shouldReceive('getSymbolSource')->once()->andReturnNull(); + $this->mockEmbedding->shouldReceive('generate')->once()->andReturn([]); + + $result = $this->service->vectorizeFromIndex($tempFile, 'local/test', $symbolIndex); + + expect($result['failed'])->toBe(1) + ->and($result['success'])->toBe(0); + + unlink($tempFile); + }); + + it('excludes non-structural kinds by default', function (): void { + $indexData = [ + 'symbols' => [ + ['id' => 'sym-1', 'kind' => 'variable', 'name' => '$foo', 'file' => 'Foo.php', 'line' => 1, 'signature' => '$foo'], + ['id' => 'sym-2', 'kind' => 'import', 'name' => 'Bar', 'file' => 'Foo.php', 'line' => 2, 'signature' => 'use Bar'], + ], + ]; + $tempFile = tempnam(sys_get_temp_dir(), 'idx_'); + file_put_contents($tempFile, json_encode($indexData)); + + $symbolIndex = Mockery::mock(\App\Services\SymbolIndexService::class); + + $result = $this->service->vectorizeFromIndex($tempFile, 'local/test', $symbolIndex); + + expect($result['total'])->toBe(0); + + unlink($tempFile); + }); + + it('appends source code when available', function (): void { + $indexData = [ + 'symbols' => [ + ['id' => 'sym-1', 'kind' => 'class', 'name' => 'Foo', 'file' => 'Foo.php', 'line' => 1, 'signature' => 'class Foo'], + ], + ]; + $tempFile = tempnam(sys_get_temp_dir(), 'idx_'); + file_put_contents($tempFile, json_encode($indexData)); + + $symbolIndex = Mockery::mock(\App\Services\SymbolIndexService::class); + $symbolIndex->shouldReceive('getSymbolSource') + ->with('sym-1', 'local/test') + ->once() + ->andReturn('class Foo { public function bar() {} }'); + + $this->mockEmbedding->shouldReceive('generate') + ->withArgs(function (string $text): bool { + return str_contains($text, 'class Foo { public function bar() {} }'); + }) + ->once() + ->andReturn(array_fill(0, 1024, 0.1)); + + $upsertResponse = createCodeMockResponse(true); + $this->mockConnector->shouldReceive('send') + ->with(Mockery::type(UpsertPoints::class)) + ->once() + 
->andReturn($upsertResponse); + + $result = $this->service->vectorizeFromIndex($tempFile, 'local/test', $symbolIndex); + + expect($result['success'])->toBe(1); + + unlink($tempFile); + }); +}); + describe('constructor', function (): void { it('uses default vector size of 1024', function (): void { $service = new CodeIndexerService($this->mockEmbedding);