-
Notifications
You must be signed in to change notification settings - Fork 1.7k
feat: add /api/retrieve endpoint for pure vector search #496
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -537,6 +537,88 @@ async def delete_wiki_cache( | |||||
| logger.warning(f"Wiki cache not found, cannot delete: {cache_path}") | ||||||
| raise HTTPException(status_code=404, detail="Wiki cache not found") | ||||||
|
|
||||||
| class RetrieveRequest(BaseModel): | ||||||
| """Request body for pure RAG retrieval (no LLM).""" | ||||||
| repo_url: str = Field(..., description="Full repository URL") | ||||||
| query: str = Field(..., description="Search query") | ||||||
| type: str = Field(default="github", description="Repository type: github, gitlab, bitbucket") | ||||||
| token: Optional[str] = Field(default=None, description="Access token for private repos") | ||||||
| top_k: int = Field(default=5, ge=1, le=20, description="Number of chunks to return") | ||||||
|
|
||||||
| @app.post("/api/retrieve") | ||||||
| async def retrieve(request: RetrieveRequest): | ||||||
| """Pure vector retrieval — returns relevant code chunks without calling any LLM.""" | ||||||
| from api.rag import RAG | ||||||
| from api.data_pipeline import DatabaseManager | ||||||
| from api.tools.embedder import get_embedder | ||||||
| from api.config import configs, get_embedder_type | ||||||
| from adalflow.components.retriever.faiss_retriever import FAISSRetriever | ||||||
|
|
||||||
| try: | ||||||
| embedder_type = get_embedder_type() | ||||||
| is_ollama = (embedder_type == 'ollama') | ||||||
|
|
||||||
| # Prepare database (loads cached .pkl if available) | ||||||
| db_manager = DatabaseManager() | ||||||
| transformed_docs = db_manager.prepare_database( | ||||||
| request.repo_url, request.type, request.token, embedder_type=embedder_type | ||||||
| ) | ||||||
| if not transformed_docs: | ||||||
| raise HTTPException(status_code=404, detail="No indexed data found for this repo. Index it via the web UI first.") | ||||||
|
|
||||||
| # Use RAG's validation to filter embeddings with consistent sizes | ||||||
| rag_instance = RAG.__new__(RAG) | ||||||
| valid_docs = rag_instance._validate_and_filter_embeddings(transformed_docs) | ||||||
|
Comment on lines
+570
to
+571
Contributor
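There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Using `RAG.__new__(RAG)` to obtain an instance without running `__init__`, only so that `_validate_and_filter_embeddings` can be called, is fragile. If that method does not depend on instance state, consider making it a `@staticmethod`. This would allow you to call it directly on the class (`RAG._validate_and_filter_embeddings(transformed_docs)`). |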
||||||
| if not valid_docs: | ||||||
| raise HTTPException(status_code=404, detail="No valid embeddings found for this repo.") | ||||||
|
|
||||||
| # Build embedder for query | ||||||
| embedder = get_embedder(embedder_type=embedder_type) | ||||||
| if is_ollama: | ||||||
| import weakref | ||||||
| embedder_ref = embedder | ||||||
| def query_embedder(query): | ||||||
| if isinstance(query, list): | ||||||
| query = query[0] | ||||||
| return embedder_ref(input=query) | ||||||
|
Comment on lines
+578
to
+583
Contributor
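There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The `import weakref` is unused, and the `embedder_ref` alias is redundant; the closure can capture `embedder` directly:
```python
def query_embedder(query):
    if isinstance(query, list):
        query = query[0]
    return embedder(input=query)
```
|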
||||||
| else: | ||||||
| query_embedder = embedder | ||||||
|
|
||||||
| # Build FAISS retriever | ||||||
| retriever_config = {**configs["retriever"], "top_k": request.top_k} | ||||||
| retriever = FAISSRetriever( | ||||||
| **retriever_config, | ||||||
| embedder=query_embedder, | ||||||
| documents=valid_docs, | ||||||
| document_map_func=lambda doc: doc.vector, | ||||||
| ) | ||||||
|
|
||||||
| # Retrieve | ||||||
| results = retriever(request.query) | ||||||
| docs = [valid_docs[i] for i in results[0].doc_indices] | ||||||
|
|
||||||
| return { | ||||||
| "query": request.query, | ||||||
| "total_chunks": len(valid_docs), | ||||||
| "results": [ | ||||||
| { | ||||||
| "text": doc.text, | ||||||
| "file_path": doc.meta_data.get("file_path", ""), | ||||||
| "is_code": doc.meta_data.get("is_code", False), | ||||||
| "token_count": doc.meta_data.get("token_count", 0), | ||||||
| } | ||||||
| for doc in docs | ||||||
| ] | ||||||
| } | ||||||
|
Comment on lines
+600
to
+612
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This endpoint returns a raw dictionary. For better type safety, automatic serialization, and API documentation, it's a good practice in FastAPI to define a Pydantic `response_model`. You could define models like this above the endpoint definition:
```python
from typing import List

class RetrieveResult(BaseModel):
    text: str
    file_path: str
    is_code: bool
    token_count: int

class RetrieveResponse(BaseModel):
    query: str
    total_chunks: int
    results: List[RetrieveResult]
```
And then use it in the endpoint:
```python
@app.post("/api/retrieve", response_model=RetrieveResponse)
```
|
||||||
| except HTTPException: | ||||||
| raise | ||||||
| except ValueError as e: | ||||||
| raise HTTPException(status_code=400, detail=str(e)) | ||||||
| except Exception as e: | ||||||
| logger.error(f"Retrieve error: {str(e)}") | ||||||
| raise HTTPException(status_code=500, detail=str(e)) | ||||||
|
Contributor
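There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Exposing raw exception details (`str(e)`) in the HTTP response can leak internal implementation details to clients. Log the full error on the server and return a generic error message instead.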
Suggested change
|
||||||
|
|
||||||
|
|
||||||
| @app.get("/health") | ||||||
| async def health_check(): | ||||||
| """Health check endpoint for Docker and monitoring""" | ||||||
|
|
||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These imports are local to the `retrieve` function. It's generally better to place imports at the top of the file. This improves readability, makes dependencies clear, and allows for import errors to be caught at application startup rather than at runtime when the endpoint is first called. Other endpoints in this file follow this convention, so for consistency it would be good to move these.