From 94b59adbda1f656811d21c82b2d2c5198cb1e655 Mon Sep 17 00:00:00 2001 From: ATMackay Date: Wed, 18 Mar 2026 20:59:39 +1100 Subject: [PATCH 1/4] feat(tools): Add search files tool, enable read file snippet --- agents/documentor/documentor.go | 32 +-- agents/documentor/prompt.go | 16 +- agents/documentor/tools.go | 193 ------------- cmd/documentor.go | 11 +- tools/config.go | 37 +++ .../documentor/repo.go => tools/git_repo.go | 68 ++++- tools/read_file.go | 259 ++++++++++++++++++ tools/search.go | 217 +++++++++++++++ {agents/documentor => tools}/state.go | 4 +- tools/tools.go | 61 +++++ tools/write_file.go | 68 +++++ 11 files changed, 741 insertions(+), 225 deletions(-) delete mode 100644 agents/documentor/tools.go create mode 100644 tools/config.go rename agents/documentor/repo.go => tools/git_repo.go (84%) create mode 100644 tools/read_file.go create mode 100644 tools/search.go rename {agents/documentor => tools}/state.go (81%) create mode 100644 tools/tools.go create mode 100644 tools/write_file.go diff --git a/agents/documentor/documentor.go b/agents/documentor/documentor.go index 657766b..547d76c 100644 --- a/agents/documentor/documentor.go +++ b/agents/documentor/documentor.go @@ -3,10 +3,10 @@ package documentor import ( "context" + "github.com/ATMackay/agent/tools" "google.golang.org/adk/agent" "google.golang.org/adk/agent/llmagent" "google.golang.org/adk/model" - "google.golang.org/adk/tool" ) type Documentor struct { @@ -15,18 +15,16 @@ type Documentor struct { // NewDocumentor returns a Documentor agent. func NewDocumentor(ctx context.Context, cfg *Config, model model.LLM) (*Documentor, error) { - // Configure documentor agent tools - fetchRepoTreeTool, err := NewFetchRepoTreeTool(cfg) - if err != nil { - return nil, err - } - - readRepoFileTool, err := NewReadRepoFileTool(cfg) - if err != nil { - return nil, err - } - - writeOutputTool, err := NewWriteOutputTool(cfg) + // Configure documentor agent tools and dependencies. + deps := tools.Deps{} + deps.AddConfig(tools.FetchRepoTree, tools.FetchRepoTreeConfig{WorkDir: cfg.WorkDir}) + + functionTools, err := tools.GetTools([]tools.Kind{ + tools.FetchRepoTree, // Fetch repository tree to understand the structure of the codebase. + tools.ReadFile, // Read specific files to understand code details and extract relevant information for documentation. + tools.SearchRepo, // Search the repository to find relevant code snippets or information. + tools.WriteFile, // Write documentation or other output files. + }, &deps) if err != nil { return nil, err } @@ -37,12 +35,8 @@ func NewDocumentor(ctx context.Context, cfg *Config, model model.LLM) (*Document Model: model, Description: "Retrieves code from a GitHub repository and writes high-quality markdown documentation.", Instruction: buildInstruction(), - Tools: []tool.Tool{ - fetchRepoTreeTool, // Fetch Git Repository files - readRepoFileTool, // Read files tool - writeOutputTool, // Write output to file tool - }, - OutputKey: StateDocumentation, + Tools: functionTools, + OutputKey: tools.StateDocumentation, }) if err != nil { return nil, err diff --git a/agents/documentor/prompt.go b/agents/documentor/prompt.go index 4df1d45..a4a5cfa 100644 --- a/agents/documentor/prompt.go +++ b/agents/documentor/prompt.go @@ -15,11 +15,11 @@ Workflow: 2. Inspect the manifest and identify the most relevant files for architecture and code-level documentation. 3. Prefer entry points, cmd/, internal/, pkg/, config, and core domain files. 4. Skip tests, generated files, vendor, binaries, and irrelevant assets unless they are central. -5. Do not read more than max_files files. -6. Use resources efficiently. Reading files can be expensive so inspect them in the order that gets highest insight, you may reach the max_files limit before all files can be read. -6. Call read_repo_file for each selected file. -7. Write detailed maintainers' documentation in markdown. -8. Call write_output_file with the completed markdown and output_path. +5. Use the search_repo tool to find specific code snippets or information if needed before reading files. +6. Do not read more than max_files files. +7. Call read_repo_file for selected files. +8. Write detailed maintainers' documentation in markdown. +9. Call write_output_file with the completed markdown and output_path. Requirements: - Explain architecture and package responsibilities. @@ -28,5 +28,11 @@ Requirements: - Mention important file paths and symbol names. - Do not invent behavior beyond the code retrieved. - If repository coverage is partial, say so explicitly. + +Important Constraints: +- Always call fetch_repo_tree first to get the repository structure. +- Use search_repo to find relevant code before reading files to optimize context. +- Do not read more than max_files files; choose wisely based on relevance. +- Write clear, concise, and accurate documentation based on the retrieved code. ` } diff --git a/agents/documentor/tools.go b/agents/documentor/tools.go deleted file mode 100644 index 4c0d0b6..0000000 --- a/agents/documentor/tools.go +++ /dev/null @@ -1,193 +0,0 @@ -package documentor - -import ( - "encoding/json" - "fmt" - "log/slog" - "os" - "path/filepath" - - "google.golang.org/adk/tool" - "google.golang.org/adk/tool/functiontool" -) - -type FetchRepoTreeArgs struct { - RepositoryURL string `json:"repository_url"` - Ref string `json:"ref,omitempty"` - SubPath string `json:"sub_path,omitempty"` -} - -type FileEntry struct { - Path string `json:"path"` - Kind string `json:"kind"` - Size int64 `json:"size,omitempty"` -} - -type FetchRepoTreeResult struct { - FileCount int `json:"file_count"` - Manifest []FileEntry `json:"manifest"` -} - -func newFetchRepoTreeTool(cfg *Config) func(tool.Context, FetchRepoTreeArgs) (FetchRepoTreeResult, error) { - return func(ctx tool.Context, args FetchRepoTreeArgs) (FetchRepoTreeResult, error) { - slog.Info("tool call", "function", "fetch_repo_tree", "args", toJSONString(args)) - localPath, manifest, err := fetchRepoManifest(args.RepositoryURL, args.Ref, args.SubPath, cfg.WorkDir) - if err != nil { - return FetchRepoTreeResult{}, err - } - - raw, err := json.Marshal(manifest) - if err != nil { - return FetchRepoTreeResult{}, err - } - - ctx.Actions().StateDelta[StateRepoURL] = args.RepositoryURL - ctx.Actions().StateDelta[StateRepoRef] = args.Ref - ctx.Actions().StateDelta[StateSubPath] = args.SubPath - ctx.Actions().StateDelta[StateRepoManifest] = string(raw) - ctx.Actions().StateDelta[StateRepoLocalPath] = localPath - - return FetchRepoTreeResult{ - FileCount: len(manifest), - Manifest: manifest, - }, nil - } -} - -// NewFetchRepoTool returns a fetch_repo_tree function tool. -func NewFetchRepoTreeTool(cfg *Config) (tool.Tool, error) { - fetchRepoTreeTool, err := functiontool.New( - functiontool.Config{ - Name: "fetch_repo_tree", - Description: "Download the GitHub repository to a local cache, build a source-file manifest, and store both in state.", - }, - newFetchRepoTreeTool(cfg), - ) - if err != nil { - return nil, fmt.Errorf("create fetch_repo_tree tool: %w", err) - } - return fetchRepoTreeTool, nil -} - -type ReadRepoFileArgs struct { - Path string `json:"path"` -} - -type ReadRepoFileResult struct { - Path string `json:"path"` - Content string `json:"content"` -} - -func newReadRepoFileTool() func(tool.Context, ReadRepoFileArgs) (ReadRepoFileResult, error) { - return func(ctx tool.Context, args ReadRepoFileArgs) (ReadRepoFileResult, error) { - slog.Info("tool call", "function", "read_repo_file", "args", toJSONString(args)) - v, err := ctx.State().Get(StateRepoLocalPath) - if err != nil { - return ReadRepoFileResult{}, fmt.Errorf("read repo local path from state: %w", err) - } - - localPath, ok := v.(string) - if !ok || localPath == "" { - return ReadRepoFileResult{}, fmt.Errorf("repository cache not initialized; call fetch_repo_tree first") - } - - content, err := readRepoFileFromCachedCheckout(localPath, args.Path) - if err != nil { - return ReadRepoFileResult{}, err - } - - loaded := map[string]string{} - existing, err := ctx.State().Get(StateLoadedFiles) - if err == nil && existing != nil { - if s, ok := existing.(string); ok && s != "" { - _ = json.Unmarshal([]byte(s), &loaded) - } - } - - loaded[args.Path] = content - raw, _ := json.Marshal(loaded) - ctx.Actions().StateDelta[StateLoadedFiles] = string(raw) - - return ReadRepoFileResult{ - Path: args.Path, - Content: content, - }, nil - } -} - -// NewFetchRepoTool returns a fetch_repo_tree function tool. -func NewReadRepoFileTool(_ *Config) (tool.Tool, error) { - readRepoFileTool, err := functiontool.New( - functiontool.Config{ - Name: "read_repo_file", - Description: "Read a repository file from the cached checkout and store it in state.", - }, - newReadRepoFileTool(), - ) - if err != nil { - return nil, fmt.Errorf("create read_repo_file tool: %w", err) - } - return readRepoFileTool, nil -} - -type WriteOutputFileArgs struct { - Markdown string `json:"markdown"` - OutputPath string `json:"output_path,omitempty"` -} - -type WriteOutputFileResult struct { - Path string `json:"path"` -} - -func newWriteOutputFileTool() func(tool.Context, WriteOutputFileArgs) (WriteOutputFileResult, error) { - return func(ctx tool.Context, args WriteOutputFileArgs) (WriteOutputFileResult, error) { - slog.Info("tool call", "function", "write_output_file", "content_length", len(toJSONString(args))) - out := args.OutputPath - if out == "" { - v, err := ctx.State().Get(StateOutputPath) - if err == nil { - if s, ok := v.(string); ok { - out = s - } - } - } - if out == "" { - return WriteOutputFileResult{}, fmt.Errorf("output path is required") - } - - if err := writeTextFile(out, args.Markdown); err != nil { - return WriteOutputFileResult{}, err - } - - ctx.Actions().StateDelta[StateDocumentation] = args.Markdown - return WriteOutputFileResult{Path: out}, nil - } -} - -// writeTextFile creates parent directories as needed and writes content to path. -func writeTextFile(path, content string) error { - if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil { - return fmt.Errorf("mkdir %s: %w", filepath.Dir(path), err) - } - return os.WriteFile(path, []byte(content), 0o644) -} - -// NewWriteOutputTool returns a write_output_file function tool. -func NewWriteOutputTool(_ *Config) (tool.Tool, error) { - writeOutputTool, err := functiontool.New( - functiontool.Config{ - Name: "write_output_file", - Description: "Write markdown documentation to the requested output file.", - }, - newWriteOutputFileTool(), - ) - if err != nil { - return nil, fmt.Errorf("create write_output_file tool: %w", err) - } - return writeOutputTool, nil -} - -func toJSONString(v any) string { - b, _ := json.Marshal(v) - return string(b) -} diff --git a/cmd/documentor.go b/cmd/documentor.go index b7cfb10..a1f65e1 100644 --- a/cmd/documentor.go +++ b/cmd/documentor.go @@ -8,6 +8,7 @@ import ( "github.com/ATMackay/agent/agents/documentor" "github.com/ATMackay/agent/model" + "github.com/ATMackay/agent/tools" "github.com/spf13/cobra" "github.com/spf13/viper" agentpkg "google.golang.org/adk/agent" @@ -101,13 +102,13 @@ func NewDocumentorCmd() *cobra.Command { } initState := map[string]any{ - documentor.StateRepoURL: repoURL, - documentor.StateRepoRef: ref, - documentor.StateOutputPath: output, - documentor.StateMaxFiles: maxFiles, + tools.StateRepoURL: repoURL, + tools.StateRepoRef: ref, + tools.StateOutputPath: output, + tools.StateMaxFiles: maxFiles, } if pathPrefix != "" { - initState[documentor.StateSubPath] = pathPrefix + initState[tools.StateSubPath] = pathPrefix } resp, err := sessService.Create(ctx, &session.CreateRequest{ diff --git a/tools/config.go b/tools/config.go new file mode 100644 index 0000000..27ac277 --- /dev/null +++ b/tools/config.go @@ -0,0 +1,37 @@ +package tools + +import "fmt" + +// Deps enables arbitrary tool configurations. TODO may be refactored in future. +type Deps struct { + Configs map[Kind]any +} + +func (d *Deps) AddConfig(kind Kind, cfg any) { + if d.Configs == nil { + d.Configs = make(map[Kind]any) + } + d.Configs[kind] = cfg +} + + +// getConfig returns config for the specified tool type. +func getConfig[T any](kind Kind, deps *Deps) (T, error) { + var zero T + + if deps.Configs == nil { + return zero, fmt.Errorf("no configs provided") + } + + raw, ok := deps.Configs[kind] + if !ok { + return zero, fmt.Errorf("missing config for tool %q", kind) + } + + cfg, ok := raw.(T) + if !ok { + return zero, fmt.Errorf("invalid config type for tool %q: got %T", kind, raw) + } + + return cfg, nil +} \ No newline at end of file diff --git a/agents/documentor/repo.go b/tools/git_repo.go similarity index 84% rename from agents/documentor/repo.go rename to tools/git_repo.go index 36dc255..8e37702 100644 --- a/agents/documentor/repo.go +++ b/tools/git_repo.go @@ -1,8 +1,9 @@ -package documentor +package tools import ( "archive/tar" "compress/gzip" + "encoding/json" "errors" "fmt" "io" @@ -16,6 +17,9 @@ import ( "sort" "strings" "time" + + "google.golang.org/adk/tool" + "google.golang.org/adk/tool/functiontool" ) const ( @@ -24,6 +28,68 @@ const ( httpTimeout = 90 * time.Second ) +type FetchRepoTreeConfig struct { + WorkDir string +} + +type FetchRepoTreeArgs struct { + RepositoryURL string `json:"repository_url"` + Ref string `json:"ref,omitempty"` + SubPath string `json:"sub_path,omitempty"` +} + +type FileEntry struct { + Path string `json:"path"` + Kind string `json:"kind"` + Size int64 `json:"size,omitempty"` +} + +type FetchRepoTreeResult struct { + FileCount int `json:"file_count"` + Manifest []FileEntry `json:"manifest"` +} + +// NewFetchRepoTool returns a fetch_repo_tree function tool. +func NewFetchRepoTreeTool(workDir string) (tool.Tool, error) { + fetchRepoTreeTool, err := functiontool.New( + functiontool.Config{ + Name: "fetch_repo_tree", + Description: "Download the GitHub repository to a local cache, build a source-file manifest, and store both in state.", + }, + newFetchRepoTreeTool(workDir), + ) + if err != nil { + return nil, fmt.Errorf("create fetch_repo_tree tool: %w", err) + } + return fetchRepoTreeTool, nil +} + +func newFetchRepoTreeTool(workDir string) func(tool.Context, FetchRepoTreeArgs) (FetchRepoTreeResult, error) { + return func(ctx tool.Context, args FetchRepoTreeArgs) (FetchRepoTreeResult, error) { + slog.Info("tool call", "function", "fetch_repo_tree", "args", toJSONString(args)) + localPath, manifest, err := fetchRepoManifest(args.RepositoryURL, args.Ref, args.SubPath, workDir) + if err != nil { + return FetchRepoTreeResult{}, err + } + + raw, err := json.Marshal(manifest) + if err != nil { + return FetchRepoTreeResult{}, err + } + + ctx.Actions().StateDelta[StateRepoURL] = args.RepositoryURL + ctx.Actions().StateDelta[StateRepoRef] = args.Ref + ctx.Actions().StateDelta[StateSubPath] = args.SubPath + ctx.Actions().StateDelta[StateRepoManifest] = string(raw) + ctx.Actions().StateDelta[StateRepoLocalPath] = localPath + + return FetchRepoTreeResult{ + FileCount: len(manifest), + Manifest: manifest, + }, nil + } +} + func fetchRepoManifest(repoURL, ref, subPath, workDir string) (string, []FileEntry, error) { if strings.TrimSpace(repoURL) == "" { return "", nil, fmt.Errorf("repository URL is required") diff --git a/tools/read_file.go b/tools/read_file.go new file mode 100644 index 0000000..15c71b2 --- /dev/null +++ b/tools/read_file.go @@ -0,0 +1,259 @@ +package tools + +import ( + "bufio" + "encoding/json" + "fmt" + "log/slog" + "os" + "path/filepath" + "strings" + + "google.golang.org/adk/tool" + "google.golang.org/adk/tool/functiontool" +) + +type ReadFileArgs struct { + Path string `json:"path"` + StartLine int `json:"start_line,omitempty"` + EndLine int `json:"end_line,omitempty"` + MaxBytes int `json:"max_bytes,omitempty"` + FullFile bool `json:"full_file,omitempty"` +} + +type ReadFileResult struct { + Path string `json:"path"` + StartLine int `json:"start_line"` + EndLine int `json:"end_line"` + TotalLines int `json:"total_lines"` + Truncated bool `json:"truncated"` + Content string `json:"content"` +} + +type LoadedFileMeta struct { + Path string `json:"path"` + StartLine int `json:"start_line"` + EndLine int `json:"end_line"` + BytesRead int `json:"bytes_read"` + SnippetOnly bool `json:"snippet_only"` +} + +func newReadFileTool() func(tool.Context, ReadFileArgs) (ReadFileResult, error) { + return func(ctx tool.Context, args ReadFileArgs) (ReadFileResult, error) { + slog.Info("tool call", "function", "read_repo_file", "args", toJSONString(args)) + + v, err := ctx.State().Get(StateRepoLocalPath) + if err != nil { + return ReadFileResult{}, fmt.Errorf("read repo local path from state: %w", err) + } + + localPath, ok := v.(string) + if !ok || localPath == "" { + return ReadFileResult{}, fmt.Errorf("repository cache not initialized; call fetch_repo_tree first") + } + + result, err := ReadFileSnippetFromCachedCheckout(localPath, args) + if err != nil { + return ReadFileResult{}, err + } + + loaded := map[string]LoadedFileMeta{} + existing, err := ctx.State().Get(StateLoadedFiles) + if err == nil && existing != nil { + if s, ok := existing.(string); ok && s != "" { + _ = json.Unmarshal([]byte(s), &loaded) + } + } + + loaded[args.Path] = LoadedFileMeta{ + Path: result.Path, + StartLine: result.StartLine, + EndLine: result.EndLine, + BytesRead: len(result.Content), + SnippetOnly: !args.FullFile || result.StartLine != 1 || result.EndLine != result.TotalLines || result.Truncated, + } + + raw, _ := json.Marshal(loaded) + ctx.Actions().StateDelta[StateLoadedFiles] = string(raw) + + return result, nil + } +} + +func ReadFileSnippetFromCachedCheckout(localPath string, args ReadFileArgs) (ReadFileResult, error) { + if strings.TrimSpace(args.Path) == "" { + return ReadFileResult{}, fmt.Errorf("path is required") + } + + cleanRel := filepath.Clean(args.Path) + if cleanRel == "." || cleanRel == ".." || strings.HasPrefix(cleanRel, ".."+string(os.PathSeparator)) { + return ReadFileResult{}, fmt.Errorf("invalid path %q", args.Path) + } + + fullPath := filepath.Join(localPath, cleanRel) + + absRoot, err := filepath.Abs(localPath) + if err != nil { + return ReadFileResult{}, fmt.Errorf("resolve repo root: %w", err) + } + absFile, err := filepath.Abs(fullPath) + if err != nil { + return ReadFileResult{}, fmt.Errorf("resolve file path: %w", err) + } + if absFile != absRoot && !strings.HasPrefix(absFile, absRoot+string(os.PathSeparator)) { + return ReadFileResult{}, fmt.Errorf("path escapes repository root: %q", args.Path) + } + + info, err := os.Stat(absFile) + if err != nil { + return ReadFileResult{}, fmt.Errorf("stat file %s: %w", args.Path, err) + } + if info.IsDir() { + return ReadFileResult{}, fmt.Errorf("path %q is a directory, not a file", args.Path) + } + + lines, err := readFileLines(absFile) + if err != nil { + return ReadFileResult{}, fmt.Errorf("read file %s: %w", args.Path, err) + } + + totalLines := len(lines) + if totalLines == 0 { + return ReadFileResult{ + Path: args.Path, + StartLine: 0, + EndLine: 0, + TotalLines: 0, + Truncated: false, + Content: "", + }, nil + } + + const ( + defaultSnippetLines = 120 + defaultMaxBytes = 8_000 + hardMaxBytes = 20_000 + ) + + maxBytes := args.MaxBytes + if maxBytes <= 0 { + maxBytes = defaultMaxBytes + } + if maxBytes > hardMaxBytes { + maxBytes = hardMaxBytes + } + + var startLine, endLine int + + switch { + case args.FullFile: + startLine = 1 + endLine = totalLines + + case args.StartLine == 0 && args.EndLine == 0: + startLine = 1 + endLine = min(totalLines, defaultSnippetLines) + + default: + startLine = args.StartLine + endLine = args.EndLine + + if startLine <= 0 { + startLine = 1 + } + if endLine <= 0 { + endLine = startLine + defaultSnippetLines - 1 + } + if endLine < startLine { + return ReadFileResult{}, fmt.Errorf("end_line must be >= start_line") + } + if startLine > totalLines { + return ReadFileResult{}, fmt.Errorf("start_line %d is beyond file length %d", startLine, totalLines) + } + if endLine > totalLines { + endLine = totalLines + } + } + + selected := lines[startLine-1 : endLine] + content, actualEndLine, truncated := joinLinesWithinByteLimit(selected, startLine, maxBytes) + + return ReadFileResult{ + Path: args.Path, + StartLine: startLine, + EndLine: actualEndLine, + TotalLines: totalLines, + Truncated: truncated || actualEndLine < endLine, + Content: content, + }, nil +} + +func readFileLines(path string) ([]string, error) { + f, err := os.Open(path) + if err != nil { + return nil, err + } + defer func() { + if err := f.Close(); err != nil { + slog.Error("error closing file", "error", err) + } + }() + + var lines []string + scanner := bufio.NewScanner(f) + + buf := make([]byte, 0, 64*1024) + scanner.Buffer(buf, 1024*1024) + + for scanner.Scan() { + lines = append(lines, scanner.Text()) + } + if err := scanner.Err(); err != nil { + return nil, err + } + return lines, nil +} + +func joinLinesWithinByteLimit(lines []string, startLine, maxBytes int) (content string, actualEndLine int, truncated bool) { + if len(lines) == 0 { + return "", startLine - 1, false + } + + var b strings.Builder + actualEndLine = startLine - 1 + + for i, line := range lines { + addition := len(line) + if i > 0 { + addition++ + } + + if b.Len()+addition > maxBytes { + truncated = true + break + } + + if i > 0 { + b.WriteByte('\n') + } + b.WriteString(line) + actualEndLine = startLine + i + } + + return b.String(), actualEndLine, truncated +} + +// NewFetchRepoTool returns a fetch_repo_tree function tool. +func NewReadFileTool() (tool.Tool, error) { + ReadFileTool, err := functiontool.New( + functiontool.Config{ + Name: "read_repo_file", + Description: "Read a repository file from the cached checkout and store it in state.", + }, + newReadFileTool(), + ) + if err != nil { + return nil, fmt.Errorf("create read_repo_file tool: %w", err) + } + return ReadFileTool, nil +} diff --git a/tools/search.go b/tools/search.go new file mode 100644 index 0000000..1a0c677 --- /dev/null +++ b/tools/search.go @@ -0,0 +1,217 @@ +package tools + +import ( + "bufio" + "fmt" + "log/slog" + "os" + "path/filepath" + "strings" + + "google.golang.org/adk/tool" + "google.golang.org/adk/tool/functiontool" +) + +type SearchRepoArgs struct { + Query string `json:"query"` + PathPrefix string `json:"path_prefix,omitempty"` + MaxResults int `json:"max_results,omitempty"` + ContextLines int `json:"context_lines,omitempty"` +} + +type SearchMatch struct { + Path string `json:"path"` + Line int `json:"line"` + StartLine int `json:"start_line"` + EndLine int `json:"end_line"` + Snippet string `json:"snippet"` +} + +type SearchRepoResult struct { + Query string `json:"query"` + MatchCount int `json:"match_count"` + Truncated bool `json:"truncated"` + Matches []SearchMatch `json:"matches"` +} + +// NewSearchRepoTool returns a repo search tool +func NewSearchRepoTool() (tool.Tool, error) { + searchRepoTool, err := functiontool.New( + functiontool.Config{ + Name: "search_repo", + Description: "Search the cached repository for text matches and return matching file paths, line numbers, and short snippets. Use this before reading files to locate relevant symbols, functions, types, config keys, or strings.", + }, + newSearchRepoTool(), + ) + if err != nil { + return nil, fmt.Errorf("create search_repo tool: %w", err) + } + return searchRepoTool, nil +} + +func newSearchRepoTool() func(tool.Context, SearchRepoArgs) (SearchRepoResult, error) { + return func(ctx tool.Context, args SearchRepoArgs) (SearchRepoResult, error) { + slog.Info("tool call", "function", "search_repo", "args", toJSONString(args)) + + if strings.TrimSpace(args.Query) == "" { + return SearchRepoResult{}, fmt.Errorf("query is required") + } + + // Sanitize tool args to prevent context overload + if args.MaxResults <= 0 { + args.MaxResults = 20 + } + if args.MaxResults > 100 { + args.MaxResults = 100 + } + if args.ContextLines < 0 { + args.ContextLines = 0 + } + if args.ContextLines > 3 { + args.ContextLines = 3 + } + + v, err := ctx.State().Get(StateRepoLocalPath) + if err != nil { + return SearchRepoResult{}, fmt.Errorf("read repo local path from state: %w", err) + } + + localPath, ok := v.(string) + if !ok || localPath == "" { + return SearchRepoResult{}, fmt.Errorf("repository cache not initialized; call fetch_repo_tree first") + } + + searchRoot := localPath + if args.PathPrefix != "" { + searchRoot = filepath.Join(localPath, filepath.Clean(args.PathPrefix)) + } + + var matches []SearchMatch + truncated := false + + err = filepath.Walk(searchRoot, func(path string, info os.FileInfo, err error) error { + if err != nil { + return nil + } + + rel, relErr := filepath.Rel(localPath, path) + if relErr != nil { + return nil + } + + if info.IsDir() { + if shouldSkipDir(rel) { + return filepath.SkipDir + } + return nil + } + + if shouldSkipFile(rel, info) { + return nil + } + + fileMatches, err := searchFile(path, rel, args.Query, args.ContextLines, args.MaxResults-len(matches)) + if err != nil { + return nil + } + + matches = append(matches, fileMatches...) + if len(matches) >= args.MaxResults { + truncated = true + return fmt.Errorf("search result limit reached") + } + return nil + }) + + // swallow the sentinel-ish stop condition + if err != nil && !strings.Contains(err.Error(), "search result limit reached") { + return SearchRepoResult{}, err + } + + return SearchRepoResult{ + Query: args.Query, + MatchCount: len(matches), + Truncated: truncated, + Matches: matches, + }, nil + } +} + +func searchFile(path, relPath, query string, contextLines, remaining int) ([]SearchMatch, error) { + f, err := os.Open(path) + if err != nil { + return nil, err + } + defer func() { + if err := f.Close(); err != nil { + slog.Error("error closing file", "error", err) + } + }() + + var lines []string + scanner := bufio.NewScanner(f) + + // allow longer lines than bufio default + buf := make([]byte, 0, 64*1024) + scanner.Buffer(buf, 1024*1024) + + for scanner.Scan() { + lines = append(lines, scanner.Text()) + } + if err := scanner.Err(); err != nil { + return nil, err + } + + q := strings.ToLower(query) + var matches []SearchMatch + + for i, line := range lines { + if !strings.Contains(strings.ToLower(line), q) { + continue + } + + start := i - contextLines + if start < 0 { + start = 0 + } + end := i + contextLines + if end >= len(lines) { + end = len(lines) - 1 + } + + snippet := strings.Join(lines[start:end+1], "\n") + + matches = append(matches, SearchMatch{ + Path: relPath, + Line: i + 1, + StartLine: start + 1, + EndLine: end + 1, + Snippet: snippet, + }) + + if len(matches) >= remaining { + break + } + } + + return matches, nil +} + +func shouldSkipFile(rel string, info os.FileInfo) bool { + if info.Size() > 2*1024*1024 { + return true + } + + ext := strings.ToLower(filepath.Ext(rel)) + switch ext { + case ".png", ".jpg", ".jpeg", ".gif", ".webp", ".pdf", ".zip", ".gz", ".tar", ".jar", ".bin", ".exe", ".so", ".dll": + return true + } + + base := filepath.Base(rel) + if strings.HasPrefix(base, ".") && base != ".env" { + return true + } + + return false +} diff --git a/agents/documentor/state.go b/tools/state.go similarity index 81% rename from agents/documentor/state.go rename to tools/state.go index ee50793..6d5a60a 100644 --- a/agents/documentor/state.go +++ b/tools/state.go @@ -1,6 +1,6 @@ -package documentor +package tools -// Session state keys used by the documentor agent. +// Session state keys. TODO agent specific, might refactor... const ( StateRepoURL = "repo_url" StateRepoRef = "repo_ref" diff --git a/tools/tools.go b/tools/tools.go new file mode 100644 index 0000000..5e898db --- /dev/null +++ b/tools/tools.go @@ -0,0 +1,61 @@ +package tools + +import ( + "encoding/json" + "fmt" + + "google.golang.org/adk/tool" +) + +type Kind string + +const ( + FetchRepoTree Kind = "fetch_repo_tree" + ReadFile Kind = "read_file" + SearchRepo Kind = "search_repo" + WriteFile Kind = "write_file" +) + +// GetToolByEnum +func GetToolByEnum(kind Kind, deps *Deps) (tool.Tool, error) { + switch kind { + case FetchRepoTree: + cfg, err := getConfig[FetchRepoTreeConfig](kind, deps) + if err != nil { + return nil, err + } + if cfg.WorkDir == "" { + return nil, fmt.Errorf("fetch_repo_tree requires WorkDir") + } + return NewFetchRepoTreeTool(cfg.WorkDir) + + case ReadFile: + return NewReadFileTool() + + case SearchRepo: + return NewSearchRepoTool() + + case WriteFile: + return NewWriteFileTool() + + default: + return nil, fmt.Errorf("invalid tool kind: %q", kind) + } +} + +func GetTools(kinds []Kind, deps *Deps) ([]tool.Tool, error) { + out := make([]tool.Tool, 0, len(kinds)) + for _, kind := range kinds { + t, err := GetToolByEnum(kind, deps) + if err != nil { + return nil, err + } + out = append(out, t) + } + return out, nil +} + +func toJSONString(v any) string { + b, _ := json.Marshal(v) + return string(b) +} diff --git a/tools/write_file.go b/tools/write_file.go new file mode 100644 index 0000000..8363cab --- /dev/null +++ b/tools/write_file.go @@ -0,0 +1,68 @@ +package tools + +import ( + "fmt" + "log/slog" + "os" + "path/filepath" + + "google.golang.org/adk/tool" + "google.golang.org/adk/tool/functiontool" +) + +type WriteFileArgs struct { + Markdown string `json:"markdown"` + OutputPath string `json:"output_path,omitempty"` +} + +type WriteFileResult struct { + Path string `json:"path"` +} + +// NewWriteFileTool returns a write_output_file function tool. +func NewWriteFileTool() (tool.Tool, error) { + WriteFileTool, err := functiontool.New( + functiontool.Config{ + Name: "write_output_file", + Description: "Write markdown documentation to the requested output file.", + }, + newWriteFileTool(), + ) + if err != nil { + return nil, fmt.Errorf("create write_file tool: %w", err) + } + return WriteFileTool, nil +} + +func newWriteFileTool() func(tool.Context, WriteFileArgs) (WriteFileResult, error) { + return func(ctx tool.Context, args WriteFileArgs) (WriteFileResult, error) { + slog.Info("tool call", "function", string(WriteFile), "content_length", len(toJSONString(args))) + out := args.OutputPath + if out == "" { + v, err := ctx.State().Get(StateOutputPath) + if err == nil { + if s, ok := v.(string); ok { + out = s + } + } + } + if out == "" { + return WriteFileResult{}, fmt.Errorf("output path is required") + } + + if err := writeTextFile(out, args.Markdown); err != nil { + return WriteFileResult{}, err + } + + ctx.Actions().StateDelta[StateDocumentation] = args.Markdown + return WriteFileResult{Path: out}, nil + } +} + +// writeTextFile creates parent directories as needed and writes content to path. +func writeTextFile(path, content string) error { + if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil { + return fmt.Errorf("mkdir %s: %w", filepath.Dir(path), err) + } + return os.WriteFile(path, []byte(content), 0o644) +} From 11bfd0c1add274a7547b15067e2a6a931106a50f Mon Sep 17 00:00:00 2001 From: ATMackay Date: Wed, 18 Mar 2026 22:45:13 +1100 Subject: [PATCH 2/4] remove unused func --- tools/git_repo.go | 39 --------------------------------------- 1 file changed, 39 deletions(-) diff --git a/tools/git_repo.go b/tools/git_repo.go index 8e37702..c52333e 100644 --- a/tools/git_repo.go +++ b/tools/git_repo.go @@ -452,45 +452,6 @@ func buildManifest(root string) ([]FileEntry, error) { return manifest, nil } -func readRepoFileFromCachedCheckout(localRoot, relPath string) (string, error) { - if strings.TrimSpace(localRoot) == "" { - return "", fmt.Errorf("local repository root is required") - } - if strings.TrimSpace(relPath) == "" { - return "", fmt.Errorf("repository path is required") - } - - base := filepath.Clean(localRoot) - cleanRel := filepath.Clean(relPath) - fullPath := filepath.Join(base, cleanRel) - - if !isWithinBase(base, fullPath) { - return "", fmt.Errorf("invalid repository path: %s", relPath) - } - - // Reject symlinks. - info, err := os.Lstat(fullPath) - if err != nil { - return "", fmt.Errorf("stat repository file %s: %w", relPath, err) - } - if info.IsDir() { - return "", fmt.Errorf("path is a directory, not a file: %s", relPath) - } - if info.Mode()&os.ModeSymlink != 0 { - return "", fmt.Errorf("symlinked files are not supported: %s", relPath) - } - - b, err := os.ReadFile(fullPath) - if err != nil { - return "", fmt.Errorf("read repository file %s: %w", relPath, err) - } - - if len(b) > maxReadBytes { - b = b[:maxReadBytes] - } - return string(b), nil -} - func isWithinBase(base, target string) bool { base = filepath.Clean(base) target = filepath.Clean(target) From 9ddbbfb898bed582942d6e2fbfc996f316f61774 Mon Sep 17 00:00:00 2001 From: ATMackay Date: Thu, 19 Mar 2026 00:24:02 +1100 Subject: [PATCH 3/4] update prompt --- agents/documentor/prompt.go | 57 +++++++++++++++++++++++++++++-------- 1 file changed, 45 insertions(+), 12 deletions(-) diff --git a/agents/documentor/prompt.go b/agents/documentor/prompt.go index a4a5cfa..6dddc4a 100644 --- a/agents/documentor/prompt.go +++ b/agents/documentor/prompt.go @@ -10,16 +10,41 @@ Sub-path filter: {sub_path?} Output path: {output_path} Max files to read: {max_files?} +Your goal is to produce high-quality maintainer documentation while minimizing token usage and avoiding unnecessary file reads. + Workflow: 1. Call fetch_repo_tree first using the repository_url, ref, and sub_path from state. -2. Inspect the manifest and identify the most relevant files for architecture and code-level documentation. -3. Prefer entry points, cmd/, internal/, pkg/, config, and core domain files. -4. Skip tests, generated files, vendor, binaries, and irrelevant assets unless they are central. -5. Use the search_repo tool to find specific code snippets or information if needed before reading files. -6. Do not read more than max_files files. -7. Call read_repo_file for selected files. -8. Write detailed maintainers' documentation in markdown. -9. Call write_output_file with the completed markdown and output_path. +2. Inspect the manifest and identify the most likely entry points, core packages, configuration files, and important domain files. +3. Use search_repo before reading files whenever possible. Search for package entry points, main flows, key types, interfaces, constructors, config structures, and important symbols. +4. Prefer targeted investigation over broad exploration. +5. Only call read_repo_file when you have a specific reason to inspect a file. +6. Prefer snippet reads over full-file reads whenever possible. +7. Only read full files when the whole file is required to understand behavior. +8. Do not repeatedly read files unless necessary. +9. Do not read more than max_files files. +10. After gathering enough evidence, write the documentation and call write_output_file with the completed markdown and output_path. + +Reading Strategy: +- Search first, read second. +- Use search_repo to locate relevant symbols, functions, types, config keys, and control flow before reading files. +- Use read_repo_file with targeted line ranges or snippets whenever possible. +- Avoid reading large files in full unless absolutely necessary. +- Do not read files “just in case”. +- Do not read files that are likely irrelevant to architecture or maintainer understanding. +- If a search result is sufficient to identify relevance, only then read the necessary snippet. +- Stop reading once you have enough information to document the system accurately. + +File Selection Priorities: +- Entry points such as main packages, CLI commands, server startup, and initialization code. +- Core domain packages and orchestration flows. +- Important configuration, options, and dependency wiring. +- Public interfaces, constructors, and extension points. +- Files that define important types, state transitions, or external integrations. + +Avoid reading unless clearly necessary: +- Tests, mocks, fixtures, examples, generated files, vendor, binaries, lockfiles, assets, and migration blobs. +- Large utility files unless they are central to understanding system behavior. +- Multiple similar files when one or two representative files are enough. Requirements: - Explain architecture and package responsibilities. @@ -28,11 +53,19 @@ Requirements: - Mention important file paths and symbol names. - Do not invent behavior beyond the code retrieved. - If repository coverage is partial, say so explicitly. +- If documentation is based on selected representative files rather than exhaustive review, say so explicitly. Important Constraints: -- Always call fetch_repo_tree first to get the repository structure. -- Use search_repo to find relevant code before reading files to optimize context. -- Do not read more than max_files files; choose wisely based on relevance. -- Write clear, concise, and accurate documentation based on the retrieved code. +- Always call fetch_repo_tree first. +- Prefer search_repo before read_repo_file. +- Prefer snippet reads before full-file reads. +- Read the fewest files necessary to produce accurate documentation. +- Do not exceed max_files files. +- Avoid token overload by minimizing broad or redundant reads. +- Write clear, concise, and accurate documentation based only on retrieved evidence. + +Decision Rule: +Before each file read, ask: “What specific question am I trying to answer from this file?” +If that question is not specific, search first instead of reading. ` } From 2a6e856b13e6d7fbc0a5c9de9c748e8a089b1503 Mon Sep 17 00:00:00 2001 From: ATMackay Date: Thu, 19 Mar 2026 00:35:55 +1100 Subject: [PATCH 4/4] add license --- LICENSE | 21 +++++++++++++++++++++ Makefile | 2 +- 2 files changed, 22 insertions(+), 1 deletion(-) create mode 100644 LICENSE diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..ce09666 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2026 Alex Mackay + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. \ No newline at end of file diff --git a/Makefile b/Makefile index baf8efb..baf84ea 100644 --- a/Makefile +++ b/Makefile @@ -39,7 +39,7 @@ install: build mv $(BIN) $(GOBIN) run: build - @./$(BUILD_FOLDER)/agent-cli run documentor --repo https://github.com/ATMackay/agent.git + @./$(BUILD_FOLDER)/agent-cli run documentor --repo https://github.com/ATMackay/agent test: @mkdir -p $(COVERAGE_BUILD_FOLDER)