diff --git a/README.md b/README.md index c87cea3..f4cbd9d 100644 --- a/README.md +++ b/README.md @@ -237,6 +237,29 @@ pnpm run build # Watch mode pnpm run dev +``` + +### Debugging the TUI + +If the TUI crashes (e.g. when pressing Enter on a form field), you can capture logs to inspect the error: + +**Option 1 – stderr to file (no debug env)** +Useful to see uncaught errors and stack traces that the app writes to stderr: + +```bash +pnpm run build +pnpm run start:debug +# Reproduce the crash, then: +cat debug.log +``` + +**Option 2 – run under Node with inspector** +To get a stack trace from an uncaught exception, run with Node’s inspector and reproduce the crash; the process will pause and you can inspect the stack: + +```bash +node --inspect-brk dist/cli.js +# Attach Chrome/Edge to the URL shown (e.g. chrome://inspect) and resume; reproduce the crash. +``` ## Contributing diff --git a/src/screens/BenchmarkJobCreateScreen.tsx b/src/screens/BenchmarkJobCreateScreen.tsx index 565e017..c1af08b 100644 --- a/src/screens/BenchmarkJobCreateScreen.tsx +++ b/src/screens/BenchmarkJobCreateScreen.tsx @@ -29,12 +29,20 @@ import { type OrchestratorConfig, } from "../services/benchmarkJobService.js"; import type { Benchmark } from "../store/benchmarkStore.js"; +import { getClient } from "../utils/client.js"; + +/** Secret list item for account secrets picker */ +interface SecretListItem { + id: string; + name: string; +} type FormField = | "source_type" | "benchmark" | "scenarios" | "agents" + | "secrets" | "model_names" | "name" | "agent_timeout" @@ -49,6 +57,8 @@ interface FormData { scenarioNames: string[]; agentIds: string[]; agentNames: string[]; + /** Env var name -> secret name (account secret) */ + secretsMapping: Record; /** Comma-separated model names (one per agent, or one value applied to all) */ modelNamesInput: string; name: string; @@ -61,6 +71,9 @@ type ScreenState = | "picking_benchmark" | "picking_scenarios" | "picking_agents" + | "secrets_config" + | 
"picking_secret" + | "entering_env_var" | "creating" | "success" | "error"; @@ -80,6 +93,193 @@ interface BenchmarkJobCreateScreenProps { cloneConcurrentTrials?: string; } +/** + * Secrets config sub-screen: list mappings, Add, Done. Handles its own input so hooks are stable. + */ +function SecretsConfigView({ + mappingEntries, + selectedIndex, + onSelectIndex, + onAdd, + onDone, + onRemove, + onBack, +}: { + mappingEntries: [string, string][]; + selectedIndex: number; + onSelectIndex: (i: number) => void; + onAdd: () => void; + onDone: () => void; + onRemove: (envVar: string) => void; + onBack: () => void; +}) { + const totalOptions = mappingEntries.length + 2; + const idx = Math.min(selectedIndex, Math.max(0, totalOptions - 1)); + + useInput((_input, key) => { + if (key.upArrow && idx > 0) { + onSelectIndex(idx - 1); + } else if (key.downArrow && idx < totalOptions - 1) { + onSelectIndex(idx + 1); + } else if (key.return) { + if (idx === mappingEntries.length) { + onAdd(); + } else if (idx === mappingEntries.length + 1) { + onDone(); + } else { + const keyToRemove = mappingEntries[idx][0]; + onRemove(keyToRemove); + onSelectIndex(Math.max(0, idx - 1)); + } + } else if (key.escape) { + onBack(); + } + }); + + return ( + <> + + + + + {figures.pointer} Secrets (env → secret) + + + {mappingEntries.map(([envVar, secretName], i) => ( + + + + {idx === i ? figures.pointer : " "} + + + + {envVar} → {secretName} + + {idx === i && ( + + {" "} + Enter to remove + + )} + + ))} + + + + {idx === mappingEntries.length ? figures.pointer : " "} + + + + + Add secret + + + + + + {idx === mappingEntries.length + 1 ? figures.pointer : " "} + + + + Done + + + + + + ); +} + +/** + * Inline view to enter env var name for a selected secret + * Pre-fills with secret name so Enter uses it as-is; user can edit if needed. 
+ */ +function EnvVarInputView({ + secretName, + onSubmit, + onCancel, +}: { + secretName: string; + onSubmit: (value: string) => void; + onCancel: () => void; +}) { + const [value, setValue] = React.useState(secretName); + useInput((_input, key) => { + if (key.return) { + onSubmit(value.trim() || secretName); + } else if (key.escape) { + onCancel(); + } + }); + return ( + <> + + + + + Env var name for secret "{secretName}": + + + + onSubmit(value.trim() || secretName)} + /> + + + + + ); +} + /** * Success screen component with input handling */ @@ -174,16 +374,28 @@ export function BenchmarkJobCreateScreen({ const [formData, setFormData] = React.useState(() => { let modelNamesInput = ""; + let secretsMapping: Record = {}; try { if (cloneAgentConfigs) { const arr = JSON.parse(cloneAgentConfigs) as Array<{ modelName?: string | null; model_name?: string | null; + secrets?: Record; + secret_names?: Record; }>; modelNamesInput = arr .map((a) => a.modelName ?? a.model_name ?? "") .filter(Boolean) .join(", "); + // Merge secrets from all agent configs into one mapping (clone prefill) + const allSecrets = arr + .map((a) => a.secrets ?? a.secret_names) + .filter( + (s): s is Record => !!s && typeof s === "object", + ); + if (allSecrets.length > 0) { + secretsMapping = Object.assign({}, ...allSecrets); + } } } catch { // ignore invalid JSON @@ -196,6 +408,7 @@ export function BenchmarkJobCreateScreen({ scenarioNames: [], agentIds: cloneAgentIds ? cloneAgentIds.split(",") : [], agentNames: cloneAgentNames ? cloneAgentNames.split(",") : [], + secretsMapping, modelNamesInput, name: cloneJobName ? 
`${cloneJobName} (clone)` : "", agentTimeout: cloneAgentTimeout || "", @@ -205,6 +418,14 @@ export function BenchmarkJobCreateScreen({ const [createdJob, setCreatedJob] = React.useState(null); const [error, setError] = React.useState(null); + /** When adding a secret: selected secret awaiting env var name */ + const [pendingSecretForEnv, setPendingSecretForEnv] = React.useState<{ + id: string; + name: string; + } | null>(null); + /** In secrets_config, index of the highlighted row: a mapping row (Enter removes it), then Add at mappingEntries.length and Done at mappingEntries.length + 1 */ + const [secretsConfigSelectedIndex, setSecretsConfigSelectedIndex] = + React.useState(0); // Handle Ctrl+C to exit useExitOnCtrlC(); @@ -288,6 +509,16 @@ export function BenchmarkJobCreateScreen({ required: true, description: "Select one or more agents to run", }, + { + key: "secrets", + label: "Secrets (env → secret)", + type: "picker", + required: false, + description: + cloneFromJobId && Object.keys(formData.secretsMapping).length === 0 + ? "Optional. The API does not return secrets on job fetch; add any needed env→secret mappings here." + : "Optional. Map environment variable names to account secrets.", + }, { key: "model_names", label: "Model names (comma-separated, optional)", @@ -464,6 +695,55 @@ export function BenchmarkJobCreateScreen({ [fetchAgentsPage], ); + // Fetch account secrets for picker (client-side pagination) + const fetchSecretsPage = React.useCallback( + async (params: { limit: number; startingAt?: string; search?: string }) => { + const client = getClient(); + const result = await client.secrets.list({ limit: 5000 }); + const raw = (result.secrets || []) as Array<{ id: string; name: string }>; + let items = raw.map((s) => ({ id: s.id, name: s.name || s.id })); + if (params.search) { + const q = params.search.toLowerCase(); + items = items.filter( + (s) => + s.name.toLowerCase().includes(q) || s.id.toLowerCase().includes(q), + ); + } + const startIdx = params.startingAt + ? 
items.findIndex((s) => s.id === params.startingAt) + 1 + : 0; + const page = items.slice(startIdx, startIdx + params.limit); + return { + items: page, + hasMore: startIdx + params.limit < items.length, + totalCount: items.length, + }; + }, + [], + ); + + const secretPickerConfig = React.useMemo( + () => ({ + title: "Select Secret", + fetchPage: fetchSecretsPage, + getItemId: (s: SecretListItem) => s.id, + getItemLabel: (s: SecretListItem) => s.name, + getItemStatus: () => undefined, + mode: "single" as const, + minSelection: 1, + emptyMessage: "No secrets found", + searchPlaceholder: "Search secrets...", + breadcrumbItems: [ + { label: "Home" }, + { label: "Benchmarks" }, + { label: "Jobs" }, + { label: "Create" }, + { label: "Select Secret", active: true }, + ], + }), + [fetchSecretsPage], + ); + // Handle benchmark selection (single) const handleBenchmarkSelect = React.useCallback((items: Benchmark[]) => { if (items.length > 0) { @@ -497,6 +777,37 @@ export function BenchmarkJobCreateScreen({ setScreenState("form"); }, []); + // After picking a secret: set pending and go to env var input + const handleSecretSelect = React.useCallback((items: SecretListItem[]) => { + if (items.length > 0) { + const s = items[0]; + setPendingSecretForEnv({ id: s.id, name: s.name }); + setScreenState("entering_env_var"); + } else { + setScreenState("secrets_config"); + } + }, []); + + // After entering env var for pending secret: add mapping and return to secrets_config + // If envVarName is empty, use secret name as-is for the mapping (env var name = secret name). 
+ const handleEnvVarForSecretSubmit = React.useCallback( + (envVarName: string) => { + const envVarToUse = envVarName.trim() || pendingSecretForEnv?.name || ""; + if (envVarToUse && pendingSecretForEnv) { + setFormData((prev) => ({ + ...prev, + secretsMapping: { + ...prev.secretsMapping, + [envVarToUse]: pendingSecretForEnv.name, + }, + })); + } + setPendingSecretForEnv(null); + setScreenState("secrets_config"); + }, + [pendingSecretForEnv], + ); + // Handle create const handleCreate = React.useCallback(async () => { if (!isFormValid) return; @@ -545,6 +856,13 @@ export function BenchmarkJobCreateScreen({ }); } + // Form secrets are source of truth: apply to all agents + if (Object.keys(formData.secretsMapping).length > 0) { + for (const config of agentConfigs) { + config.secrets = { ...formData.secretsMapping }; + } + } + // Use cloned orchestrator config if available, otherwise build from form let orchestratorConfig: OrchestratorConfig | undefined; if (cloneOrchestratorConfig) { @@ -617,6 +935,12 @@ export function BenchmarkJobCreateScreen({ currentField === "agents" ) { setScreenState("picking_agents"); + } else if ( + currentFieldDef?.type === "picker" && + currentField === "secrets" + ) { + setScreenState("secrets_config"); + setSecretsConfigSelectedIndex(0); } else if ( currentFieldDef?.type === "action" && currentField === "create" @@ -629,6 +953,54 @@ export function BenchmarkJobCreateScreen({ } }); + // ----- Secrets sub-flow ----- + const mappingEntries = Object.entries(formData.secretsMapping); + + if (screenState === "secrets_config") { + return ( + setScreenState("picking_secret")} + onDone={() => setScreenState("form")} + onRemove={(envVar) => { + setFormData((prev) => { + const next = { ...prev.secretsMapping }; + delete next[envVar]; + return { ...prev, secretsMapping: next }; + }); + setSecretsConfigSelectedIndex((i) => Math.max(0, i - 1)); + }} + onBack={() => setScreenState("form")} + /> + ); + } + + if (screenState === "entering_env_var" && 
pendingSecretForEnv) { + return ( + handleEnvVarForSecretSubmit(val)} + onCancel={() => { + setPendingSecretForEnv(null); + setScreenState("secrets_config"); + }} + /> + ); + } + + if (screenState === "picking_secret") { + return ( + + config={secretPickerConfig} + onSelect={handleSecretSelect} + onCancel={() => setScreenState("secrets_config")} + initialSelected={[]} + /> + ); + } + // Show benchmark picker (single-select) if (screenState === "picking_benchmark") { return ( @@ -750,6 +1122,13 @@ export function BenchmarkJobCreateScreen({ if (formData.agentNames.length === 0) return ""; if (formData.agentNames.length === 1) return formData.agentNames[0]; return `${formData.agentNames.length} agents selected`; + case "secrets": { + const keys = Object.keys(formData.secretsMapping); + if (keys.length === 0) return ""; + if (keys.length === 1) + return `${keys[0]} → ${formData.secretsMapping[keys[0]]}`; + return `${keys.length} mappings`; + } case "model_names": return formData.modelNamesInput; case "name": diff --git a/src/screens/BenchmarkJobDetailScreen.tsx b/src/screens/BenchmarkJobDetailScreen.tsx index d71ce72..5d0ae47 100644 --- a/src/screens/BenchmarkJobDetailScreen.tsx +++ b/src/screens/BenchmarkJobDetailScreen.tsx @@ -633,15 +633,24 @@ export function BenchmarkJobDetailScreen({ // Extract agent configs - both full configs and legacy fields if (resource.job_spec?.agent_configs) { - const agentConfigs = resource.job_spec.agent_configs.map((a: any) => ({ - agentId: a.agent_id, - name: a.name, - modelName: a.model_name, - timeoutSeconds: a.timeout_seconds, - kwargs: a.kwargs, - environmentVariables: a.agent_environment?.environment_variables, - secrets: a.agent_environment?.secrets, - })); + const agentConfigs = resource.job_spec.agent_configs.map((a: any) => { + const env = a.agent_environment; + const secrets = + env?.secrets ?? + env?.secret_names ?? + (typeof env?.secret_refs === "object" && env.secret_refs + ? 
env.secret_refs + : undefined); + return { + agentId: a.agent_id, + name: a.name, + modelName: a.model_name, + timeoutSeconds: a.timeout_seconds, + kwargs: a.kwargs, + environmentVariables: env?.environment_variables, + secrets, + }; + }); cloneParams.cloneAgentConfigs = JSON.stringify(agentConfigs); // Also extract legacy fields for form initialization diff --git a/src/screens/BenchmarkJobListScreen.tsx b/src/screens/BenchmarkJobListScreen.tsx index 83475e0..8bd0b56 100644 --- a/src/screens/BenchmarkJobListScreen.tsx +++ b/src/screens/BenchmarkJobListScreen.tsx @@ -318,16 +318,24 @@ export function BenchmarkJobListScreen() { // Extract agent configs - both full configs and legacy fields if (selectedJob.job_spec?.agent_configs) { const agentConfigs = selectedJob.job_spec.agent_configs.map( - (a: any) => ({ - agentId: a.agent_id, - name: a.name, - modelName: a.model_name, - timeoutSeconds: a.timeout_seconds, - kwargs: a.kwargs, - environmentVariables: - a.agent_environment?.environment_variables, - secrets: a.agent_environment?.secrets, - }), + (a: any) => { + const env = a.agent_environment; + const secrets = + env?.secrets ?? + env?.secret_names ?? + (typeof env?.secret_refs === "object" && env.secret_refs + ? 
env.secret_refs + : undefined); + return { + agentId: a.agent_id, + name: a.name, + modelName: a.model_name, + timeoutSeconds: a.timeout_seconds, + kwargs: a.kwargs, + environmentVariables: env?.environment_variables, + secrets, + }; + }, ); cloneParams.cloneAgentConfigs = JSON.stringify(agentConfigs); @@ -396,15 +404,24 @@ export function BenchmarkJobListScreen() { // Extract agent configs - both full configs and legacy fields if (selectedJob.job_spec?.agent_configs) { const agentConfigs = selectedJob.job_spec.agent_configs.map( - (a: any) => ({ - agentId: a.agent_id, - name: a.name, - modelName: a.model_name, - timeoutSeconds: a.timeout_seconds, - kwargs: a.kwargs, - environmentVariables: a.agent_environment?.environment_variables, - secrets: a.agent_environment?.secrets, - }), + (a: any) => { + const env = a.agent_environment; + const secrets = + env?.secrets ?? + env?.secret_names ?? + (typeof env?.secret_refs === "object" && env.secret_refs + ? env.secret_refs + : undefined); + return { + agentId: a.agent_id, + name: a.name, + modelName: a.model_name, + timeoutSeconds: a.timeout_seconds, + kwargs: a.kwargs, + environmentVariables: env?.environment_variables, + secrets, + }; + }, ); cloneParams.cloneAgentConfigs = JSON.stringify(agentConfigs); @@ -504,15 +521,24 @@ export function BenchmarkJobListScreen() { // Extract agent configs - both full configs and legacy fields if (selectedJob.job_spec?.agent_configs) { const agentConfigs = selectedJob.job_spec.agent_configs.map( - (a: any) => ({ - agentId: a.agent_id, - name: a.name, - modelName: a.model_name, - timeoutSeconds: a.timeout_seconds, - kwargs: a.kwargs, - environmentVariables: a.agent_environment?.environment_variables, - secrets: a.agent_environment?.secrets, - }), + (a: any) => { + const env = a.agent_environment; + const secrets = + env?.secrets ?? + env?.secret_names ?? + (typeof env?.secret_refs === "object" && env.secret_refs + ? 
env.secret_refs + : undefined); + return { + agentId: a.agent_id, + name: a.name, + modelName: a.model_name, + timeoutSeconds: a.timeout_seconds, + kwargs: a.kwargs, + environmentVariables: env?.environment_variables, + secrets, + }; + }, ); cloneParams.cloneAgentConfigs = JSON.stringify(agentConfigs); diff --git a/src/screens/BenchmarkListScreen.tsx b/src/screens/BenchmarkListScreen.tsx index fe953b3..0c6d35c 100644 --- a/src/screens/BenchmarkListScreen.tsx +++ b/src/screens/BenchmarkListScreen.tsx @@ -24,7 +24,10 @@ import { useViewportHeight } from "../hooks/useViewportHeight.js"; import { useExitOnCtrlC } from "../hooks/useExitOnCtrlC.js"; import { useCursorPagination } from "../hooks/useCursorPagination.js"; import { useListSearch } from "../hooks/useListSearch.js"; -import { listBenchmarks } from "../services/benchmarkService.js"; +import { + listBenchmarks, + listPublicBenchmarks, +} from "../services/benchmarkService.js"; import type { Benchmark } from "../store/benchmarkStore.js"; export function BenchmarkListScreen() { @@ -33,6 +36,7 @@ export function BenchmarkListScreen() { const [selectedIndex, setSelectedIndex] = React.useState(0); const [showPopup, setShowPopup] = React.useState(false); const [selectedOperation, setSelectedOperation] = React.useState(0); + const [showPublic, setShowPublic] = React.useState(false); // Search state const search = useListSearch({ @@ -61,7 +65,8 @@ export function BenchmarkListScreen() { // Fetch function for pagination hook const fetchPage = React.useCallback( async (params: { limit: number; startingAt?: string }) => { - const result = await listBenchmarks({ + const listFn = showPublic ? 
listPublicBenchmarks : listBenchmarks; + const result = await listFn({ limit: params.limit, startingAfter: params.startingAt, search: search.submittedSearchQuery || undefined, @@ -73,7 +78,7 @@ export function BenchmarkListScreen() { totalCount: result.totalCount, }; }, - [search.submittedSearchQuery], + [showPublic, search.submittedSearchQuery], ); // Use the shared pagination hook @@ -94,7 +99,7 @@ export function BenchmarkListScreen() { getItemId: (benchmark: Benchmark) => benchmark.id, pollInterval: 5000, pollingEnabled: !showPopup && !search.searchMode, - deps: [PAGE_SIZE, search.submittedSearchQuery], + deps: [PAGE_SIZE, search.submittedSearchQuery, showPublic], }); // Operations for benchmarks @@ -271,6 +276,9 @@ export function BenchmarkListScreen() { }); } else if (input === "/") { search.enterSearchMode(); + } else if (input === "t") { + setShowPublic((prev) => !prev); + setSelectedIndex(0); } else if (key.escape) { if (search.handleEscape()) { return; @@ -339,11 +347,11 @@ export function BenchmarkListScreen() { data={benchmarks} keyExtractor={(benchmark: Benchmark) => benchmark.id} selectedIndex={selectedIndex} - title={`benchmarks[${totalCount}]`} + title={`benchmarks[${totalCount}] ${showPublic ? "(public)" : "(private)"}`} columns={columns} emptyState={ - {figures.info} No benchmarks found + {figures.info} No {showPublic ? "public " : ""}benchmarks found } /> @@ -359,6 +367,13 @@ export function BenchmarkListScreen() { {" "} total + + {" "} + • {showPublic ? "Public" : "Custom"} + {totalPages > 1 && ( <> @@ -421,6 +436,7 @@ export function BenchmarkListScreen() { { key: "Enter", label: "Details" }, { key: "c", label: "Create Job" }, { key: "a", label: "Actions" }, + { key: "t", label: showPublic ? 
"Custom" : "Public" }, { key: "/", label: "Search" }, { key: "Esc", label: "Back" }, ]} diff --git a/src/services/benchmarkService.ts b/src/services/benchmarkService.ts index e637346..d1aabb8 100644 --- a/src/services/benchmarkService.ts +++ b/src/services/benchmarkService.ts @@ -214,7 +214,7 @@ export async function listPublicBenchmarks( } /** - * Create/start a benchmark run with selected benchmarks + * Create/start a benchmark run with selected benchmarks (POST /v1/benchmark_runs) */ export async function createBenchmarkRun( benchmarkIds: string[],