Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .github/instructions/update_version.instructions.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
---
applyTo: '**'
---
After a code change, update the version
After a code change, update the version.
If updating in /docs, do not increment the version.

Example
Before Code Changes
Expand Down
2 changes: 1 addition & 1 deletion application/single_app/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@
EXECUTOR_TYPE = 'thread'
EXECUTOR_MAX_WORKERS = 30
SESSION_TYPE = 'filesystem'
VERSION = "0.241.004"
VERSION = "0.241.006"

SECRET_KEY = os.getenv('SECRET_KEY', 'dev-secret-key-change-in-production')

Expand Down
47 changes: 41 additions & 6 deletions application/single_app/foundry_agent_runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,14 @@ class FoundryAgentInvocationError(RuntimeError):
"""Raised when the Foundry agent invocation cannot be completed."""


def _normalize_max_completion_tokens(value: Any) -> Optional[int]:
try:
normalized = int(value)
except (TypeError, ValueError):
return None
return normalized if normalized > 0 else None


class AzureAIFoundryChatCompletionAgent:
"""Lightweight wrapper so Foundry agents behave like SK chat agents."""

Expand Down Expand Up @@ -107,6 +115,7 @@ def invoke(
global_settings=self._global_settings,
message_history=history,
metadata=metadata,
max_completion_tokens=self.max_completion_tokens,
)
)
except RuntimeError:
Expand Down Expand Up @@ -145,6 +154,7 @@ async def invoke_stream(
global_settings=self._global_settings,
message_history=list(messages),
metadata={},
max_completion_tokens=self.max_completion_tokens,
)
self.last_run_citations = result.citations
self.last_run_model = result.model
Expand Down Expand Up @@ -194,6 +204,7 @@ def invoke(
global_settings=self._global_settings,
message_history=history,
metadata=metadata,
max_completion_tokens=self.max_completion_tokens,
)
)
self.last_run_citations = result.citations
Expand All @@ -211,6 +222,7 @@ async def invoke_stream(
global_settings=self._global_settings,
message_history=list(messages),
metadata={},
max_completion_tokens=self.max_completion_tokens,
):
if stream_message.metadata:
citations = stream_message.metadata.get("citations")
Expand All @@ -228,6 +240,7 @@ async def execute_foundry_agent(
global_settings: Dict[str, Any],
message_history: List[ChatMessageContent],
metadata: Dict[str, Any],
max_completion_tokens: Optional[int] = None,
) -> FoundryAgentInvocationResult:
"""Invoke a Foundry agent using Semantic Kernel's AzureAIAgent abstraction."""

Expand All @@ -248,15 +261,20 @@ async def execute_foundry_agent(
endpoint=endpoint,
api_version=api_version,
)
resolved_max_completion_tokens = _normalize_max_completion_tokens(max_completion_tokens)

try:
definition = await client.agents.get_agent(agent_id)
azure_agent = AzureAIAgent(client=client, definition=definition)
responses = []
async for response in azure_agent.invoke(
messages=message_history,
metadata={k: str(v) for k, v in metadata.items() if v is not None},
):
invoke_kwargs = {
"messages": message_history,
"metadata": {k: str(v) for k, v in metadata.items() if v is not None},
}
if resolved_max_completion_tokens is not None:
invoke_kwargs["max_completion_tokens"] = resolved_max_completion_tokens

async for response in azure_agent.invoke(**invoke_kwargs):
responses.append(response)

if not responses:
Expand Down Expand Up @@ -299,6 +317,7 @@ async def execute_foundry_agent(
"endpoint": endpoint,
"model": model_value,
"message_length": len(text or ""),
"max_completion_tokens": resolved_max_completion_tokens,
},
)

Expand All @@ -321,6 +340,7 @@ async def execute_new_foundry_agent(
global_settings: Dict[str, Any],
message_history: List[ChatMessageContent],
metadata: Dict[str, Any],
max_completion_tokens: Optional[int] = None,
) -> FoundryAgentInvocationResult:
"""Invoke the new Foundry application runtime through its Responses protocol endpoint."""

Expand All @@ -343,7 +363,12 @@ async def execute_new_foundry_agent(
f"{endpoint.rstrip('/')}/applications/{quote(application_name, safe='')}/"
"protocols/openai/responses"
)
payload = _build_new_foundry_request_payload(message_history, metadata, stream=False)
payload = _build_new_foundry_request_payload(
message_history,
metadata,
stream=False,
max_output_tokens=_normalize_max_completion_tokens(max_completion_tokens),
)
headers = {
"Authorization": f"Bearer {token.token}",
"Content-Type": "application/json",
Expand Down Expand Up @@ -376,6 +401,7 @@ async def execute_new_foundry_agent(
"endpoint": endpoint,
"model": result.model,
"message_length": len(result.message),
"max_output_tokens": payload.get("max_output_tokens"),
},
)

Expand All @@ -390,6 +416,7 @@ async def execute_new_foundry_agent_stream(
global_settings: Dict[str, Any],
message_history: List[ChatMessageContent],
metadata: Dict[str, Any],
max_completion_tokens: Optional[int] = None,
) -> AsyncIterator[FoundryAgentStreamMessage]:
"""Stream a new Foundry application response through the Responses API."""

Expand All @@ -413,7 +440,12 @@ async def execute_new_foundry_agent_stream(
"protocols/openai/responses"
)
debug_print(f"Invoking new Foundry application '{application_name}' at {endpoint} with streaming to url {url} with api-version {responses_api_version}")
payload = _build_new_foundry_request_payload(message_history, metadata, stream=True)
payload = _build_new_foundry_request_payload(
message_history,
metadata,
stream=True,
max_output_tokens=_normalize_max_completion_tokens(max_completion_tokens),
)
headers = {
"Authorization": f"Bearer {token.token}",
"Content-Type": "application/json",
Expand Down Expand Up @@ -692,6 +724,7 @@ def _build_new_foundry_request_payload(
message_history: List[ChatMessageContent],
metadata: Dict[str, Any],
stream: bool = False,
max_output_tokens: Optional[int] = None,
) -> Dict[str, Any]:
input_items: List[Dict[str, Any]] = []
for message in message_history:
Expand Down Expand Up @@ -733,6 +766,8 @@ def _build_new_foundry_request_payload(
}
if normalized_metadata:
payload["metadata"] = normalized_metadata
if max_output_tokens is not None:
payload["max_output_tokens"] = max_output_tokens
return payload


Expand Down
2 changes: 1 addition & 1 deletion application/single_app/functions_global_agents.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def ensure_default_global_agent_exists():
),
"actions_to_load": [],
"other_settings": {},
"max_completion_tokens": 4096
"max_completion_tokens": -1
}
save_global_agent(default_agent)
log_event(
Expand Down
136 changes: 115 additions & 21 deletions application/single_app/route_backend_chats.py
Original file line number Diff line number Diff line change
Expand Up @@ -3890,6 +3890,15 @@ def is_tabular_access_limited_analysis(analysis_text):
'do not have direct access',
"don't have",
'do not have',
"doesn't include the full",
'does not include the full',
'only sample rows',
'only workbook metadata',
'only sample rows and workbook metadata',
'cannot accurately list all',
'cannot accurately list them',
'from the current evidence',
'from the evidence provided',
'visible excerpt you provided',
'if those tool-backed results exist',
'allow me to query again',
Expand All @@ -3898,6 +3907,80 @@ def is_tabular_access_limited_analysis(analysis_text):
return any(phrase in normalized_analysis for phrase in inaccessible_phrases)


def get_tabular_result_coverage_summary(invocations):
    """Return whether successful analytical tool calls produced full or partial result coverage.

    Args:
        invocations: Iterable of tool-invocation records (may be None or empty).

    Returns:
        dict with two independent booleans:
            'has_full_result_coverage': at least one call returned every
                matching row/value it reported.
            'has_partial_result_coverage': at least one call returned only a
                slice of the matches it reported.
    """
    coverage_summary = {
        'has_full_result_coverage': False,
        'has_partial_result_coverage': False,
    }

    for invocation in invocations or []:
        result_payload = get_tabular_invocation_result_payload(invocation) or {}

        # Row coverage: compare rows returned against total matches reported.
        # When the >= test fails both counts are known ints, so the converse
        # (returned < total) necessarily holds — a plain else suffices.
        total_matches = parse_tabular_result_count(result_payload.get('total_matches'))
        returned_rows = parse_tabular_result_count(result_payload.get('returned_rows'))
        if total_matches is not None and returned_rows is not None:
            if returned_rows >= total_matches:
                coverage_summary['has_full_result_coverage'] = True
            else:
                coverage_summary['has_partial_result_coverage'] = True

        # Distinct-value coverage: same comparison for value enumerations.
        distinct_count = parse_tabular_result_count(result_payload.get('distinct_count'))
        returned_values = parse_tabular_result_count(result_payload.get('returned_values'))
        if distinct_count is not None and returned_values is not None:
            if returned_values >= distinct_count:
                coverage_summary['has_full_result_coverage'] = True
            else:
                coverage_summary['has_partial_result_coverage'] = True

        # Explicit payload flags can also establish either kind of coverage.
        if result_payload.get('full_rows_included') or result_payload.get('full_values_included'):
            coverage_summary['has_full_result_coverage'] = True
        if result_payload.get('sample_rows_limited') or result_payload.get('values_limited'):
            coverage_summary['has_partial_result_coverage'] = True

        # Once both flags are set no later invocation can change the result.
        if (
            coverage_summary['has_full_result_coverage']
            and coverage_summary['has_partial_result_coverage']
        ):
            break

    return coverage_summary


def build_tabular_success_execution_gap_messages(user_question, analysis_text, invocations):
    """Return retry guidance when a successful tabular analysis still produced an incomplete answer."""
    coverage = get_tabular_result_coverage_summary(invocations)
    full_coverage = coverage['has_full_result_coverage']
    partial_coverage = coverage['has_partial_result_coverage']
    exhaustive_request = question_requests_tabular_exhaustive_results(user_question)

    guidance = []

    # Case 1: the analysis claimed the data was inaccessible even though
    # analytical tool calls succeeded — pick the most specific correction.
    if is_tabular_access_limited_analysis(analysis_text):
        if full_coverage:
            if exhaustive_request:
                guidance.append(
                    'Previous attempt still claimed only sample rows or workbook metadata were available even though successful analytical tool calls returned the full matching result set. Answer directly from those returned rows and list the full results the user asked for.'
                )
            else:
                guidance.append(
                    'Previous attempt still claimed the requested data was unavailable even though successful analytical tool calls returned the full matching result set. Use the returned rows and answer directly.'
                )
        else:
            guidance.append(
                'Previous attempt still claimed the requested data was unavailable even though analytical tool calls succeeded. Use the returned rows and answer directly.'
            )

    # Case 2: the user wanted an exhaustive list but only a partial slice
    # was ever returned (and no call achieved full coverage).
    if exhaustive_request and partial_coverage and not full_coverage:
        guidance.append(
            'The user asked for a full list, but previous analytical calls returned only a partial slice. Rerun the relevant analytical call with a higher max_rows or max_values before answering.'
        )

    return guidance


def _select_likely_workbook_sheet(sheet_names, question_text, per_sheet=None, score_match_fn=None):
"""Return a likely sheet name when the user question strongly matches one sheet."""
score_match_fn = score_match_fn or _score_tabular_sheet_match
Expand Down Expand Up @@ -4408,7 +4491,8 @@ def build_system_prompt(force_tool_use=False, tool_error_messages=None,
"12. Summarize concrete found records sheet-by-sheet using the tool results, not schema placeholders.\n"
"13. For count or percentage questions involving a cohort defined on one sheet and facts on another, prefer get_distinct_values, count_rows, filter_rows_by_related_values, or count_rows_by_related_values over manually counting sampled rows.\n"
"14. Use normalize_match=true when matching names, owners, assignees, engineers, or similar entity-text columns across worksheets.\n"
"15. Do not mention hypothetical follow-up analyses, parser errors, or failed attempts unless the user explicitly asked about failures and you have actual tool error output to report."
"15. If a successful tool result reports returned_rows == total_matches or returned_values == distinct_count, treat that as the full matching result set. Do not claim that only sample rows or workbook metadata are available in that case.\n"
"16. Do not mention hypothetical follow-up analyses, parser errors, or failed attempts unless the user explicitly asked about failures and you have actual tool error output to report."
)

return (
Expand Down Expand Up @@ -4461,8 +4545,9 @@ def build_system_prompt(force_tool_use=False, tool_error_messages=None,
"22. For identifier-based workbook questions, locate the identifier on the correct sheet before explaining downstream calculations.\n"
"23. For peak, busiest, highest, or lowest questions, use grouped functions and inspect the highest_group, highest_value, lowest_group, and lowest_value summary fields.\n"
"24. Return only computed findings and name the strongest drivers clearly.\n"
"25. Do not mention hypothetical follow-up analyses, parser errors, or failed attempts unless the user explicitly asked about failures and you have actual tool error output to report.\n"
"26. When using query_tabular_data, use simple DataFrame.query() syntax with backticked column names for columns containing spaces. Avoid method calls such as .str.lower(), .astype(...), or other Python expressions that DataFrame.query() may reject."
"25. If a successful tool result reports returned_rows == total_matches or returned_values == distinct_count, treat that as the full matching result set. Do not claim that only sample rows or workbook metadata are available in that case.\n"
"26. Do not mention hypothetical follow-up analyses, parser errors, or failed attempts unless the user explicitly asked about failures and you have actual tool error output to report.\n"
"27. When using query_tabular_data, use simple DataFrame.query() syntax with backticked column names for columns containing spaces. Avoid method calls such as .str.lower(), .astype(...), or other Python expressions that DataFrame.query() may reject."
)

baseline_invocations = plugin_logger.get_invocations_for_conversation(
Expand Down Expand Up @@ -4631,10 +4716,19 @@ def build_system_prompt(force_tool_use=False, tool_error_messages=None,
previous_tool_error_messages = []
previous_failed_call_parameters = []
previous_discovery_feedback_messages = []
execution_gap_messages = []
selected_sheets = []
coverage_summary = get_tabular_result_coverage_summary(
successful_analytical_invocations
)
retry_gap_messages = build_tabular_success_execution_gap_messages(
user_question,
analysis,
successful_analytical_invocations,
)

if entity_lookup_mode:
selected_sheets = get_tabular_invocation_selected_sheets(successful_analytical_invocations)
execution_gap_messages = []

# Cross-sheet results ("ALL (cross-sheet search)") already span
# the entire workbook — no execution gap for sheet coverage.
Expand All @@ -4648,24 +4742,24 @@ def build_system_prompt(force_tool_use=False, tool_error_messages=None,
f"Previous attempt only queried worksheet(s): {rendered_selected_sheets}. The question asks for related records across worksheets, so query additional relevant sheets explicitly with sheet_name."
)

if is_tabular_access_limited_analysis(analysis):
execution_gap_messages.append(
'Previous attempt still claimed the requested data was unavailable even though analytical tool calls succeeded. Use the returned rows and answer directly.'
)
execution_gap_messages.extend(retry_gap_messages)

if execution_gap_messages and attempt_number < 3:
previous_execution_gap_messages = execution_gap_messages
log_event(
f"[Tabular SK Analysis] Attempt {attempt_number} entity lookup was incomplete despite successful tool calls; retrying",
extra={
'selected_sheets': selected_sheets,
'execution_gaps': previous_execution_gap_messages,
'successful_tool_count': len(successful_analytical_invocations),
},
level=logging.WARNING,
)
baseline_invocation_count = len(invocations_after)
continue
if execution_gap_messages and attempt_number < 3:
previous_execution_gap_messages = execution_gap_messages
log_event(
f"[Tabular SK Analysis] Attempt {attempt_number} analysis was incomplete despite successful tool calls; retrying",
extra={
'selected_sheets': selected_sheets,
'execution_gaps': previous_execution_gap_messages,
'successful_tool_count': len(successful_analytical_invocations),
'has_full_result_coverage': coverage_summary.get('has_full_result_coverage', False),
'has_partial_result_coverage': coverage_summary.get('has_partial_result_coverage', False),
'entity_lookup_mode': entity_lookup_mode,
},
level=logging.WARNING,
)
baseline_invocation_count = len(invocations_after)
continue

previous_execution_gap_messages = []
log_event(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@
"type": "integer",
"minimum": -1,
"maximum": 512000,
"default": 4096
"default": -1
}
},
"required": [
Expand Down
3 changes: 0 additions & 3 deletions docs/_includes/latest_release_card.html
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,6 @@ <h3 class="latest-release-card-title">

<div class="latest-release-card-actions">
<a class="btn btn-primary btn-sm" href="{{ feature.url | relative_url }}">{{ feature.cta_label | default: 'Read guide' }}</a>
{% if primary_image %}
<a class="btn btn-outline-secondary btn-sm" href="{{ primary_image | relative_url }}" target="_blank" rel="noopener">Open image</a>
{% endif %}
</div>
</div>
</article>
Loading
Loading