diff --git a/.github/instructions/update_version.instructions.md b/.github/instructions/update_version.instructions.md index db322826..6df1876e 100644 --- a/.github/instructions/update_version.instructions.md +++ b/.github/instructions/update_version.instructions.md @@ -1,7 +1,8 @@ --- applyTo: '**' --- -After a code change, update the version +After a code change, update the version. +If updating in /docs, do not increment the version. Example Before Code Changes diff --git a/.github/workflows/release-notes-check.yml b/.github/workflows/release-notes-check.yml index 9a9f0d1f..4c88702f 100644 --- a/.github/workflows/release-notes-check.yml +++ b/.github/workflows/release-notes-check.yml @@ -25,15 +25,10 @@ jobs: uses: tj-actions/changed-files@v46.0.1 with: files_yaml: | - code: - - 'application/single_app/**/*.py' - - 'application/single_app/**/*.js' - - 'application/single_app/**/*.html' - - 'application/single_app/**/*.css' + application: + - 'application/**' release_notes: - 'docs/explanation/release_notes.md' - config: - - 'application/single_app/config.py' - name: Check for feature/fix keywords in PR id: check-keywords @@ -66,8 +61,7 @@ jobs: - name: Determine if release notes update is required id: require-notes env: - CODE_CHANGED: ${{ steps.changed-files.outputs.code_any_changed }} - CONFIG_CHANGED: ${{ steps.changed-files.outputs.config_any_changed }} + APPLICATION_CHANGED: ${{ steps.changed-files.outputs.application_any_changed }} RELEASE_NOTES_CHANGED: ${{ steps.changed-files.outputs.release_notes_any_changed }} HAS_FEATURE: ${{ steps.check-keywords.outputs.has_feature }} HAS_FIX: ${{ steps.check-keywords.outputs.has_fix }} @@ -76,8 +70,7 @@ jobs: echo "================================" echo "πŸ“‹ PR Analysis Summary" echo "================================" - echo "Code files changed: $CODE_CHANGED" - echo "Config changed: $CONFIG_CHANGED" + echo "Application files changed: $APPLICATION_CHANGED" echo "Release notes updated: $RELEASE_NOTES_CHANGED" echo "Feature keywords found: $HAS_FEATURE" echo "Fix keywords found: $HAS_FIX" @@ -88,15 +81,14 @@ jobs: needs_notes="false" reason="" - if [[ "$HAS_FEATURE" == "true" ]]; then - needs_notes="true" - reason="Feature-related keywords detected in PR title/body" - elif [[ "$HAS_FIX" == "true" ]]; then - needs_notes="true" - reason="Fix-related keywords detected in PR title/body" - elif [[ "$CODE_CHANGED" == "true" && "$CONFIG_CHANGED" == "true" ]]; then - needs_notes="true" - reason="Both code and config.py were modified" + if [[ "$APPLICATION_CHANGED" == "true" ]]; then + if [[ "$HAS_FEATURE" == "true" ]]; then + needs_notes="true" + reason="Feature-related keywords detected and files under application/ changed" + elif [[ "$HAS_FIX" == "true" ]]; then + needs_notes="true" + reason="Fix-related keywords detected and files under application/ changed" + fi fi echo "needs_notes=$needs_notes" >> $GITHUB_OUTPUT @@ -104,11 +96,11 @@ jobs: - name: Validate release notes update env: - CODE_CHANGED: ${{ steps.changed-files.outputs.code_any_changed }} + APPLICATION_CHANGED: ${{ steps.changed-files.outputs.application_any_changed }} RELEASE_NOTES_CHANGED: ${{ steps.changed-files.outputs.release_notes_any_changed }} NEEDS_NOTES: ${{ steps.require-notes.outputs.needs_notes }} REASON: ${{ steps.require-notes.outputs.reason }} - CODE_FILES: ${{ steps.changed-files.outputs.code_all_changed_files }} + APPLICATION_FILES: ${{ steps.changed-files.outputs.application_all_changed_files }} run: | echo "" @@ -122,8 +114,8 @@ jobs: echo "This PR appears to contain changes that should be documented" echo "in the release notes (docs/explanation/release_notes.md)." echo "" - echo "πŸ“ Code files changed:" - echo "$CODE_FILES" | tr ' ' '\n' | sed 's/^/ - /' + echo "πŸ“ Application files changed:" + echo "$APPLICATION_FILES" | tr ' ' '\n' | sed 's/^/ - /' echo "" echo "πŸ’‘ Please consider adding an entry to release_notes.md describing:" echo " β€’ New features added" @@ -138,8 +130,8 @@ jobs: exit 0 elif [[ "$RELEASE_NOTES_CHANGED" == "true" ]]; then echo "βœ… Release notes have been updated - great job!" - elif [[ "$CODE_CHANGED" != "true" ]]; then - echo "ℹ️ No significant code changes detected - release notes update not required." + elif [[ "$APPLICATION_CHANGED" != "true" ]]; then + echo "ℹ️ No files under application/ changed - release notes update not required." else echo "ℹ️ Changes appear to be minor - release notes update optional." fi diff --git a/application/external_apps/databaseseeder/artifacts/admin_settings.json b/application/external_apps/databaseseeder/artifacts/admin_settings.json index 897285cd..24c1c7ea 100644 --- a/application/external_apps/databaseseeder/artifacts/admin_settings.json +++ b/application/external_apps/databaseseeder/artifacts/admin_settings.json @@ -119,14 +119,17 @@ "video_indexer_endpoint": "https://api.videoindexer.ai", "video_indexer_location": "", "video_indexer_account_id": "", - "video_indexer_api_key": "", "video_indexer_resource_group": "", "video_indexer_subscription_id": "", "video_indexer_account_name": "", - "video_indexer_arm_api_version": "2021-11-10-preview", + "video_indexer_arm_api_version": "2025-04-01", "video_index_timeout": 600, "speech_service_endpoint": "https://eastus.api.cognitive.microsoft.com", "speech_service_location": "eastus", + "speech_service_subscription_id": "", + "speech_service_resource_group": "", + "speech_service_resource_name": "", + "speech_service_resource_id": "", "speech_service_locale": "en-US", "speech_service_key": "", "classification_banner_enabled": true, diff --git a/application/single_app/config.py b/application/single_app/config.py index 0f46400c..7196cfe8 100644 --- a/application/single_app/config.py +++ b/application/single_app/config.py @@ -94,7 +94,7 @@ EXECUTOR_TYPE = 'thread' EXECUTOR_MAX_WORKERS = 30 SESSION_TYPE = 'filesystem' -VERSION = "0.241.004" +VERSION = "0.241.006" SECRET_KEY = os.getenv('SECRET_KEY', 'dev-secret-key-change-in-production') diff --git a/application/single_app/foundry_agent_runtime.py b/application/single_app/foundry_agent_runtime.py index 4de7f35a..0a88fb46 100644 --- a/application/single_app/foundry_agent_runtime.py +++ b/application/single_app/foundry_agent_runtime.py @@ -64,6 +64,14 @@ class FoundryAgentInvocationError(RuntimeError): """Raised when the Foundry agent invocation cannot be completed.""" +def _normalize_max_completion_tokens(value: Any) -> Optional[int]: + try: + normalized = int(value) + except (TypeError, ValueError): + return None + return normalized if normalized > 0 else None + + class AzureAIFoundryChatCompletionAgent: """Lightweight wrapper so Foundry agents behave like SK chat agents.""" @@ -107,6 +115,7 @@ def invoke( global_settings=self._global_settings, message_history=history, metadata=metadata, + max_completion_tokens=self.max_completion_tokens, ) ) except RuntimeError: @@ -145,6 +154,7 @@ async def invoke_stream( global_settings=self._global_settings, message_history=list(messages), metadata={}, + max_completion_tokens=self.max_completion_tokens, ) self.last_run_citations = result.citations self.last_run_model = result.model @@ -194,6 +204,7 @@ def invoke( global_settings=self._global_settings, message_history=history, metadata=metadata, + max_completion_tokens=self.max_completion_tokens, ) ) self.last_run_citations = result.citations @@ -211,6 +222,7 @@ async def invoke_stream( global_settings=self._global_settings, message_history=list(messages), metadata={}, + max_completion_tokens=self.max_completion_tokens, ): if stream_message.metadata: citations = stream_message.metadata.get("citations") @@ -228,6 +240,7 @@ async def execute_foundry_agent( global_settings: Dict[str, Any], message_history: List[ChatMessageContent], metadata: Dict[str, Any], + max_completion_tokens: Optional[int] = None, ) -> FoundryAgentInvocationResult: """Invoke a Foundry agent using Semantic Kernel's AzureAIAgent abstraction.""" @@ -248,15 +261,20 @@ async def execute_foundry_agent( endpoint=endpoint, api_version=api_version, ) + resolved_max_completion_tokens = _normalize_max_completion_tokens(max_completion_tokens) try: definition = await client.agents.get_agent(agent_id) azure_agent = AzureAIAgent(client=client, definition=definition) responses = [] - async for response in azure_agent.invoke( - messages=message_history, - metadata={k: str(v) for k, v in metadata.items() if v is not None}, - ): + invoke_kwargs = { + "messages": message_history, + "metadata": {k: str(v) for k, v in metadata.items() if v is not None}, + } + if resolved_max_completion_tokens is not None: + invoke_kwargs["max_completion_tokens"] = resolved_max_completion_tokens + + async for response in azure_agent.invoke(**invoke_kwargs): responses.append(response) if not responses: @@ -299,6 +317,7 @@ async def execute_foundry_agent( "endpoint": endpoint, "model": model_value, "message_length": len(text or ""), + "max_completion_tokens": resolved_max_completion_tokens, }, ) @@ -321,6 +340,7 @@ async def execute_new_foundry_agent( global_settings: Dict[str, Any], message_history: List[ChatMessageContent], metadata: Dict[str, Any], + max_completion_tokens: Optional[int] = None, ) -> FoundryAgentInvocationResult: """Invoke the new Foundry application runtime through its Responses protocol endpoint.""" @@ -343,7 +363,12 @@ async def execute_new_foundry_agent( f"{endpoint.rstrip('/')}/applications/{quote(application_name, safe='')}/" "protocols/openai/responses" ) - payload = _build_new_foundry_request_payload(message_history, metadata, stream=False) + payload = _build_new_foundry_request_payload( + message_history, + metadata, + stream=False, + max_output_tokens=_normalize_max_completion_tokens(max_completion_tokens), + ) headers = { "Authorization": f"Bearer {token.token}", "Content-Type": "application/json", @@ -376,6 +401,7 @@ async def execute_new_foundry_agent( "endpoint": endpoint, "model": result.model, "message_length": len(result.message), + "max_output_tokens": payload.get("max_output_tokens"), }, ) @@ -390,6 +416,7 @@ async def execute_new_foundry_agent_stream( global_settings: Dict[str, Any], message_history: List[ChatMessageContent], metadata: Dict[str, Any], + max_completion_tokens: Optional[int] = None, ) -> AsyncIterator[FoundryAgentStreamMessage]: """Stream a new Foundry application response through the Responses API.""" @@ -413,7 +440,12 @@ async def execute_new_foundry_agent_stream( "protocols/openai/responses" ) debug_print(f"Invoking new Foundry application '{application_name}' at {endpoint} with streaming to url {url} with api-version {responses_api_version}") - payload = _build_new_foundry_request_payload(message_history, metadata, stream=True) + payload = _build_new_foundry_request_payload( + message_history, + metadata, + stream=True, + max_output_tokens=_normalize_max_completion_tokens(max_completion_tokens), + ) headers = { "Authorization": f"Bearer {token.token}", "Content-Type": "application/json", @@ -692,6 +724,7 @@ def _build_new_foundry_request_payload( message_history: List[ChatMessageContent], metadata: Dict[str, Any], stream: bool = False, + max_output_tokens: Optional[int] = None, ) -> Dict[str, Any]: input_items: List[Dict[str, Any]] = [] for message in message_history: @@ -733,6 +766,8 @@ def _build_new_foundry_request_payload( } if normalized_metadata: payload["metadata"] = normalized_metadata + if max_output_tokens is not None: + payload["max_output_tokens"] = max_output_tokens return payload diff --git a/application/single_app/functions_authentication.py b/application/single_app/functions_authentication.py index 8bdf4b5c..a0ecde0a 100644 --- a/application/single_app/functions_authentication.py +++ b/application/single_app/functions_authentication.py @@ -385,7 +385,7 @@ def get_video_indexer_managed_identity_token(settings, video_id=None): rg = settings["video_indexer_resource_group"] sub = settings["video_indexer_subscription_id"] acct = settings["video_indexer_account_name"] - api_ver = settings.get("video_indexer_arm_api_version", "2021-11-10-preview") + api_ver = settings.get("video_indexer_arm_api_version", DEFAULT_VIDEO_INDEXER_ARM_API_VERSION) debug_print(f"[VIDEO INDEXER AUTH] Settings extracted - Subscription: {sub}, Resource Group: {rg}, Account: {acct}, API Version: {api_ver}") diff --git a/application/single_app/functions_documents.py b/application/single_app/functions_documents.py index 7bff48d8..7c6e4a27 100644 --- a/application/single_app/functions_documents.py +++ b/application/single_app/functions_documents.py @@ -94,6 +94,27 @@ def get_document_blob_storage_info(document_item, user_id=None, group_id=None, p ) +def _has_persisted_blob_reference(document_item): + if not document_item: + return False + + if document_item.get("blob_path"): + return True + + return ( + document_item.get("blob_path_mode") == ARCHIVED_REVISION_BLOB_PATH_MODE + and bool(document_item.get("archived_blob_path")) + ) + + +def _normalize_document_enhanced_citations(document_item): + if not document_item: + return document_item + + document_item["enhanced_citations"] = _has_persisted_blob_reference(document_item) + return document_item + + def get_document_blob_delete_targets(document_item, user_id=None, group_id=None, public_workspace_id=None): targets = [] seen = set() @@ -317,7 +338,9 @@ def select_current_documents(documents): current_documents = [] for family_documents in families.values(): - current_documents.append(_choose_current_document(family_documents)) + current_documents.append( + _normalize_document_enhanced_citations(_choose_current_document(family_documents)) + ) return current_documents @@ -666,6 +689,7 @@ def create_document(file_name, user_id, document_id, num_file_chunks, status, gr "status": status, "percentage_complete": 0, "document_classification": carried_forward.get("document_classification", "None"), + "enhanced_citations": False, "type": "document_metadata", "public_workspace_id": public_workspace_id, "user_id": user_id, @@ -697,6 +721,7 @@ def create_document(file_name, user_id, document_id, num_file_chunks, status, gr "status": status, "percentage_complete": 0, "document_classification": carried_forward.get("document_classification", "None"), + "enhanced_citations": False, "type": "document_metadata", "group_id": group_id, "blob_container": _get_blob_container_name(group_id=group_id), @@ -728,6 +753,7 @@ def create_document(file_name, user_id, document_id, num_file_chunks, status, gr "status": status, "percentage_complete": 0, "document_classification": carried_forward.get("document_classification", "None"), + "enhanced_citations": False, "type": "document_metadata", "user_id": user_id, "blob_container": _get_blob_container_name(), @@ -823,7 +849,7 @@ def get_document_metadata(document_id, user_id, group_id=None, public_workspace_ user_id=public_workspace_id if is_public_workspace else (group_id if is_group else user_id), content=f"Document metadata retrieved: {document_items}." ) - return document_items[0] if document_items else None + return _normalize_document_enhanced_citations(document_items[0]) if document_items else None except Exception as e: print(f"Error retrieving document metadata: {repr(e)}\nTraceback:\n{traceback.format_exc()}") @@ -2775,7 +2801,7 @@ def get_document(user_id, document_id, group_id=None, public_workspace_id=None): if not document_results: return jsonify({'error': 'Document not found or access denied'}), 404 - return jsonify(document_results[0]), 200 + return jsonify(_normalize_document_enhanced_citations(document_results[0])), 200 except Exception as e: return jsonify({'error': f'Error retrieving document: {str(e)}'}), 500 @@ -2863,7 +2889,7 @@ def get_document_version(user_id, document_id, version, group_id=None, public_wo if not document_results: return jsonify({'error': 'Document version not found'}), 404 - return jsonify(document_results[0]), 200 + return jsonify(_normalize_document_enhanced_citations(document_results[0])), 200 except Exception as e: return jsonify({'error': f'Error retrieving document version: {str(e)}'}), 500 @@ -4158,6 +4184,7 @@ def upload_to_blob(temp_file_path, user_id, document_id, blob_filename, update_c current_document["blob_container"] = storage_account_container_name current_document["blob_path"] = blob_path current_document["blob_path_mode"] = CURRENT_ALIAS_BLOB_PATH_MODE + current_document["enhanced_citations"] = True if current_document.get("archived_blob_path") is None: current_document["archived_blob_path"] = None cosmos_container.upsert_item(current_document) @@ -6242,6 +6269,34 @@ def _get_speech_config(settings, endpoint: str, locale: str): print(f"[Debug] Speech config obtained successfully", flush=True) return speech_config + +def get_speech_synthesis_config(settings, endpoint: str, location: str): + """Get speech synthesis config for either key or managed identity auth.""" + auth_type = settings.get("speech_service_authentication_type") + + if auth_type == "managed_identity": + resource_id = (settings.get("speech_service_resource_id") or "").strip() + if not location: + raise ValueError("Speech service location is required for text-to-speech with managed identity.") + if not resource_id: + raise ValueError("Speech service resource ID is required for text-to-speech with managed identity.") + + credential = DefaultAzureCredential() + token = credential.get_token(cognitive_services_scope) + authorization_token = f"aad#{resource_id}#{token.token}" + speech_config = speechsdk.SpeechConfig(auth_token=authorization_token, region=location) + else: + key = (settings.get("speech_service_key") or "").strip() + if not endpoint: + raise ValueError("Speech service endpoint is required for text-to-speech.") + if not key: + raise ValueError("Speech service key is required for text-to-speech when using key authentication.") + + speech_config = speechsdk.SpeechConfig(endpoint=endpoint, subscription=key) + + print(f"[Debug] Speech synthesis config obtained successfully", flush=True) + return speech_config + def process_audio_document( document_id: str, user_id: str, diff --git a/application/single_app/functions_global_agents.py b/application/single_app/functions_global_agents.py index 7fecf1ee..51870b9c 100644 --- a/application/single_app/functions_global_agents.py +++ b/application/single_app/functions_global_agents.py @@ -51,7 +51,7 @@ def ensure_default_global_agent_exists(): ), "actions_to_load": [], "other_settings": {}, - "max_completion_tokens": 4096 + "max_completion_tokens": -1 } save_global_agent(default_agent) log_event( diff --git a/application/single_app/functions_settings.py b/application/single_app/functions_settings.py index 324f82fc..8d09ee61 100644 --- a/application/single_app/functions_settings.py +++ b/application/single_app/functions_settings.py @@ -372,6 +372,10 @@ def get_settings(use_cosmos=False, include_source=False): # Audio file settings with Azure speech service "speech_service_endpoint": '', "speech_service_location": '', + "speech_service_subscription_id": '', + "speech_service_resource_group": '', + "speech_service_resource_name": '', + "speech_service_resource_id": '', "speech_service_locale": "en-US", "speech_service_key": "", "speech_service_authentication_type": "key", # 'key' or 'managed_identity' diff --git a/application/single_app/route_backend_chats.py b/application/single_app/route_backend_chats.py index c6e99a62..e16d7242 100644 --- a/application/single_app/route_backend_chats.py +++ b/application/single_app/route_backend_chats.py @@ -3890,6 +3890,15 @@ def is_tabular_access_limited_analysis(analysis_text): 'do not have direct access', "don't have", 'do not have', + "doesn't include the full", + 'does not include the full', + 'only sample rows', + 'only workbook metadata', + 'only sample rows and workbook metadata', + 'cannot accurately list all', + 'cannot accurately list them', + 'from the current evidence', + 'from the evidence provided', 'visible excerpt you provided', 'if those tool-backed results exist', 'allow me to query again', @@ -3898,6 +3907,80 @@ def is_tabular_access_limited_analysis(analysis_text): return any(phrase in normalized_analysis for phrase in inaccessible_phrases) +def get_tabular_result_coverage_summary(invocations): + """Return whether successful analytical tool calls produced full or partial result coverage.""" + coverage_summary = { + 'has_full_result_coverage': False, + 'has_partial_result_coverage': False, + } + + for invocation in invocations or []: + result_payload = get_tabular_invocation_result_payload(invocation) or {} + + total_matches = parse_tabular_result_count(result_payload.get('total_matches')) + returned_rows = parse_tabular_result_count(result_payload.get('returned_rows')) + if total_matches is not None and returned_rows is not None: + if returned_rows >= total_matches: + coverage_summary['has_full_result_coverage'] = True + else: + coverage_summary['has_partial_result_coverage'] = True + + distinct_count = parse_tabular_result_count(result_payload.get('distinct_count')) + returned_values = parse_tabular_result_count(result_payload.get('returned_values')) + if distinct_count is not None and returned_values is not None: + if returned_values >= distinct_count: + coverage_summary['has_full_result_coverage'] = True + else: + coverage_summary['has_partial_result_coverage'] = True + + if result_payload.get('full_rows_included') or result_payload.get('full_values_included'): + coverage_summary['has_full_result_coverage'] = True + if result_payload.get('sample_rows_limited') or result_payload.get('values_limited'): + coverage_summary['has_partial_result_coverage'] = True + + if ( + coverage_summary['has_full_result_coverage'] + and coverage_summary['has_partial_result_coverage'] + ): + break + + return coverage_summary + + +def build_tabular_success_execution_gap_messages(user_question, analysis_text, invocations): + """Return retry guidance when a successful tabular analysis still produced an incomplete answer.""" + coverage_summary = get_tabular_result_coverage_summary(invocations) + has_full_result_coverage = coverage_summary['has_full_result_coverage'] + has_partial_result_coverage = coverage_summary['has_partial_result_coverage'] + wants_exhaustive_results = question_requests_tabular_exhaustive_results(user_question) + execution_gap_messages = [] + + if is_tabular_access_limited_analysis(analysis_text): + if wants_exhaustive_results and has_full_result_coverage: + execution_gap_messages.append( + 'Previous attempt still claimed only sample rows or workbook metadata were available even though successful analytical tool calls returned the full matching result set. Answer directly from those returned rows and list the full results the user asked for.' + ) + elif has_full_result_coverage: + execution_gap_messages.append( + 'Previous attempt still claimed the requested data was unavailable even though successful analytical tool calls returned the full matching result set. Use the returned rows and answer directly.' + ) + else: + execution_gap_messages.append( + 'Previous attempt still claimed the requested data was unavailable even though analytical tool calls succeeded. Use the returned rows and answer directly.' + ) + + if ( + wants_exhaustive_results + and has_partial_result_coverage + and not has_full_result_coverage + ): + execution_gap_messages.append( + 'The user asked for a full list, but previous analytical calls returned only a partial slice. Rerun the relevant analytical call with a higher max_rows or max_values before answering.' + ) + + return execution_gap_messages + + def _select_likely_workbook_sheet(sheet_names, question_text, per_sheet=None, score_match_fn=None): """Return a likely sheet name when the user question strongly matches one sheet.""" score_match_fn = score_match_fn or _score_tabular_sheet_match @@ -4408,7 +4491,8 @@ def build_system_prompt(force_tool_use=False, tool_error_messages=None, "12. Summarize concrete found records sheet-by-sheet using the tool results, not schema placeholders.\n" "13. For count or percentage questions involving a cohort defined on one sheet and facts on another, prefer get_distinct_values, count_rows, filter_rows_by_related_values, or count_rows_by_related_values over manually counting sampled rows.\n" "14. Use normalize_match=true when matching names, owners, assignees, engineers, or similar entity-text columns across worksheets.\n" - "15. Do not mention hypothetical follow-up analyses, parser errors, or failed attempts unless the user explicitly asked about failures and you have actual tool error output to report." + "15. If a successful tool result reports returned_rows == total_matches or returned_values == distinct_count, treat that as the full matching result set. Do not claim that only sample rows or workbook metadata are available in that case.\n" + "16. Do not mention hypothetical follow-up analyses, parser errors, or failed attempts unless the user explicitly asked about failures and you have actual tool error output to report." ) return ( @@ -4461,8 +4545,9 @@ def build_system_prompt(force_tool_use=False, tool_error_messages=None, "22. For identifier-based workbook questions, locate the identifier on the correct sheet before explaining downstream calculations.\n" "23. For peak, busiest, highest, or lowest questions, use grouped functions and inspect the highest_group, highest_value, lowest_group, and lowest_value summary fields.\n" "24. Return only computed findings and name the strongest drivers clearly.\n" - "25. Do not mention hypothetical follow-up analyses, parser errors, or failed attempts unless the user explicitly asked about failures and you have actual tool error output to report.\n" - "26. When using query_tabular_data, use simple DataFrame.query() syntax with backticked column names for columns containing spaces. Avoid method calls such as .str.lower(), .astype(...), or other Python expressions that DataFrame.query() may reject." + "25. If a successful tool result reports returned_rows == total_matches or returned_values == distinct_count, treat that as the full matching result set. Do not claim that only sample rows or workbook metadata are available in that case.\n" + "26. Do not mention hypothetical follow-up analyses, parser errors, or failed attempts unless the user explicitly asked about failures and you have actual tool error output to report.\n" + "27. When using query_tabular_data, use simple DataFrame.query() syntax with backticked column names for columns containing spaces. Avoid method calls such as .str.lower(), .astype(...), or other Python expressions that DataFrame.query() may reject." ) baseline_invocations = plugin_logger.get_invocations_for_conversation( @@ -4631,10 +4716,19 @@ def build_system_prompt(force_tool_use=False, tool_error_messages=None, previous_tool_error_messages = [] previous_failed_call_parameters = [] previous_discovery_feedback_messages = [] + execution_gap_messages = [] + selected_sheets = [] + coverage_summary = get_tabular_result_coverage_summary( + successful_analytical_invocations + ) + retry_gap_messages = build_tabular_success_execution_gap_messages( + user_question, + analysis, + successful_analytical_invocations, + ) if entity_lookup_mode: selected_sheets = get_tabular_invocation_selected_sheets(successful_analytical_invocations) - execution_gap_messages = [] # Cross-sheet results ("ALL (cross-sheet search)") already span # the entire workbook β€” no execution gap for sheet coverage. @@ -4648,24 +4742,24 @@ def build_system_prompt(force_tool_use=False, tool_error_messages=None, f"Previous attempt only queried worksheet(s): {rendered_selected_sheets}. The question asks for related records across worksheets, so query additional relevant sheets explicitly with sheet_name." ) - if is_tabular_access_limited_analysis(analysis): - execution_gap_messages.append( - 'Previous attempt still claimed the requested data was unavailable even though analytical tool calls succeeded. Use the returned rows and answer directly.' - ) + execution_gap_messages.extend(retry_gap_messages) - if execution_gap_messages and attempt_number < 3: - previous_execution_gap_messages = execution_gap_messages - log_event( - f"[Tabular SK Analysis] Attempt {attempt_number} entity lookup was incomplete despite successful tool calls; retrying", - extra={ - 'selected_sheets': selected_sheets, - 'execution_gaps': previous_execution_gap_messages, - 'successful_tool_count': len(successful_analytical_invocations), - }, - level=logging.WARNING, - ) - baseline_invocation_count = len(invocations_after) - continue + if execution_gap_messages and attempt_number < 3: + previous_execution_gap_messages = execution_gap_messages + log_event( + f"[Tabular SK Analysis] Attempt {attempt_number} analysis was incomplete despite successful tool calls; retrying", + extra={ + 'selected_sheets': selected_sheets, + 'execution_gaps': previous_execution_gap_messages, + 'successful_tool_count': len(successful_analytical_invocations), + 'has_full_result_coverage': coverage_summary.get('has_full_result_coverage', False), + 'has_partial_result_coverage': coverage_summary.get('has_partial_result_coverage', False), + 'entity_lookup_mode': entity_lookup_mode, + }, + level=logging.WARNING, + ) + baseline_invocation_count = len(invocations_after) + continue previous_execution_gap_messages = [] log_event( diff --git a/application/single_app/route_backend_tts.py b/application/single_app/route_backend_tts.py index 11d14cc3..61830490 100644 --- a/application/single_app/route_backend_tts.py +++ b/application/single_app/route_backend_tts.py @@ -2,6 +2,8 @@ from config import * from functions_authentication import * +from functions_appinsights import log_event +from functions_documents import get_speech_synthesis_config from functions_settings import * from functions_debug import debug_print from swagger_wrapper import swagger_route, get_auth_security @@ -41,14 +43,26 @@ def synthesize_speech(): return jsonify({"error": "Text-to-speech is not enabled"}), 403 # Validate speech service configuration - speech_key = settings.get('speech_service_key', '') - speech_region = settings.get('speech_service_location', '') + speech_endpoint = (settings.get('speech_service_endpoint') or '').strip().rstrip('/') + speech_region = (settings.get('speech_service_location') or '').strip() + speech_auth_type = settings.get('speech_service_authentication_type', 'key') - if not speech_key or not speech_region: - debug_print("[TTS] Speech service not configured - missing key or region") + if not speech_endpoint: + debug_print("[TTS] Speech service not configured - missing endpoint") + return jsonify({"error": "Speech service not configured"}), 500 + + if speech_auth_type == 'key' and not (settings.get('speech_service_key') or '').strip(): + debug_print("[TTS] Speech service not configured - missing key for key authentication") + return jsonify({"error": "Speech service not configured"}), 500 + + if speech_auth_type == 'managed_identity' and not speech_region: + debug_print("[TTS] Speech service not configured - missing location for managed identity") return jsonify({"error": "Speech service not configured"}), 500 - debug_print(f"[TTS] Speech service configured - region: {speech_region}") + debug_print( + f"[TTS] Speech service configured - auth_type: {speech_auth_type}, " + f"endpoint: {speech_endpoint}, location: {speech_region or 'n/a'}" + ) # Parse request data data = request.get_json() @@ -71,10 +85,12 @@ def synthesize_speech(): debug_print(f"[TTS] Request params - voice: {voice}, speed: {speed}, text_length: {len(text)}") # Configure speech service - speech_config = speechsdk.SpeechConfig( - subscription=speech_key, - region=speech_region - ) + try: + speech_config = get_speech_synthesis_config(settings, speech_endpoint, speech_region) + except ValueError as config_error: + debug_print(f"[TTS] Speech service configuration invalid: {str(config_error)}") + return jsonify({"error": str(config_error)}), 500 + speech_config.speech_synthesis_voice_name = voice # Set output format to high quality diff --git a/application/single_app/route_enhanced_citations.py b/application/single_app/route_enhanced_citations.py index 29de8313..ca1b9e48 100644 --- a/application/single_app/route_enhanced_citations.py +++ b/application/single_app/route_enhanced_citations.py @@ -12,7 +12,7 @@ from functions_authentication import login_required, user_required, get_current_user_id from functions_settings import get_settings, enabled_required -from functions_documents import get_document_metadata, get_document_blob_storage_info +from functions_documents import get_document_metadata from functions_group import get_user_groups from functions_public_workspaces import get_user_visible_public_workspace_ids_from_settings from swagger_wrapper import swagger_route, get_auth_security @@ -90,15 +90,13 @@ def get_enhanced_citation_document_metadata(): return doc_response, status_code raw_doc = doc_response.get_json() - _, blob_path = get_document_blob_storage_info(raw_doc) - return jsonify({ "id": raw_doc.get("id"), "document_id": raw_doc.get("id"), "file_name": raw_doc.get("file_name"), "version": raw_doc.get("version"), "is_current_version": raw_doc.get("is_current_version"), - "enhanced_citations": bool(blob_path), + "enhanced_citations": bool(raw_doc.get("enhanced_citations", False)), }), 200 except Exception as e: diff --git a/application/single_app/route_frontend_admin_settings.py b/application/single_app/route_frontend_admin_settings.py index 94053752..129dfcde 100644 --- a/application/single_app/route_frontend_admin_settings.py +++ b/application/single_app/route_frontend_admin_settings.py @@ -367,6 +367,9 @@ def admin_settings(): 'admin_settings.html', app_settings=settings_for_template, settings=settings_for_template, + azure_environment=AZURE_ENVIRONMENT, + default_video_indexer_endpoint=video_indexer_endpoint, + default_video_indexer_arm_api_version=DEFAULT_VIDEO_INDEXER_ARM_API_VERSION, user_settings=user_settings, update_available=update_available, latest_version=latest_version, @@ -1325,12 +1328,16 @@ def is_valid_url(url): 'video_indexer_resource_group': form_data.get('video_indexer_resource_group', '').strip(), 'video_indexer_subscription_id': form_data.get('video_indexer_subscription_id', '').strip(), 'video_indexer_account_name': form_data.get('video_indexer_account_name', '').strip(), - 'video_indexer_arm_api_version': form_data.get('video_indexer_arm_api_version', '2024-01-01').strip(), + 'video_indexer_arm_api_version': form_data.get('video_indexer_arm_api_version', DEFAULT_VIDEO_INDEXER_ARM_API_VERSION).strip(), 'video_index_timeout': int(form_data.get('video_index_timeout', 600)), # Audio file settings with Azure speech service 'speech_service_endpoint': form_data.get('speech_service_endpoint', '').strip(), 'speech_service_location': form_data.get('speech_service_location', '').strip(), + 'speech_service_subscription_id': form_data.get('speech_service_subscription_id', '').strip(), + 'speech_service_resource_group': form_data.get('speech_service_resource_group', '').strip(), + 'speech_service_resource_name': form_data.get('speech_service_resource_name', '').strip(), + 'speech_service_resource_id': form_data.get('speech_service_resource_id', '').strip(), 'speech_service_locale': form_data.get('speech_service_locale', '').strip(), 'speech_service_authentication_type': form_data.get('speech_service_authentication_type', 'key'), 'speech_service_key': form_data.get('speech_service_key', '').strip(), diff --git a/application/single_app/static/js/admin/admin_settings.js b/application/single_app/static/js/admin/admin_settings.js index 896bf6b3..7861b801 100644 --- a/application/single_app/static/js/admin/admin_settings.js +++ b/application/single_app/static/js/admin/admin_settings.js @@ -1994,14 +1994,152 @@ function setupToggles() { } const speechAuthType = document.getElementById('speech_service_authentication_type'); + const speechEndpointInput = document.getElementById('speech_service_endpoint'); + const speechKeyContainer = document.getElementById('speech_service_key_container'); + const speechResourceIdContainer = document.getElementById('speech_service_resource_id_container'); + const speechResourceIdInput = document.getElementById('speech_service_resource_id'); + const speechSubscriptionInput = document.getElementById('speech_service_subscription_id'); + const speechResourceGroupInput = document.getElementById('speech_service_resource_group'); + const speechResourceNameInput = document.getElementById('speech_service_resource_name'); + const buildSpeechResourceIdButton = document.getElementById('build_speech_resource_id_btn'); + const speechResourceIdBuilderStatus = document.getElementById('speech_resource_id_builder_status'); + + function inferSpeechResourceNameFromEndpoint(endpointValue) { + const trimmedValue = (endpointValue || '').trim(); + if (!trimmedValue) { + return ''; + } + + try { + const parsedUrl = new URL(trimmedValue); + const hostName = parsedUrl.hostname.toLowerCase(); + const supportedSuffixes = [ + '.cognitiveservices.azure.com', + '.cognitiveservices.azure.us' + ]; + + for (const suffix of supportedSuffixes) { + if (hostName.endsWith(suffix)) { + const resourceName = hostName.slice(0, -suffix.length); + if (resourceName && !resourceName.includes('.')) { + return resourceName; + } + } + } + } catch (error) { + return ''; + } + + return ''; + } + + function setSpeechResourceIdBuilderStatus(message) { + if (speechResourceIdBuilderStatus) { + speechResourceIdBuilderStatus.textContent = message; + } + } + + function buildSpeechResourceIdFromFields() { + const subscriptionId = speechSubscriptionInput?.value?.trim() || ''; + const resourceGroup = speechResourceGroupInput?.value?.trim() || ''; + const resourceName = speechResourceNameInput?.value?.trim() || ''; + + if (!subscriptionId || !resourceGroup || !resourceName) { + return ''; + } + + return `/subscriptions/${subscriptionId}/resourceGroups/${resourceGroup}/providers/Microsoft.CognitiveServices/accounts/${resourceName}`; + } + + function syncSpeechResourceIdBuilder(force) { + if (!speechResourceIdInput) { + return ''; + } + + if (speechResourceNameInput && !speechResourceNameInput.value.trim()) { + const inferredResourceName = inferSpeechResourceNameFromEndpoint(speechEndpointInput?.value || ''); + if (inferredResourceName) { + speechResourceNameInput.value = inferredResourceName; + } + } + + const builtResourceId = buildSpeechResourceIdFromFields(); + const currentValue = speechResourceIdInput.value.trim(); + const previousGeneratedValue = speechResourceIdInput.dataset.generatedValue || ''; + const wasGenerated = speechResourceIdInput.dataset.generated === 'true' || currentValue === '' || currentValue === previousGeneratedValue; + + if (builtResourceId) { + speechResourceIdInput.dataset.generatedValue = builtResourceId; + if (force || wasGenerated) { + speechResourceIdInput.value = builtResourceId; + speechResourceIdInput.dataset.generated = 'true'; + } + setSpeechResourceIdBuilderStatus('Resource ID can be generated from the helper fields. You can still override it manually if needed.'); + return builtResourceId; + } + + const missingParts = []; + if (!speechSubscriptionInput?.value?.trim()) { + missingParts.push('Subscription ID'); + } + if (!speechResourceGroupInput?.value?.trim()) { + missingParts.push('Resource Group'); + } + if (!speechResourceNameInput?.value?.trim()) { + missingParts.push('Speech Resource Name'); + } + + speechResourceIdInput.dataset.generatedValue = ''; + if (speechResourceIdInput.dataset.generated === 'true' && !currentValue) { + speechResourceIdInput.dataset.generated = 'false'; + } + + setSpeechResourceIdBuilderStatus(`To auto-build the resource ID, provide: ${missingParts.join(', ')}.`); + return ''; + } + if (speechAuthType) { + const updateSpeechAuthFields = function () { + const usingKeyAuth = this.value === 'key'; + setSectionVisibility(speechKeyContainer, usingKeyAuth); + setSectionVisibility(speechResourceIdContainer, !usingKeyAuth); + }; + + updateSpeechAuthFields.call(speechAuthType); speechAuthType.addEventListener('change', function () { - document.getElementById('speech_service_key_container').style.display = - (this.value === 'key') ? 'block' : 'none'; + updateSpeechAuthFields.call(this); markFormAsModified(); }); } + if (speechResourceIdInput) { + syncSpeechResourceIdBuilder(false); + speechResourceIdInput.addEventListener('input', function () { + const builtResourceId = buildSpeechResourceIdFromFields(); + this.dataset.generated = builtResourceId && this.value.trim() === builtResourceId ? 'true' : 'false'; + }); + } + + [speechEndpointInput, speechSubscriptionInput, speechResourceGroupInput, speechResourceNameInput].forEach((element) => { + if (!element) { + return; + } + + element.addEventListener('input', () => { + syncSpeechResourceIdBuilder(false); + markFormAsModified(); + }); + }); + + if (buildSpeechResourceIdButton) { + buildSpeechResourceIdButton.addEventListener('click', () => { + const builtResourceId = syncSpeechResourceIdBuilder(true); + if (builtResourceId) { + markFormAsModified(); + } + }); + } + const officeAuthType = document.getElementById('office_docs_authentication_type'); const connStrGroup = document.getElementById('office_docs_storage_conn_str_group'); const urlGroup = document.getElementById('office_docs_storage_url_group'); @@ -3434,29 +3572,104 @@ function togglePassword(btnId, inputId) { } } +function setSectionVisibility(element, visible) { + if (!element) { + return; + } + + element.classList.toggle('d-none', !visible); +} + // --- Video Indexer Settings toggle --- const videoSupportToggle = document.getElementById('enable_video_file_support'); -const videoIndexerDiv = document.getElementById('video_indexer_settings'); +const videoIndexerDiv = document.getElementById('video_indexer_settings'); +const videoIndexerCloudSelect = document.getElementById('video_indexer_cloud'); +const videoIndexerEndpointInput = document.getElementById('video_indexer_endpoint'); +const videoIndexerEndpointDisplay = document.getElementById('video_indexer_endpoint_display'); +const videoIndexerCustomEndpointGroup = document.getElementById('video_indexer_custom_endpoint_group'); +const videoIndexerCustomEndpointInput = document.getElementById('video_indexer_custom_endpoint'); +const videoIndexerCloudMismatchAlert = document.getElementById('video_indexer_cloud_mismatch_alert'); + +function updateVideoIndexerEndpointSelection() { + if (!videoIndexerCloudSelect || !videoIndexerEndpointInput) { + return; + } + + const selectedCloud = videoIndexerCloudSelect.value; + const publicEndpoint = videoIndexerCloudSelect.dataset.publicEndpoint || 'https://api.videoindexer.ai'; + const governmentEndpoint = videoIndexerCloudSelect.dataset.governmentEndpoint || 'https://api.videoindexer.ai.azure.us'; + const runtimeCloud = videoIndexerCloudSelect.dataset.runtimeCloud || 'public'; + + let endpointValue = publicEndpoint; + if (selectedCloud === 'usgovernment') { + endpointValue = governmentEndpoint; + } else if (selectedCloud === 'custom') { + endpointValue = videoIndexerCustomEndpointInput?.value?.trim() || ''; + } + + videoIndexerEndpointInput.value = endpointValue; + + if (videoIndexerEndpointDisplay) { + videoIndexerEndpointDisplay.value = endpointValue; + } + + setSectionVisibility(videoIndexerCustomEndpointGroup, selectedCloud === 'custom'); + setSectionVisibility(videoIndexerCloudMismatchAlert, selectedCloud !== runtimeCloud); + + if (typeof updateVideoIndexerModalInfo === 'function') { + updateVideoIndexerModalInfo(); + } +} + if (videoSupportToggle && videoIndexerDiv) { - // on load - videoIndexerDiv.style.display = videoSupportToggle.checked ? 'block' : 'none'; - // on change - videoSupportToggle.addEventListener('change', () => { - videoIndexerDiv.style.display = videoSupportToggle.checked ? 'block' : 'none'; - markFormAsModified(); - }); + setSectionVisibility(videoIndexerDiv, videoSupportToggle.checked); + videoSupportToggle.addEventListener('change', () => { + setSectionVisibility(videoIndexerDiv, videoSupportToggle.checked); + markFormAsModified(); + }); +} + +if (videoIndexerCloudSelect) { + updateVideoIndexerEndpointSelection(); + videoIndexerCloudSelect.addEventListener('change', () => { + updateVideoIndexerEndpointSelection(); + markFormAsModified(); + }); +} + +if (videoIndexerCustomEndpointInput) { + videoIndexerCustomEndpointInput.addEventListener('input', () => { + updateVideoIndexerEndpointSelection(); + markFormAsModified(); + }); } // --- Speech Service Settings toggle --- -const audioSupportToggle = document.getElementById('enable_audio_file_support'); -const audioServiceDiv = document.getElementById('audio_service_settings'); -if (audioSupportToggle && audioServiceDiv) { - // initial visibility - audioServiceDiv.style.display = audioSupportToggle.checked ? 'block' : 'none'; - audioSupportToggle.addEventListener('change', () => { - audioServiceDiv.style.display = audioSupportToggle.checked ? 'block' : 'none'; - markFormAsModified(); - }); +const audioSupportToggle = document.getElementById('enable_audio_file_support'); +const speechToTextToggle = document.getElementById('enable_speech_to_text_input'); +const textToSpeechToggle = document.getElementById('enable_text_to_speech'); +const audioServiceDiv = document.getElementById('audio_service_settings'); + +function areAnySpeechFeaturesEnabled() { + return [audioSupportToggle, speechToTextToggle, textToSpeechToggle].some((toggle) => Boolean(toggle?.checked)); +} + +function updateSpeechServiceSettingsVisibility() { + setSectionVisibility(audioServiceDiv, areAnySpeechFeaturesEnabled()); +} + +if (audioServiceDiv) { + updateSpeechServiceSettingsVisibility(); + [audioSupportToggle, speechToTextToggle, textToSpeechToggle].forEach((toggle) => { + if (!toggle) { + return; + } + + toggle.addEventListener('change', () => { + updateSpeechServiceSettingsVisibility(); + markFormAsModified(); + }); + }); } // Metadata Extraction UI @@ -3495,12 +3708,12 @@ function populateExtractionModels() { } if (extractToggle) { - // show/hide the model dropdown - extractModelDiv.style.display = extractToggle.checked ? 'block' : 'none'; - extractToggle.addEventListener('change', () => { + // show/hide the model dropdown extractModelDiv.style.display = extractToggle.checked ? 'block' : 'none'; - markFormAsModified(); - }); + extractToggle.addEventListener('change', () => { + extractModelDiv.style.display = extractToggle.checked ? 'block' : 'none'; + markFormAsModified(); + }); } // Multi-Modal Vision UI @@ -3509,232 +3722,232 @@ const visionModelDiv = document.getElementById('multimodal_vision_model_settings const visionSelect = document.getElementById('multimodal_vision_model'); function populateVisionModels() { - if (!visionSelect) return; + if (!visionSelect) return; - // remember previously chosen value - const prev = visionSelect.getAttribute('data-prev') || ''; - - // clear out old options (except the placeholder) - visionSelect.innerHTML = ''; - - if (document.getElementById('enable_gpt_apim').checked) { - // use comma-separated APIM deployments - const text = document.getElementById('azure_apim_gpt_deployment').value || ''; - text.split(',') - .map(s => s.trim()) - .filter(s => s) - .forEach(d => { - const opt = new Option(d, d); - visionSelect.add(opt); + // remember previously chosen value + const prev = visionSelect.getAttribute('data-prev') || ''; + + // clear out old options (except the placeholder) + visionSelect.innerHTML = ''; + + if (document.getElementById('enable_gpt_apim').checked) { + // use comma-separated APIM deployments + const text = document.getElementById('azure_apim_gpt_deployment').value || ''; + text.split(',') + .map(s => s.trim()) + .filter(s => s) + .forEach(d => { + const opt = new Option(d, d); + visionSelect.add(opt); + }); + } else { + // use direct GPT selected deployments - filter for vision-capable models + (window.gptSelected || []).forEach(m => { + // Only include models with vision capabilities + // Vision-enabled models per Azure OpenAI docs: + // - o-series reasoning models (o1, o3, etc.) + // - GPT-5 series + // - GPT-4.1 series + // - GPT-4.5 + // - GPT-4o series (gpt-4o, gpt-4o-mini) + // - GPT-4 vision models (gpt-4-vision, gpt-4-turbo-vision) + const modelNameLower = (m.modelName || '').toLowerCase(); + const isVisionCapable = + modelNameLower.includes('vision') || + modelNameLower.includes('gpt-4o') || + modelNameLower.includes('gpt-4.1') || + modelNameLower.includes('gpt-4.5') || + modelNameLower.includes('gpt-5') || + modelNameLower.match(/^o\d+/) || + modelNameLower.includes('o1-') || + modelNameLower.includes('o3-'); + + if (isVisionCapable) { + const label = `${m.deploymentName} (${m.modelName})`; + const opt = new Option(label, m.deploymentName); + visionSelect.add(opt); + } }); - } else { - // use direct GPT selected deployments - filter for vision-capable models - (window.gptSelected || []).forEach(m => { - // Only include models with vision capabilities - // Vision-enabled models per Azure OpenAI docs: - // - o-series reasoning models (o1, o3, etc.) - // - GPT-5 series - // - GPT-4.1 series - // - GPT-4.5 - // - GPT-4o series (gpt-4o, gpt-4o-mini) - // - GPT-4 vision models (gpt-4-vision, gpt-4-turbo-vision) - const modelNameLower = (m.modelName || '').toLowerCase(); - const isVisionCapable = - modelNameLower.includes('vision') || // gpt-4-vision, gpt-4-turbo-vision - modelNameLower.includes('gpt-4o') || // gpt-4o, gpt-4o-mini - modelNameLower.includes('gpt-4.1') || // gpt-4.1 series - modelNameLower.includes('gpt-4.5') || // gpt-4.5 - modelNameLower.includes('gpt-5') || // gpt-5 series - modelNameLower.match(/^o\d+/) || // o1, o3, etc. (o-series) - modelNameLower.includes('o1-') || // o1-preview, o1-mini - modelNameLower.includes('o3-'); // o3-mini, etc. - - if (isVisionCapable) { - const label = `${m.deploymentName} (${m.modelName})`; - const opt = new Option(label, m.deploymentName); - visionSelect.add(opt); - } - }); - } + } - // restore previous - if (prev) { - visionSelect.value = prev; - } + // restore previous + if (prev) { + visionSelect.value = prev; + } } if (visionToggle && visionModelDiv) { - // show/hide the model dropdown - visionModelDiv.style.display = visionToggle.checked ? 'block' : 'none'; - visionToggle.addEventListener('change', () => { + // show/hide the model dropdown visionModelDiv.style.display = visionToggle.checked ? 'block' : 'none'; - markFormAsModified(); - }); + visionToggle.addEventListener('change', () => { + visionModelDiv.style.display = visionToggle.checked ? 'block' : 'none'; + markFormAsModified(); + }); } // Listen for vision model selection changes if (visionSelect) { - visionSelect.addEventListener('change', () => { - // Update data-prev to remember the selection - visionSelect.setAttribute('data-prev', visionSelect.value); - markFormAsModified(); - }); + visionSelect.addEventListener('change', () => { + // Update data-prev to remember the selection + visionSelect.setAttribute('data-prev', visionSelect.value); + markFormAsModified(); + }); } -// when APIM‐toggle flips, repopulate +// when APIM-toggle flips, repopulate const apimToggle = document.getElementById('enable_gpt_apim'); if (apimToggle) { - apimToggle.addEventListener('change', () => { - populateExtractionModels(); - populateVisionModels(); - }); + apimToggle.addEventListener('change', () => { + populateExtractionModels(); + populateVisionModels(); + }); } // on load, stash previous & populate document.addEventListener('DOMContentLoaded', () => { - if (extractSelect) { - extractSelect.setAttribute('data-prev', extractSelect.value); - populateExtractionModels(); - } - if (visionSelect) { - visionSelect.setAttribute('data-prev', visionSelect.value); - populateVisionModels(); - } + if (extractSelect) { + extractSelect.setAttribute('data-prev', extractSelect.value); + populateExtractionModels(); + } + if (visionSelect) { + visionSelect.setAttribute('data-prev', visionSelect.value); + populateVisionModels(); + } }); document.addEventListener('DOMContentLoaded', () => { - ['user','group','public'].forEach(type => { - const warnDiv = document.getElementById(`index-warning-${type}`); - const missingSpan = document.getElementById(`missing-fields-${type}`); - const fixBtn = document.getElementById(`fix-${type}-index-btn`); + ['user','group','public'].forEach(type => { + const warnDiv = document.getElementById(`index-warning-${type}`); + const missingSpan = document.getElementById(`missing-fields-${type}`); + const fixBtn = document.getElementById(`fix-${type}-index-btn`); - // 1) check for missing fields - fetch('/api/admin/settings/check_index_fields', { - method: 'POST', - headers: { - 'Content-Type': 'application/json' - }, - credentials: 'same-origin', - body: JSON.stringify({ indexType: type }) - }) - .then(r => { - if (!r.ok) { - return r.json().then(errorData => { - throw new Error(errorData.error || `HTTP ${r.status}: ${r.statusText}`); - }); - } - return r.json(); - }) - .then(response => { - if (response.autoFixed) { - // Fields were automatically fixed - console.log(`βœ… Auto-fixed ${type} index: added ${response.fieldsAdded.length} field(s):`, response.fieldsAdded.join(', ')); - if (warnDiv) { - warnDiv.className = 'alert alert-success'; - missingSpan.textContent = `Automatically added ${response.fieldsAdded.length} field(s): ${response.fieldsAdded.join(', ')}`; - warnDiv.style.display = 'block'; - if (fixBtn) fixBtn.style.display = 'none'; - - // Hide success message after 5 seconds - setTimeout(() => { - warnDiv.style.display = 'none'; - }, 5000); - } - } else if (response.autoFixFailed) { - // Auto-fix failed, show manual button - console.warn(`Auto-fix failed for ${type} index:`, response.error); - missingSpan.textContent = response.missingFields.join(', ') + ' (Auto-fix failed - please fix manually)'; - warnDiv.className = 'alert alert-warning'; - warnDiv.style.display = 'block'; - if (fixBtn) { - fixBtn.textContent = `Fix ${type} Index Fields`; - fixBtn.style.display = 'inline-block'; - } - } else if (response.missingFields && response.missingFields.length > 0) { - // Missing fields but auto-fix was disabled - missingSpan.textContent = response.missingFields.join(', '); - warnDiv.className = 'alert alert-warning'; - warnDiv.style.display = 'block'; - if (fixBtn) { - fixBtn.textContent = `Fix ${type} Index Fields`; - fixBtn.style.display = 'inline-block'; - } - } else if (response.indexExists) { - // Index exists and is complete - if (warnDiv) warnDiv.style.display = 'none'; - console.log(`${type} index is properly configured`); - } - }) - .catch(err => { - console.warn(`Checking ${type} index fields:`, err.message); + // 1) check for missing fields + fetch('/api/admin/settings/check_index_fields', { + method: 'POST', + headers: { + 'Content-Type': 'application/json' + }, + credentials: 'same-origin', + body: JSON.stringify({ indexType: type }) + }) + .then(r => { + if (!r.ok) { + return r.json().then(errorData => { + throw new Error(errorData.error || `HTTP ${r.status}: ${r.statusText}`); + }); + } + return r.json(); + }) + .then(response => { + if (response.autoFixed) { + // Fields were automatically fixed + console.log(`βœ… Auto-fixed ${type} index: added ${response.fieldsAdded.length} field(s):`, response.fieldsAdded.join(', ')); + if (warnDiv) { + warnDiv.className = 'alert alert-success'; + missingSpan.textContent = `Automatically added ${response.fieldsAdded.length} field(s): ${response.fieldsAdded.join(', ')}`; + warnDiv.style.display = 'block'; + if (fixBtn) fixBtn.style.display = 'none'; + + // Hide success message after 5 seconds + setTimeout(() => { + warnDiv.style.display = 'none'; + }, 5000); + } + } else if (response.autoFixFailed) { + // Auto-fix failed, show manual button + console.warn(`Auto-fix failed for ${type} index:`, response.error); + missingSpan.textContent = response.missingFields.join(', ') + ' (Auto-fix failed - please fix manually)'; + warnDiv.className = 'alert alert-warning'; + warnDiv.style.display = 'block'; + if (fixBtn) { + fixBtn.textContent = `Fix ${type} Index Fields`; + fixBtn.style.display = 'inline-block'; + } + } else if (response.missingFields && response.missingFields.length > 0) { + // Missing fields but auto-fix was disabled + missingSpan.textContent = response.missingFields.join(', '); + warnDiv.className = 'alert alert-warning'; + warnDiv.style.display = 'block'; + if (fixBtn) { + fixBtn.textContent = `Fix ${type} Index Fields`; + fixBtn.style.display = 'inline-block'; + } + } else if (response.indexExists) { + // Index exists and is complete + if (warnDiv) warnDiv.style.display = 'none'; + console.log(`${type} index is properly configured`); + } + }) + .catch(err => { + console.warn(`Checking ${type} index fields:`, err.message); - // Check if this is an index not found error - if (err.message.includes('does not exist yet') || err.message.includes('not found')) { - // Show a different message for missing index - if (warnDiv && missingSpan && fixBtn) { - missingSpan.textContent = `Index "${type}" does not exist yet`; - warnDiv.style.display = 'block'; - fixBtn.textContent = `Create ${type} Index`; - fixBtn.style.display = 'inline-block'; - fixBtn.dataset.action = 'create'; - } - } else if (err.message.includes('not configured')) { - // Azure AI Search not configured - if (warnDiv && missingSpan) { - missingSpan.textContent = 'Azure AI Search not configured'; - warnDiv.style.display = 'block'; - if (fixBtn) fixBtn.style.display = 'none'; - } - } else { - // Hide the warning div for other errors - if (warnDiv) warnDiv.style.display = 'none'; - } - }); + // Check if this is an index not found error + if (err.message.includes('does not exist yet') || err.message.includes('not found')) { + // Show a different message for missing index + if (warnDiv && missingSpan && fixBtn) { + missingSpan.textContent = `Index "${type}" does not exist yet`; + warnDiv.style.display = 'block'; + fixBtn.textContent = `Create ${type} Index`; + fixBtn.style.display = 'inline-block'; + fixBtn.dataset.action = 'create'; + } + } else if (err.message.includes('not configured')) { + // Azure AI Search not configured + if (warnDiv && missingSpan) { + missingSpan.textContent = 'Azure AI Search not configured'; + warnDiv.style.display = 'block'; + if (fixBtn) fixBtn.style.display = 'none'; + } + } else { + // Hide the warning div for other errors + if (warnDiv) warnDiv.style.display = 'none'; + } + }); - // 2) wire up the β€œfix” button - fixBtn.addEventListener('click', () => { - fixBtn.disabled = true; - const action = fixBtn.dataset.action || 'fix'; - const endpoint = action === 'create' ? '/api/admin/settings/create_index' : '/api/admin/settings/fix_index_fields'; - const actionText = action === 'create' ? 'Creating' : 'Fixing'; + // 2) wire up the fix button + fixBtn.addEventListener('click', () => { + fixBtn.disabled = true; + const action = fixBtn.dataset.action || 'fix'; + const endpoint = action === 'create' ? '/api/admin/settings/create_index' : '/api/admin/settings/fix_index_fields'; + const actionText = action === 'create' ? 'Creating' : 'Fixing'; - fixBtn.textContent = `${actionText}...`; + fixBtn.textContent = `${actionText}...`; - fetch(endpoint, { - method: 'POST', - headers: { - 'Content-Type': 'application/json' - }, - credentials: 'same-origin', - body: JSON.stringify({ indexType: type }) - }) - .then(r => { - if (!r.ok) { - return r.json().then(errorData => { - throw new Error(errorData.error || `HTTP ${r.status}: ${r.statusText}`); + fetch(endpoint, { + method: 'POST', + headers: { + 'Content-Type': 'application/json' + }, + credentials: 'same-origin', + body: JSON.stringify({ indexType: type }) + }) + .then(r => { + if (!r.ok) { + return r.json().then(errorData => { + throw new Error(errorData.error || `HTTP ${r.status}: ${r.statusText}`); + }); + } + return r.json(); + }) + .then(resp => { + if (resp.status === 'success') { + alert(resp.message || `Successfully ${action === 'create' ? 'created' : 'fixed'} ${type} index!`); + window.location.reload(); + } else { + alert(`Failed to ${action} ${type} index: ${resp.error}`); + fixBtn.disabled = false; + fixBtn.textContent = `${action === 'create' ? 'Create' : 'Fix'} ${type} Index`; + } + }) + .catch(err => { + alert(`Error ${action === 'create' ? 'creating' : 'fixing'} ${type} index: ${err.message || err}`); + fixBtn.disabled = false; + fixBtn.textContent = `${action === 'create' ? 'Create' : 'Fix'} ${type} Index`; + }); }); - } - return r.json(); - }) - .then(resp => { - if (resp.status === 'success') { - alert(resp.message || `Successfully ${action === 'create' ? 'created' : 'fixed'} ${type} index!`); - window.location.reload(); - } else { - alert(`Failed to ${action} ${type} index: ${resp.error}`); - fixBtn.disabled = false; - fixBtn.textContent = `${action === 'create' ? 'Create' : 'Fix'} ${type} Index`; - } - }) - .catch(err => { - alert(`Error ${action === 'create' ? 'creating' : 'fixing'} ${type} index: ${err.message || err}`); - fixBtn.disabled = false; - fixBtn.textContent = `${action === 'create' ? 'Create' : 'Fix'} ${type} Index`; }); - }); }); - }); togglePassword('toggle_gpt_key', 'azure_openai_gpt_key'); @@ -3756,7 +3969,6 @@ togglePassword('toggle_audio_files_key', 'audio_files_key'); togglePassword('toggle_office_conn_str', 'office_docs_storage_account_blob_endpoint'); togglePassword('toggle_video_conn_str', 'video_files_storage_account_url'); togglePassword('toggle_audio_conn_str', 'audio_files_storage_account_url'); -togglePassword('toggle_video_indexer_api_key', 'video_indexer_api_key'); togglePassword('toggle_speech_service_key', 'speech_service_key'); togglePassword('toggle_redis_key', 'redis_key'); togglePassword('toggle_azure_apim_redis_subscription_key', 'azure_apim_redis_subscription_key'); @@ -4050,6 +4262,9 @@ function calculateAvailableWalkthroughSteps() { const videoEnabled = document.getElementById('enable_video_file_support')?.checked || false; const audioEnabled = document.getElementById('enable_audio_file_support')?.checked || false; + const speechToTextEnabled = document.getElementById('enable_speech_to_text_input')?.checked || false; + const textToSpeechEnabled = document.getElementById('enable_text_to_speech')?.checked || false; + const speechFeaturesEnabled = audioEnabled || speechToTextEnabled || textToSpeechEnabled; const availableSteps = [1, 2, 3, 4]; // Base steps always available @@ -4060,10 +4275,10 @@ function calculateAvailableWalkthroughSteps() { if (videoEnabled) { availableSteps.push(8); // Video support } - - if (audioEnabled) { - availableSteps.push(9); // Audio support - } + } + + if (speechFeaturesEnabled) { + availableSteps.push(9); // Shared Speech Service } // Optional steps always available @@ -4123,8 +4338,10 @@ function findNextApplicableStep(currentStep) { case 9: // Audio support const audioEnabled = document.getElementById('enable_audio_file_support')?.checked || false; - if (!workspacesEnabled || !audioEnabled) { - // Skip this step if workspaces not enabled or audio not enabled + const speechToTextEnabled = document.getElementById('enable_speech_to_text_input')?.checked || false; + const textToSpeechEnabled = document.getElementById('enable_text_to_speech')?.checked || false; + if (!(audioEnabled || speechToTextEnabled || textToSpeechEnabled)) { + // Skip this step if no speech features are enabled nextStep++; continue; } @@ -4390,25 +4607,48 @@ function isStepComplete(stepNumber) { const videoEndpoint = document.getElementById('video_indexer_endpoint')?.value; const videoLocation = document.getElementById('video_indexer_location')?.value; const videoAccountId = document.getElementById('video_indexer_account_id')?.value; - - return videoLocation && videoAccountId && videoEndpoint; + const videoResourceGroup = document.getElementById('video_indexer_resource_group')?.value; + const videoSubscriptionId = document.getElementById('video_indexer_subscription_id')?.value; + const videoAccountName = document.getElementById('video_indexer_account_name')?.value; + + return Boolean( + videoLocation && + videoAccountId && + videoEndpoint && + videoResourceGroup && + videoSubscriptionId && + videoAccountName + ); case 9: // Audio support const audioEnabled = document.getElementById('enable_audio_file_support').checked || false; + const speechToTextEnabled = document.getElementById('enable_speech_to_text_input')?.checked || false; + const textToSpeechEnabled = document.getElementById('enable_text_to_speech')?.checked || false; + const speechFeaturesEnabled = audioEnabled || speechToTextEnabled || textToSpeechEnabled; - // If workspaces not enabled or audio not enabled, it's always complete - if (!workspacesEnabled || !audioEnabled) return true; + // If no speech features are enabled, it's always complete + if (!speechFeaturesEnabled) return true; // Otherwise check settings const speechEndpoint = document.getElementById('speech_service_endpoint')?.value; const authType = document.getElementById('speech_service_authentication_type').value; const key = document.getElementById('speech_service_key').value; - - if (!speechEndpoint || (authType === 'key' && !key)) { - return false; - } else { - return true; + const speechLocation = document.getElementById('speech_service_location')?.value; + const speechResourceId = document.getElementById('speech_service_resource_id')?.value; + + if (!speechEndpoint) { + return false; } + + if (authType === 'key') { + return Boolean(key); + } + + if (textToSpeechEnabled) { + return Boolean(speechLocation && speechResourceId); + } + + return true; case 10: // Content safety - always complete (optional) case 11: // User feedback and archiving - always complete (optional) @@ -4608,14 +4848,26 @@ function setupWalkthroughFieldListeners() { ], 8: [ // Video settings {selector: '#enable_video_file_support', event: 'change'}, + {selector: '#video_indexer_cloud', event: 'change'}, + {selector: '#video_indexer_custom_endpoint', event: 'input'}, {selector: '#video_indexer_location', event: 'input'}, {selector: '#video_indexer_account_id', event: 'input'}, - {selector: '#video_indexer_api_key', event: 'input'} + {selector: '#video_indexer_resource_group', event: 'input'}, + {selector: '#video_indexer_subscription_id', event: 'input'}, + {selector: '#video_indexer_account_name', event: 'input'} ], 9: [ // Audio settings {selector: '#enable_audio_file_support', event: 'change'}, + {selector: '#enable_speech_to_text_input', event: 'change'}, + {selector: '#enable_text_to_speech', event: 'change'}, {selector: '#speech_service_endpoint', event: 'input'}, - {selector: '#speech_service_key', event: 'input'} + {selector: '#speech_service_authentication_type', event: 'change'}, + {selector: '#speech_service_subscription_id', event: 'input'}, + {selector: '#speech_service_resource_group', event: 'input'}, + {selector: '#speech_service_resource_name', event: 'input'}, + {selector: '#speech_service_key', event: 'input'}, + {selector: '#speech_service_location', event: 'input'}, + {selector: '#speech_service_resource_id', event: 'input'} ] }; diff --git a/application/single_app/static/json/schemas/agent.schema.json b/application/single_app/static/json/schemas/agent.schema.json index 64f91251..11f17de0 100644 --- a/application/single_app/static/json/schemas/agent.schema.json +++ b/application/single_app/static/json/schemas/agent.schema.json @@ -110,7 +110,7 @@ "type": "integer", "minimum": -1, "maximum": 512000, - "default": 4096 + "default": -1 } }, "required": [ diff --git a/application/single_app/templates/_speech_service_info.html b/application/single_app/templates/_speech_service_info.html new file mode 100644 index 00000000..fad60130 --- /dev/null +++ b/application/single_app/templates/_speech_service_info.html @@ -0,0 +1,330 @@ + + + + \ No newline at end of file diff --git a/application/single_app/templates/_video_indexer_info.html b/application/single_app/templates/_video_indexer_info.html index 904ef900..cd806e30 100644 --- a/application/single_app/templates/_video_indexer_info.html +++ b/application/single_app/templates/_video_indexer_info.html @@ -12,12 +12,12 @@