diff --git a/application/single_app/functions_content.py b/application/single_app/functions_content.py
index 4a9576e5..98eb0ea1 100644
--- a/application/single_app/functions_content.py
+++ b/application/single_app/functions_content.py
@@ -1,6 +1,11 @@
 # functions_content.py
 
 import email.utils
+import struct
+import zipfile
+from xml.etree import ElementTree
+
+import olefile
 
 from functions_debug import debug_print
 from config import *
@@ -15,6 +20,193 @@ def extract_markdown_file(file_path):
     with open(file_path, 'r', encoding='utf-8') as f:
         return f.read()
 
+
+def extract_docx_text(file_path):
+    """Extract text from OOXML Word documents such as .docx and .docm."""
+    try:
+        import docx2txt
+    except ImportError as exc:
+        raise Exception(
+            "docx2txt library is required for .docx/.docm file processing. Install with: pip install docx2txt"
+        ) from exc
+
+    return docx2txt.process(file_path)
+
+
+def _normalize_legacy_doc_text(text):
+    """Convert Word control characters into readable plain text."""
+    if not text:
+        return ""
+
+    field_stripped_text = []
+    field_stack = []
+
+    for character in text:
+        if character == "\x13":
+            field_stack.append("code")
+            continue
+        if character == "\x14":
+            if field_stack:
+                field_stack[-1] = "result"
+            continue
+        if character == "\x15":
+            if field_stack:
+                field_stack.pop()
+            continue
+
+        if not field_stack or field_stack[-1] == "result":
+            field_stripped_text.append(character)
+
+    normalized_text = (
+        "".join(field_stripped_text)
+        .replace("\r", "\n")
+        .replace("\x0b", "\n")
+        .replace("\x0c", "\n\n")
+        .replace("\x07", "\t")
+        .replace("\x00", "")
+    )
+    normalized_text = re.sub(r"[\x01-\x08\x0e-\x1f]", " ", normalized_text)
+    normalized_text = re.sub(r"\n{3,}", "\n\n", normalized_text)
+    normalized_text = re.sub(r"[ \t]{2,}", " ", normalized_text)
+    return normalized_text.strip()
+
+
+def _score_legacy_doc_candidate(text):
+    """Score a candidate by its count of readable characters, so longer readable text wins."""
+    if not text:
+        return 0
+
+    readable_characters = sum(
+        1
+        for character in text
+        if character.isalnum()
+        or character.isspace()
+        or character in ".,;:!?()[]{}'\"-_/@#$%^&*+=<>|"
+    )
+    return readable_characters
+
+
+def _extract_legacy_doc_text_from_piece_table(word_stream, piece_table_bytes):
+    """Decode text from the WordDocument stream bytes as directed by a PlcPcd piece table."""
+    if len(piece_table_bytes) < 16 or (len(piece_table_bytes) - 4) % 12 != 0:
+        return ""
+
+    piece_count = (len(piece_table_bytes) - 4) // 12
+    cp_count = piece_count + 1
+    cp_byte_count = cp_count * 4
+
+    if len(piece_table_bytes) != cp_byte_count + (piece_count * 8):
+        return ""
+
+    character_positions = struct.unpack(f"<{cp_count}I", piece_table_bytes[:cp_byte_count])
+    if any(character_positions[index] > character_positions[index + 1] for index in range(piece_count)):
+        return ""
+
+    text_segments = []
+    piece_descriptor_offset = cp_byte_count
+
+    for index in range(piece_count):
+        start_cp = character_positions[index]
+        end_cp = character_positions[index + 1]
+        character_count = end_cp - start_cp
+        if character_count < 0:
+            return ""
+
+        piece_descriptor_start = piece_descriptor_offset + (index * 8)
+        piece_descriptor_end = piece_descriptor_start + 8
+        piece_descriptor = piece_table_bytes[piece_descriptor_start:piece_descriptor_end]
+        if len(piece_descriptor) != 8:
+            return ""
+
+        fc_compressed = struct.unpack("<I", piece_descriptor[2:6])[0]
+        is_compressed_piece = bool(fc_compressed & 0x40000000)
+        stream_offset = fc_compressed & 0x3FFFFFFF
+
+        if is_compressed_piece:
+            stream_offset //= 2
+            byte_count = character_count
+            encoding = "cp1252"
+        else:
+            byte_count = character_count * 2
+            encoding = "utf-16le"
+
+        if stream_offset < 0 or byte_count < 0 or (stream_offset + byte_count) > len(word_stream):
+            return ""
+
+        raw_text = word_stream[stream_offset:stream_offset + byte_count]
+        text_segments.append(raw_text.decode(encoding, errors='ignore'))
+
+    return _normalize_legacy_doc_text("".join(text_segments))
+
+
+def _extract_legacy_doc_text_from_table_stream(word_stream, table_stream):
+    """Scan a Word table stream for the most plausible text piece table."""
+    best_text = ""
+    best_score = 0
+    search_offset = 0
+
+    while search_offset <= len(table_stream) - 5:
+        piece_table_marker_offset = table_stream.find(b"\x02", search_offset)
+        if piece_table_marker_offset == -1 or piece_table_marker_offset > len(table_stream) - 5:
+            break
+
+        piece_table_length = struct.unpack(
+            "<I",
+            table_stream[piece_table_marker_offset + 1:piece_table_marker_offset + 5],
+        )[0]
+        piece_table_end = piece_table_marker_offset + 5 + piece_table_length
+
+        if (
+            piece_table_length >= 16
+            and (piece_table_length - 4) % 12 == 0
+            and piece_table_end <= len(table_stream)
+        ):
+            candidate_text = _extract_legacy_doc_text_from_piece_table(
+                word_stream,
+                table_stream[piece_table_marker_offset + 5:piece_table_end],
+            )
+            candidate_score = _score_legacy_doc_candidate(candidate_text)
+            if candidate_score > best_score:
+                best_text = candidate_text
+                best_score = candidate_score
+
+        search_offset = piece_table_marker_offset + 1
+
+    return best_text
+
+
+def extract_legacy_doc_text(file_path):
+    """Extract text from Word 97-2003 .doc files using OLE streams and piece tables."""
+    if not olefile.isOleFile(file_path):
+        raise Exception("File is not a valid OLE compound document")
+
+    ole = olefile.OleFileIO(file_path)
+    try:
+        if not ole.exists("WordDocument"):
+            raise Exception("Missing WordDocument stream")
+
+        word_stream = ole.openstream("WordDocument").read()
+        best_text = ""
+        best_score = 0
+
+        for table_stream_name in ("1Table", "0Table"):
+            if not ole.exists(table_stream_name):
+                continue
+
+            table_stream = ole.openstream(table_stream_name).read()
+            candidate_text = _extract_legacy_doc_text_from_table_stream(word_stream, table_stream)
+            candidate_score = _score_legacy_doc_candidate(candidate_text)
+            if candidate_score > best_score:
+                best_text = candidate_text
+                best_score = candidate_score
+
+        if not best_text:
+            raise Exception("Could not locate a readable text piece table in the document")
+
+        return best_text
+    finally:
+        ole.close()
+
 def extract_content_with_azure_di(file_path):
     """
     Extracts text page-by-page using Azure Document Intelligence "prebuilt-read"
@@ -218,6 +410,245 @@ def extract_docx_metadata(docx_path):
         print(f"Error extracting DOCX metadata: {e}")
         return '', ''
 
+
+def _normalize_legacy_doc_metadata_value(value):
+    """Convert OLE metadata values into trimmed strings."""
+    if value is None:
+        return ''
+
+    if isinstance(value, bytes):
+        for encoding in ('utf-8', 'utf-16le', 'cp1252', 'latin1'):
+            try:
+                value = value.decode(encoding)
+                break
+            except Exception:
+                continue
+        else:
+            value = value.decode('utf-8', errors='ignore')
+
+    return str(value).strip().strip('\x00').strip()
+
+
+def _parse_metadata_keywords(value):
+    """Parse metadata keywords into a normalized list of values."""
+    normalized_value = _normalize_legacy_doc_metadata_value(value)
+    if not normalized_value:
+        return []
+
+    return [keyword.strip() for keyword in re.split(r'[;,]', normalized_value) if keyword.strip()]
+
+
+def extract_legacy_doc_metadata(doc_path):
+    """Return title and author from a legacy OLE Word document, using last-saved-by when author is empty."""
+    try:
+        if not olefile.isOleFile(doc_path):
+            return '', ''
+
+        ole = olefile.OleFileIO(doc_path)
+        try:
+            metadata = ole.get_metadata()
+            doc_title = _normalize_legacy_doc_metadata_value(getattr(metadata, 'title', ''))
+            doc_author = _normalize_legacy_doc_metadata_value(getattr(metadata, 'author', ''))
+
+            if not doc_author:
+                doc_author = _normalize_legacy_doc_metadata_value(getattr(metadata, 'last_saved_by', ''))
+
+            return doc_title, doc_author
+        finally:
+            ole.close()
+    except Exception as e:
+        print(f"Error extracting DOC metadata: {e}")
+        return '', ''
+
+
+def extract_pptx_metadata(pptx_path):
+    """Return title, author, subject, and keywords from an OOXML PowerPoint file."""
+    namespaces = {
+        'cp': 'http://schemas.openxmlformats.org/package/2006/metadata/core-properties',
+        'dc': 'http://purl.org/dc/elements/1.1/',
+    }
+
+    try:
+        with zipfile.ZipFile(pptx_path) as archive:
+            try:
+                core_properties = archive.read('docProps/core.xml')
+            except KeyError:
+                return '', '', '', []
+
+        root = ElementTree.fromstring(core_properties)
+        ppt_title = (root.findtext('dc:title', default='', namespaces=namespaces) or '').strip()
+        ppt_author = (root.findtext('dc:creator', default='', namespaces=namespaces) or '').strip()
+        ppt_subject = (root.findtext('dc:subject', default='', namespaces=namespaces) or '').strip()
+        ppt_keywords = _parse_metadata_keywords(
+            root.findtext('cp:keywords', default='', namespaces=namespaces) or ''
+        )
+        return ppt_title, ppt_author, ppt_subject, ppt_keywords
+    except Exception as e:
+        print(f"Error extracting PPTX metadata: {e}")
+        return '', '', '', []
+
+
+def _clean_legacy_ppt_text_fragment(text):
+    """Normalize legacy PowerPoint text atoms into readable slide text."""
+    if not text:
+        return ''
+
+    normalized_text = (
+        text
+        .replace('\r', '\n')
+        .replace('\x0b', '\n')
+        .replace('\x0c', '\n')
+        .replace('\x00', '')
+    )
+    normalized_text = re.sub(r'[\x01-\x08\x0e-\x1f]', ' ', normalized_text)
+    normalized_text = re.sub(r'[ \t]{2,}', ' ', normalized_text)
+    normalized_text = re.sub(r'\n{3,}', '\n\n', normalized_text)
+    return normalized_text.strip()
+
+
+def extract_legacy_ppt_pages(file_path):
+    """Extract slide text from a legacy OLE PowerPoint .ppt file."""
+    if not olefile.isOleFile(file_path):
+        raise Exception("File is not a valid OLE compound document")
+
+    ole = olefile.OleFileIO(file_path)
+    try:
+        if not ole.exists('PowerPoint Document'):
+            raise Exception("Missing PowerPoint Document stream")
+
+        document_stream = ole.openstream('PowerPoint Document').read()
+    finally:
+        ole.close()
+
+    slide_fragments = {}
+    slide_counter = 0
+
+    def walk_records(start_offset, end_offset, current_slide_number=None):
+        nonlocal slide_counter
+
+        offset = start_offset
+        while offset + 8 <= end_offset:
+            record_header = struct.unpack_from('<H', document_stream, offset)[0]
+            record_version = record_header & 0x000F
+            record_type = struct.unpack_from('<H', document_stream, offset + 2)[0]
+            record_length = struct.unpack_from('<I', document_stream, offset + 4)[0]
+            payload_start = offset + 8
+            payload_end = payload_start + record_length
+
+            if payload_end > end_offset:
+                return
+
+            next_slide_number = current_slide_number
+            if record_type == 1006:
+                slide_counter += 1
+                next_slide_number = slide_counter
+                slide_fragments.setdefault(next_slide_number, [])
+
+            if record_type in {4000, 4008} and next_slide_number is not None:
+                if record_type == 4000:
+                    raw_text = document_stream[payload_start:payload_end].decode('utf-16le', errors='ignore')
+                else:
+                    raw_text = document_stream[payload_start:payload_end].decode('cp1252', errors='ignore')
+
+                cleaned_text = _clean_legacy_ppt_text_fragment(raw_text)
+                if cleaned_text:
+                    fragments = slide_fragments.setdefault(next_slide_number, [])
+                    if not fragments or fragments[-1] != cleaned_text:
+                        fragments.append(cleaned_text)
+
+            if record_version == 0x0F:
+                walk_records(payload_start, payload_end, next_slide_number)
+
+            offset = payload_end
+
+    walk_records(0, len(document_stream))
+
+    pages = []
+    non_empty_slide_count = 0
+    for slide_number in range(1, slide_counter + 1):
+        slide_text = "\n".join(slide_fragments.get(slide_number, []))
+        slide_text = re.sub(r'\n{3,}', '\n\n', slide_text).strip()
+        if slide_text:
+            non_empty_slide_count += 1
+
+        pages.append({
+            'page_number': slide_number,
+            'content': slide_text,
+        })
+
+    if non_empty_slide_count == 0:
+        raise Exception("Could not locate readable slide text in the presentation")
+
+    return pages
+
+
+def extract_legacy_ppt_metadata(ppt_path):
+    """Return title, author (or last-saved-by if empty), subject, and keywords from a legacy OLE PowerPoint file."""
+    try:
+        if not olefile.isOleFile(ppt_path):
+            return '', '', '', []
+
+        ole = olefile.OleFileIO(ppt_path)
+        try:
+            metadata = ole.get_metadata()
+            ppt_title = _normalize_legacy_doc_metadata_value(getattr(metadata, 'title', ''))
+            ppt_author = _normalize_legacy_doc_metadata_value(getattr(metadata, 'author', ''))
+            ppt_subject = _normalize_legacy_doc_metadata_value(getattr(metadata, 'subject', ''))
+            ppt_keywords = _parse_metadata_keywords(getattr(metadata, 'keywords', ''))
+
+            if not ppt_author:
+                ppt_author = _normalize_legacy_doc_metadata_value(getattr(metadata, 'last_saved_by', ''))
+
+            return ppt_title, ppt_author, ppt_subject, ppt_keywords
+        finally:
+            ole.close()
+    except Exception as e:
+        print(f"Error extracting PPT metadata: {e}")
+        return '', '', '', []
+
+
+def extract_presentation_metadata(file_path, file_extension=None):
+    """Extract metadata from supported PowerPoint presentation formats."""
+    resolved_extension = (file_extension or os.path.splitext(file_path)[1]).lower()
+
+    if resolved_extension == '.ppt':
+        return extract_legacy_ppt_metadata(file_path)
+
+    if resolved_extension == '.pptx':
+        return extract_pptx_metadata(file_path)
+
+    return '', '', '', []
+
+
+def extract_word_text(file_path, file_extension=None):
+    """Extract text from supported Word document formats."""
+    resolved_extension = (file_extension or os.path.splitext(file_path)[1]).lower()
+
+    if resolved_extension == '.doc':
+        if olefile.isOleFile(file_path):
+            return extract_legacy_doc_text(file_path)
+        return extract_docx_text(file_path)
+
+    if resolved_extension in {'.docx', '.docm'}:
+        return extract_docx_text(file_path)
+
+    raise ValueError(f"Unsupported Word document extension: {resolved_extension}")
+
+
+def extract_word_metadata(file_path, file_extension=None):
+    """Extract title and author metadata from supported Word document formats."""
+    resolved_extension = (file_extension or os.path.splitext(file_path)[1]).lower()
+
+    if resolved_extension == '.doc':
+        if olefile.isOleFile(file_path):
+            return extract_legacy_doc_metadata(file_path)
+        return extract_docx_metadata(file_path)
+
+    if resolved_extension in {'.docx', '.docm'}:
+        return extract_docx_metadata(file_path)
+
+    return '', ''
+
 def parse_authors(author_input):
     """
     Converts any input (None, string, list, comma-delimited, etc.)
diff --git a/application/single_app/functions_documents.py b/application/single_app/functions_documents.py
index 2ff2fc95..7bff48d8 100644
--- a/application/single_app/functions_documents.py
+++ b/application/single_app/functions_documents.py
@@ -4535,7 +4535,7 @@ def process_log(document_id, user_id, temp_file_path, original_filename, enable_
 
 def process_doc(document_id, user_id, temp_file_path, original_filename, enable_enhanced_citations, update_callback, group_id=None, public_workspace_id=None):
     """
-    Processes .doc and .docm files using docx2txt library.
+    Processes legacy .doc files via OLE piece tables and .docm files via docx2txt.
     Note: .docx files still use Document Intelligence for better formatting preservation.
     """
     is_group = group_id is not None
@@ -4543,8 +4543,11 @@ def process_doc(document_id, user_id, temp_file_path, original_filename, enable_
 
     update_callback(status=f"Processing {original_filename.split('.')[-1].upper()} file...")
     total_chunks_saved = 0
+    total_embedding_tokens = 0
+    embedding_model_name = None
     chunk_config = get_chunk_size_config(get_settings())
-    target_words_per_chunk = chunk_config.get('doc', {}).get('value', 400)  # Consistent with other text-based chunking
+    file_ext = os.path.splitext(original_filename)[1].lower().lstrip('.')
+    target_words_per_chunk = chunk_config.get(file_ext, {}).get('value', 400)
 
     if enable_enhanced_citations:
         args = {
@@ -4563,15 +4566,8 @@ def process_doc(document_id, user_id, temp_file_path, original_filename, enable_
         upload_to_blob(**args)
 
     try:
-        # Import docx2txt here to avoid dependency issues if not installed
-        try:
-            import docx2txt
-        except ImportError:
-            raise Exception("docx2txt library is required for .doc and .docm file processing. Install with: pip install docx2txt")
-
-        # Extract text from .doc or .docm file
         try:
-            text_content = docx2txt.process(temp_file_path)
+            text_content = extract_word_text(temp_file_path, f'.{file_ext}')
         except Exception as e:
             raise Exception(f"Error extracting text from {original_filename}: {e}")
 
@@ -4893,7 +4889,7 @@ def process_log(document_id, user_id, temp_file_path, original_filename, enable_
 
 def process_doc(document_id, user_id, temp_file_path, original_filename, enable_enhanced_citations, update_callback, group_id=None, public_workspace_id=None):
     """
-    Processes .doc and .docm files using docx2txt library.
+    Processes legacy .doc files via OLE piece tables and .docm files via docx2txt.
     Note: .docx files still use Document Intelligence for better formatting preservation.
     """
     is_group = group_id is not None
@@ -4901,7 +4897,11 @@ def process_doc(document_id, user_id, temp_file_path, original_filename, enable_
 
     update_callback(status=f"Processing {original_filename.split('.')[-1].upper()} file...")
     total_chunks_saved = 0
-    target_words_per_chunk = 400  # Consistent with other text-based chunking
+    total_embedding_tokens = 0
+    embedding_model_name = None
+    chunk_config = get_chunk_size_config(get_settings())
+    file_ext = os.path.splitext(original_filename)[1].lower().lstrip('.')
+    target_words_per_chunk = chunk_config.get(file_ext, {}).get('value', 400)
 
     if enable_enhanced_citations:
         args = {
@@ -4920,15 +4920,8 @@ def process_doc(document_id, user_id, temp_file_path, original_filename, enable_
         upload_to_blob(**args)
 
     try:
-        # Import docx2txt here to avoid dependency issues if not installed
-        try:
-            import docx2txt
-        except ImportError:
-            raise Exception("docx2txt library is required for .doc and .docm file processing. Install with: pip install docx2txt")
-
-        # Extract text from .doc or .docm file
         try:
-            text_content = docx2txt.process(temp_file_path)
+            text_content = extract_word_text(temp_file_path, f'.{file_ext}')
         except Exception as e:
             raise Exception(f"Error extracting text from {original_filename}: {e}")
 
@@ -4969,13 +4962,18 @@ def process_doc(document_id, user_id, temp_file_path, original_filename, enable_
                 elif is_group:
                     args["group_id"] = group_id
 
-                save_chunks(**args)
+                token_usage = save_chunks(**args)
                 total_chunks_saved += 1
 
+                if token_usage:
+                    total_embedding_tokens += token_usage.get('total_tokens', 0)
+                    if not embedding_model_name:
+                        embedding_model_name = token_usage.get('model_deployment_name')
+
     except Exception as e:
         raise Exception(f"Failed processing {original_filename}: {e}")
 
-    return total_chunks_saved
+    return total_chunks_saved, total_embedding_tokens, embedding_model_name
 
 def process_html(document_id, user_id, temp_file_path, original_filename, enable_enhanced_citations, update_callback, group_id=None, public_workspace_id=None):
     """Processes HTML files."""
@@ -5851,8 +5849,10 @@ def process_di_document(document_id, user_id, temp_file_path, original_filename,
     page_count = 0 # For PDF pre-check
 
     is_pdf = file_ext == '.pdf'
-    is_word = file_ext in ('.docx', '.doc')
+    is_word = file_ext in ('.docx', '.doc', '.docm')
+    is_legacy_doc = file_ext == '.doc'
     is_ppt = file_ext in ('.pptx', '.ppt')
+    is_legacy_ppt = file_ext == '.ppt'
     is_image = file_ext in tuple('.' + ext for ext in IMAGE_EXTENSIONS)
 
     try:
@@ -5861,9 +5861,11 @@ def process_di_document(document_id, user_id, temp_file_path, original_filename,
             doc_authors_list = parse_authors(doc_author)
             page_count = get_pdf_page_count(temp_file_path)
         elif is_word:
-            doc_title, doc_author = extract_docx_metadata(temp_file_path)
+            doc_title, doc_author = extract_word_metadata(temp_file_path, file_ext)
+            doc_authors_list = parse_authors(doc_author)
+        elif is_ppt:
+            doc_title, doc_author, doc_subject, doc_keywords = extract_presentation_metadata(temp_file_path, file_ext)
             doc_authors_list = parse_authors(doc_author)
-        # PPT and Image metadata extraction might be added here if needed/possible
 
         update_fields = {'status': "Extracted initial metadata"}
         if doc_title: update_fields['title'] = doc_title
@@ -5940,27 +5942,51 @@ def process_di_document(document_id, user_id, temp_file_path, original_filename,
 
             upload_to_blob(**args)
 
-        # Send chunk to Azure DI
-        update_callback(status=f"Sending {chunk_effective_filename} to Azure Document Intelligence...")
         di_extracted_pages = []
-        try:
-            di_extracted_pages = extract_content_with_azure_di(chunk_path)
-            num_di_pages = len(di_extracted_pages)
-            conceptual_pages = num_di_pages if not is_image else 1 # Image is one conceptual item
-
-            if not di_extracted_pages and not is_image:
-                print(f"Warning: Azure DI returned no content pages for {chunk_effective_filename}.")
-                status_msg = f"Azure DI found no content in {chunk_effective_filename}."
-                # Update page count to 0 if nothing found, otherwise keep previous estimate or conceptual count
-                update_callback(number_of_pages=0 if idx == num_file_chunks else conceptual_pages, status=status_msg)
-            elif not di_extracted_pages and is_image:
-                print(f"Info: Azure DI processed image {chunk_effective_filename}, but extracted no text.")
-                update_callback(number_of_pages=conceptual_pages, status=f"Processed image {chunk_effective_filename} (no text found).")
-            else:
-                 update_callback(number_of_pages=conceptual_pages, status=f"Received {num_di_pages} content page(s)/slide(s) from Azure DI for {chunk_effective_filename}.")
+        if is_legacy_doc:
+            update_callback(status=f"Extracting legacy Word content from {chunk_effective_filename}...")
+            try:
+                extracted_text = extract_word_text(chunk_path, file_ext)
+                if extracted_text and extracted_text.strip():
+                    di_extracted_pages = [{
+                        "page_number": 1,
+                        "content": extracted_text,
+                    }]
+                    update_callback(number_of_pages=1, status=f"Extracted legacy Word content from {chunk_effective_filename}.")
+                else:
+                    print(f"Warning: Legacy Word extractor returned no content for {chunk_effective_filename}.")
+                    update_callback(number_of_pages=0, status=f"Legacy Word extractor found no content in {chunk_effective_filename}.")
+            except Exception as e:
+                raise Exception(f"Error extracting content from {chunk_effective_filename} with the legacy Word extractor: {str(e)}")
+        elif is_legacy_ppt:
+            update_callback(status=f"Extracting legacy PowerPoint content from {chunk_effective_filename}...")
+            try:
+                di_extracted_pages = extract_legacy_ppt_pages(chunk_path)
+                total_slides = len(di_extracted_pages)
+                update_callback(number_of_pages=total_slides, status=f"Extracted legacy PowerPoint content from {chunk_effective_filename}.")
+            except Exception as e:
+                raise Exception(f"Error extracting content from {chunk_effective_filename} with the legacy PowerPoint extractor: {str(e)}")
+        else:
+            # Send chunk to Azure DI
+            update_callback(status=f"Sending {chunk_effective_filename} to Azure Document Intelligence...")
+            try:
+                di_extracted_pages = extract_content_with_azure_di(chunk_path)
+                num_di_pages = len(di_extracted_pages)
+                conceptual_pages = num_di_pages if not is_image else 1 # Image is one conceptual item
+
+                if not di_extracted_pages and not is_image:
+                    print(f"Warning: Azure DI returned no content pages for {chunk_effective_filename}.")
+                    status_msg = f"Azure DI found no content in {chunk_effective_filename}."
+                    # Update page count to 0 if nothing found, otherwise keep previous estimate or conceptual count
+                    update_callback(number_of_pages=0 if idx == num_file_chunks else conceptual_pages, status=status_msg)
+                elif not di_extracted_pages and is_image:
+                    print(f"Info: Azure DI processed image {chunk_effective_filename}, but extracted no text.")
+                    update_callback(number_of_pages=conceptual_pages, status=f"Processed image {chunk_effective_filename} (no text found).")
+                else:
+                     update_callback(number_of_pages=conceptual_pages, status=f"Received {num_di_pages} content page(s)/slide(s) from Azure DI for {chunk_effective_filename}.")
 
-        except Exception as e:
-            raise Exception(f"Error extracting content from {chunk_effective_filename} with Azure DI: {str(e)}")
+            except Exception as e:
+                raise Exception(f"Error extracting content from {chunk_effective_filename} with Azure DI: {str(e)}")
 
         # --- Multi-Modal Vision Analysis (for images only) - Must happen BEFORE save_chunks ---
         if is_image and enable_enhanced_citations and idx == 1:  # Only run once for first chunk
@@ -6553,7 +6579,7 @@ def update_doc_callback(**kwargs):
         update_doc_callback(status=f"Processing file {original_filename}, type: {file_ext}")
 
         # --- 1. Dispatch to appropriate handler based on file type ---
-        # Note: .doc and .docm are handled separately by process_doc() using docx2txt
+        # Note: .doc uses the shared document pipeline with OLE extraction, while .docm stays on the direct Word-text path.
 
         is_group = group_id is not None
 
@@ -6562,7 +6588,7 @@ def update_doc_callback(**kwargs):
             "user_id": user_id,
             "temp_file_path": temp_file_path,
             "original_filename": original_filename,
-            "file_ext": file_ext if file_ext in tabular_extensions or file_ext in di_supported_extensions else None,
+            "file_ext": file_ext if file_ext in tabular_extensions or file_ext in di_supported_extensions or file_ext == '.doc' else None,
             "enable_enhanced_citations": enable_enhanced_citations,
             "update_callback": update_doc_callback
         }
@@ -6597,7 +6623,7 @@ def update_doc_callback(**kwargs):
                 total_chunks_saved, total_embedding_tokens, embedding_model_name = result
             else:
                 total_chunks_saved = result
-        elif file_ext in ('.doc', '.docm'):
+        elif file_ext == '.docm':
             result = process_doc(**{k: v for k, v in args.items() if k != "file_ext"})
             if isinstance(result, tuple) and len(result) == 3:
                 total_chunks_saved, total_embedding_tokens, embedding_model_name = result
@@ -6647,7 +6673,7 @@ def update_doc_callback(**kwargs):
                 group_id=group_id,
                 public_workspace_id=public_workspace_id
             )
-        elif file_ext in di_supported_extensions:
+        elif file_ext in di_supported_extensions or file_ext == '.doc':
             result = process_di_document(**args)
             # Handle tuple return (chunks, tokens, model_name)
             if isinstance(result, tuple) and len(result) == 3:
@@ -6722,7 +6748,11 @@ def update_doc_callback(**kwargs):
                 embedding_tokens=total_embedding_tokens,
                 embedding_model=embedding_model_name,
                 version=doc_metadata.get('version') if doc_metadata else None,
-                author=doc_metadata.get('author') if doc_metadata else None,
+                author=(
+                    doc_metadata.get('author')
+                    or ', '.join(ensure_list(doc_metadata.get('authors')))
+                    or None
+                ) if doc_metadata else None,
                 title=doc_metadata.get('title') if doc_metadata else None,
                 subject=doc_metadata.get('subject') if doc_metadata else None,
                 publication_date=doc_metadata.get('publication_date') if doc_metadata else None,
diff --git a/application/single_app/requirements.txt b/application/single_app/requirements.txt
index a01ab60c..48aa0877 100644
--- a/application/single_app/requirements.txt
+++ b/application/single_app/requirements.txt
@@ -8,6 +8,7 @@ Werkzeug==3.1.6
 requests==2.33.0
 openai==1.109.1
 docx2txt==0.8
+olefile==0.47
 Markdown==3.8.1
 bleach==6.1.0
 azure-cosmos==4.9.0
diff --git a/application/single_app/route_backend_chats.py b/application/single_app/route_backend_chats.py
index 8bd44f01..db6ff619 100644
--- a/application/single_app/route_backend_chats.py
+++ b/application/single_app/route_backend_chats.py
@@ -7771,7 +7771,10 @@ def result_requires_message_reload(result: Any) -> bool:
                     final_api_source_refs.insert(insert_idx, 'system:default_prompt')
                     default_system_prompt_inserted = True
 
-            if not original_hybrid_search_enabled:
+            if should_apply_history_grounding_message(
+                original_hybrid_search_enabled,
+                prior_grounded_document_refs,
+            ):
                 history_grounding_message = build_history_grounding_system_message()
                 insert_idx = 0
                 if (
@@ -10214,7 +10217,10 @@ def publish_live_plugin_thought(thought_payload):
                         final_api_source_refs.insert(insert_idx, 'system:default_prompt')
                         default_system_prompt_inserted = True
 
-                if not original_hybrid_search_enabled:
+                if should_apply_history_grounding_message(
+                    original_hybrid_search_enabled,
+                    prior_grounded_document_refs,
+                ):
                     history_grounding_message = build_history_grounding_system_message()
                     insert_idx = 0
                     if (
@@ -11632,6 +11638,14 @@ def build_history_grounding_system_message():
     }
 
 
+def should_apply_history_grounding_message(original_hybrid_search_enabled, prior_grounded_document_refs):
+    """Decide whether the bounded history-grounding system message applies.
+
+    True only when hybrid search is disabled and prior grounded document refs exist.
+    """
+    return bool(prior_grounded_document_refs) and not original_hybrid_search_enabled
+
+
 def build_assistant_history_content_with_citations(message, content):
     base_content = str(content or '').strip()
     citation_sections = []
diff --git a/application/single_app/route_frontend_chats.py b/application/single_app/route_frontend_chats.py
index c5590c4b..2679f57b 100644
--- a/application/single_app/route_frontend_chats.py
+++ b/application/single_app/route_frontend_chats.py
@@ -538,12 +538,11 @@ def upload_file():
                         # Continue without vision analysis
                 
             elif file_ext_nodot in {'doc', 'docm'}:
-                # Use docx2txt for .doc and .docm files
+                # Use OLE parsing for legacy .doc files and docx2txt for .docm files
                 try:
-                    import docx2txt
-                    extracted_content = docx2txt.process(temp_file_path)
-                except ImportError:
-                    return jsonify({'error': 'docx2txt library required for .doc/.docm files'}), 500
+                    extracted_content = extract_word_text(temp_file_path, f'.{file_ext_nodot}')
+                except Exception as e:
+                    return jsonify({'error': f'Error extracting text from {filename}: {e}'}), 500
             elif file_ext_nodot == 'txt':
                 extracted_content  = extract_text_file(temp_file_path)
             elif file_ext_nodot == 'md':
diff --git a/application/single_app/templates/profile.html b/application/single_app/templates/profile.html
index 685315e7..5e6dd0a9 100644
--- a/application/single_app/templates/profile.html
+++ b/application/single_app/templates/profile.html
@@ -135,6 +135,52 @@
         animation: spin 1s linear infinite;
     }
 
+    .preference-card-icon {
+        align-items: center;
+        background: linear-gradient(135deg, rgba(13, 110, 253, 0.12), rgba(13, 202, 240, 0.18));
+        border-radius: 14px;
+        color: var(--bs-primary);
+        display: inline-flex;
+        font-size: 1.25rem;
+        height: 52px;
+        justify-content: center;
+        width: 52px;
+    }
+
+    .preference-status {
+        min-height: 1.5rem;
+    }
+
+    .fact-memory-summary-card {
+        background: linear-gradient(135deg, rgba(13, 110, 253, 0.08), rgba(13, 202, 240, 0.12));
+        border: 1px solid rgba(13, 110, 253, 0.14);
+        border-radius: 16px;
+        padding: 1.25rem;
+    }
+
+    .fact-memory-count {
+        font-size: 2rem;
+        font-weight: 700;
+        line-height: 1;
+    }
+
+    .fact-memory-modal-list {
+        max-height: 55vh;
+        overflow-y: auto;
+    }
+
+    .fact-memory-modal-card {
+        border: 1px solid var(--bs-border-color);
+        border-radius: 14px;
+        padding: 1rem;
+        background: var(--bs-body-bg);
+    }
+
+    .fact-memory-pagination-summary {
+        min-height: 1.5rem;
+    }
+    
+
     .preference-card-icon {
         align-items: center;
         background: linear-gradient(135deg, rgba(13, 110, 253, 0.12), rgba(13, 202, 240, 0.18));
@@ -452,6 +498,117 @@ <h5 class="mb-2"><i class="bi bi-journal-text me-2"></i>Fact Memory</h5>
         <div id="fact-memory-status" class="preference-status small mt-3 text-muted"></div>
     </div>
 
+    <div class="section-card" id="tutorial-preferences">
+        <div class="d-flex flex-column flex-lg-row align-items-lg-start justify-content-between gap-3">
+            <div class="d-flex gap-3 align-items-start">
+                <span class="preference-card-icon">
+                    <i class="bi bi-signpost-split"></i>
+                </span>
+                <div>
+                    <h5 class="mb-2"><i class="bi bi-sliders me-2"></i>Tutorial Preferences</h5>
+                    <p class="text-muted mb-2">Control whether the floating guided tutorial buttons appear on Chat and Personal Workspace for your account.</p>
+                    <p class="small text-muted mb-0">These launchers are shown by default. You can hide them now and turn them back on later from this page.</p>
+                </div>
+            </div>
+            {% if app_settings.enable_support_menu and app_settings.enable_support_latest_features and app_settings.support_latest_features_has_visible_items %}
+            <a class="btn btn-outline-primary btn-sm" href="{{ url_for('support_latest_features') }}#guided_tutorials">
+                <i class="bi bi-stars me-1"></i>View Latest Feature Notes
+            </a>
+            {% endif %}
+        </div>
+
+        <div class="row g-3 align-items-center mt-1">
+            <div class="col-lg-8">
+                <div class="form-check form-switch mt-3">
+                    <input
+                        class="form-check-input"
+                        type="checkbox"
+                        id="show-tutorial-buttons-toggle"
+                        {% if user_settings.get('settings', {}).get('showTutorialButtons', True) %}checked{% endif %}
+                    />
+                    <label class="form-check-label fw-semibold" for="show-tutorial-buttons-toggle">
+                        Show tutorial buttons on Chat and Personal Workspace
+                    </label>
+                    <small class="d-block text-muted mt-1">Turn this off if you already know the interface and want to remove the floating walkthrough launchers.</small>
+                </div>
+            </div>
+            <div class="col-lg-4 text-lg-end">
+                <button type="button" class="btn btn-primary" id="save-tutorial-preferences-btn" onclick="saveTutorialPreferences()">
+                    <i class="bi bi-save me-1"></i>Save Tutorial Preference
+                </button>
+            </div>
+        </div>
+
+        <div id="tutorial-preference-status" class="preference-status small mt-3 text-muted"></div>
+    </div>
+
+    <div class="section-card" id="fact-memory-settings">
+        <div class="d-flex flex-column flex-lg-row align-items-lg-start justify-content-between gap-3">
+            <div class="d-flex gap-3 align-items-start">
+                <span class="preference-card-icon">
+                    <i class="bi bi-journal-text"></i>
+                </span>
+                <div>
+                    <h5 class="mb-2"><i class="bi bi-journal-text me-2"></i>Fact Memory</h5>
+                    <p class="text-muted mb-2">Save durable facts for your account, then manage the full memory list from a compact popup editor.</p>
+                    {% if app_settings.enable_fact_memory_plugin %}
+                    <span id="fact-memory-enabled-badge" class="badge bg-success">Enabled by admin</span>
+                    <span class="small text-muted ms-2">Supported chat and mini-SK flows can use these memories when fact memory is enabled.</span>
+                    {% else %}
+                    <span id="fact-memory-enabled-badge" class="badge bg-secondary">Disabled by admin</span>
+                    <span class="small text-muted ms-2">You can still manage saved memories here. They will stay inactive until an administrator turns fact memory back on.</span>
+                    {% endif %}
+                </div>
+            </div>
+        </div>
+
+        <div class="row g-4 mt-1">
+            <div class="col-lg-7">
+                <label for="fact-memory-new-value" class="form-label fw-semibold">Add a memory</label>
+                <textarea
+                    class="form-control"
+                    id="fact-memory-new-value"
+                    rows="3"
+                    placeholder="Example: I prefer concise responses with explicit next steps when there are options."
+                ></textarea>
+                <div class="row g-3 mt-1 align-items-end">
+                    <div class="col-md-6">
+                        <label for="fact-memory-new-type" class="form-label fw-semibold small">Memory type</label>
+                        <select class="form-select" id="fact-memory-new-type">
+                            <option value="instruction">Instruction: always apply to future responses</option>
+                            <option value="fact" selected>Fact: recall only when relevant</option>
+                        </select>
+                    </div>
+                </div>
+                <small class="d-block text-muted mt-2">Use instruction memories for durable preferences like tone or formatting. Use fact memories for details about you that the model should recall only when relevant.</small>
+                <button type="button" class="btn btn-primary mt-3" id="fact-memory-add-btn" onclick="createFactMemory()">
+                    <i class="bi bi-plus-circle me-1"></i>Add Memory
+                </button>
+            </div>
+            <div class="col-lg-5">
+                <div class="fact-memory-summary-card h-100 d-flex flex-column justify-content-between">
+                    <div>
+                        <div class="small text-uppercase text-muted fw-semibold mb-2">Saved Memories</div>
+                        <div class="fact-memory-count" id="fact-memory-count">0</div>
+                        <div class="text-muted mt-2" id="fact-memory-last-updated">No memories saved yet.</div>
+                        <div class="small text-muted mt-2" id="fact-memory-type-summary">0 instructions, 0 facts</div>
+                    </div>
+                    <div class="d-flex flex-wrap gap-2 mt-3">
+                        <button type="button" class="btn btn-outline-primary" id="open-fact-memory-modal-btn" onclick="openFactMemoryModal()">
+                            <i class="bi bi-window-stack me-1"></i>Manage Memories
+                        </button>
+                        <button type="button" class="btn btn-outline-secondary" id="fact-memory-refresh-btn" onclick="loadFactMemory()">
+                            <i class="bi bi-arrow-clockwise me-1"></i>Refresh
+                        </button>
+                    </div>
+                    <small class="text-muted d-block mt-3">Use the popup editor for search, paging, and updates.</small>
+                </div>
+            </div>
+        </div>
+
+        <div id="fact-memory-status" class="preference-status small mt-3 text-muted"></div>
+    </div>
+
     <!-- Retention Policy Settings -->
     {% if app_settings.enable_retention_policy_personal %}
     <div class="section-card" id="retention-policy-settings">
@@ -804,6 +961,88 @@ <h5 class="modal-title" id="factMemoryManagerModalLabel">
     </div>
 </div>
 
+<div class="modal fade" id="factMemoryDeleteModal" tabindex="-1" aria-labelledby="factMemoryDeleteModalLabel" aria-hidden="true">
+    <div class="modal-dialog modal-dialog-centered">
+        <div class="modal-content">
+            <div class="modal-header bg-danger text-white">
+                <h5 class="modal-title" id="factMemoryDeleteModalLabel">
+                    <i class="bi bi-trash me-2"></i>Delete Fact Memory
+                </h5>
+                <button type="button" class="btn-close btn-close-white" data-bs-dismiss="modal" aria-label="Close"></button>
+            </div>
+            <div class="modal-body">
+                <p class="mb-0">Delete this saved memory? This cannot be undone, and supported chats will stop receiving it immediately.</p>
+            </div>
+            <div class="modal-footer">
+                <button type="button" class="btn btn-secondary" data-bs-dismiss="modal">Cancel</button>
+                <button type="button" class="btn btn-danger" id="confirm-delete-fact-memory-btn" onclick="confirmDeleteFactMemory()">
+                    <i class="bi bi-trash me-1"></i>Delete Memory
+                </button>
+            </div>
+        </div>
+    </div>
+</div>
+
+<div class="modal fade" id="factMemoryManagerModal" tabindex="-1" aria-labelledby="factMemoryManagerModalLabel" aria-hidden="true">
+    <div class="modal-dialog modal-xl modal-dialog-scrollable">
+        <div class="modal-content">
+            <div class="modal-header">
+                <div>
+                    <h5 class="modal-title" id="factMemoryManagerModalLabel">
+                        <i class="bi bi-journal-text me-2"></i>Manage Fact Memories
+                    </h5>
+                    <div class="small text-muted mt-1">Search, review, and edit your saved memories.</div>
+                </div>
+                <button type="button" class="btn-close" data-bs-dismiss="modal" aria-label="Close"></button>
+            </div>
+            <div class="modal-body">
+                <div class="row g-3 align-items-end mb-3">
+                    <div class="col-lg-6">
+                        <label for="fact-memory-search-input" class="form-label fw-semibold">Search memories</label>
+                        <input type="search" class="form-control" id="fact-memory-search-input" placeholder="Search memory text" />
+                    </div>
+                    <div class="col-lg-3">
+                        <label for="fact-memory-type-filter" class="form-label fw-semibold">Type</label>
+                        <select class="form-select" id="fact-memory-type-filter">
+                            <option value="all" selected>All memory types</option>
+                            <option value="instruction">Instruction</option>
+                            <option value="fact">Fact</option>
+                        </select>
+                    </div>
+                    <div class="col-lg-3 text-lg-end">
+                        <button type="button" class="btn btn-outline-secondary" id="fact-memory-modal-refresh-btn" onclick="loadFactMemory()">
+                            <i class="bi bi-arrow-clockwise me-1"></i>Refresh List
+                        </button>
+                    </div>
+                </div>
+
+                <div class="d-flex flex-column flex-md-row justify-content-between align-items-md-center gap-2 mb-3">
+                    <div id="fact-memory-modal-summary" class="small text-muted">0 memories</div>
+                    <div id="fact-memory-pagination-summary" class="small text-muted fact-memory-pagination-summary"></div>
+                </div>
+
+                <div id="fact-memory-modal-list" class="fact-memory-modal-list vstack gap-3"></div>
+                <div id="fact-memory-modal-empty-state" class="alert alert-light border d-none mb-0" role="status">
+                    <i class="bi bi-info-circle me-2"></i>No memories match the current search.
+                </div>
+            </div>
+            <div class="modal-footer justify-content-between">
+                <nav aria-label="Fact memory pagination">
+                    <ul class="pagination pagination-sm mb-0">
+                        <li class="page-item">
+                            <button class="page-link" type="button" id="fact-memory-prev-page" aria-label="Previous memories page">Previous</button>
+                        </li>
+                        <li class="page-item">
+                            <button class="page-link" type="button" id="fact-memory-next-page" aria-label="Next memories page">Next</button>
+                        </li>
+                    </ul>
+                </nav>
+                <button type="button" class="btn btn-secondary" data-bs-dismiss="modal">Close</button>
+            </div>
+        </div>
+    </div>
+</div>
+
 <!-- Export Activity Modal -->
 <div class="modal fade" id="exportActivityModal" tabindex="-1" aria-labelledby="exportActivityModalLabel" aria-hidden="true">
     <div class="modal-dialog">
@@ -879,6 +1118,16 @@ <h5 class="modal-title" id="exportActivityModalLabel">
 let pendingFactMemoryDeleteId = null;
 let factMemoryDeleteModalInstance = null;
 let factMemoryManagerModalInstance = null;
+let factMemoryEntries = []; // every memory returned by the last successful load
+let filteredFactMemoryEntries = []; // subset matching the modal's search/type filters
+let factMemoryCurrentPage = 1; // 1-based page shown in the manager modal
+const FACT_MEMORY_PAGE_SIZE = 5;
+const FACT_MEMORY_TYPE_INSTRUCTION = 'instruction';
+const FACT_MEMORY_TYPE_FACT = 'fact';
+const FACT_MEMORY_TYPE_LEGACY_DESCRIBER = 'describer';
+// pendingFactMemoryDeleteId, factMemoryDeleteModalInstance and
+// factMemoryManagerModalInstance are already declared directly above;
+// repeating the `let` declarations here would be a SyntaxError.
 
 console.log('Profile page script loaded - preparing to initialize');
 
@@ -991,13 +1240,50 @@ <h5 class="modal-title" id="exportActivityModalLabel">
             }
         });
     }
-    
-    // Initialize TTS settings if enabled
-    {% if app_settings.enable_text_to_speech %}
-    loadTTSSettings();
-    {% endif %}
-    
-    document.getElementById('refresh-profile-image').addEventListener('click', function() {
+
+    const factMemorySearchInput = document.getElementById('fact-memory-search-input');
+    if (factMemorySearchInput) {
+        factMemorySearchInput.addEventListener('input', function() {
+            factMemoryCurrentPage = 1;
+            renderFactMemoryModalList();
+        });
+    }
+
+    const factMemoryTypeFilter = document.getElementById('fact-memory-type-filter');
+    if (factMemoryTypeFilter) {
+        factMemoryTypeFilter.addEventListener('change', function() {
+            factMemoryCurrentPage = 1;
+            renderFactMemoryModalList();
+        });
+    }
+
+    const previousPageButton = document.getElementById('fact-memory-prev-page');
+    if (previousPageButton) {
+        previousPageButton.addEventListener('click', function() {
+            if (factMemoryCurrentPage > 1) {
+                factMemoryCurrentPage -= 1;
+                renderFactMemoryModalList();
+            }
+        });
+    }
+
+    const nextPageButton = document.getElementById('fact-memory-next-page');
+    if (nextPageButton) {
+        nextPageButton.addEventListener('click', function() {
+            const totalPages = Math.max(1, Math.ceil(filteredFactMemoryEntries.length / FACT_MEMORY_PAGE_SIZE));
+            if (factMemoryCurrentPage < totalPages) {
+                factMemoryCurrentPage += 1;
+                renderFactMemoryModalList();
+            }
+        });
+    }
+    
+    // Initialize TTS settings if enabled
+    {% if app_settings.enable_text_to_speech %}
+    loadTTSSettings();
+    {% endif %}
+    
+    document.getElementById('refresh-profile-image').addEventListener('click', function() {
         if (typeof window.ProfileImage !== 'undefined' && window.ProfileImage.refresh) {
             window.ProfileImage.refresh().then(() => {
                 updateProfileHeroImage();
@@ -1590,6 +1876,545 @@ <h5 class="modal-title" id="exportActivityModalLabel">
         });
 }
 
+function updateTutorialPreferenceStatus(message, type = 'muted') {
+    // Show `message` in the tutorial preference status line, tinted by `type`.
+    const target = document.getElementById('tutorial-preference-status');
+    if (target) {
+        const toneClasses = {
+            muted: 'text-muted',
+            info: 'text-info',
+            success: 'text-success',
+            danger: 'text-danger'
+        };
+        // Unknown tones fall back to the muted styling.
+        const toneClass = toneClasses[type] || toneClasses.muted;
+        target.textContent = message || '';
+        target.className = `preference-status small mt-3 ${toneClass}`;
+    }
+}
+
+function updateFactMemoryStatus(message, type = 'muted') {
+    // Show `message` in the fact memory status line, tinted by `type`.
+    const target = document.getElementById('fact-memory-status');
+    if (target) {
+        const toneClasses = {
+            muted: 'text-muted',
+            info: 'text-info',
+            success: 'text-success',
+            danger: 'text-danger'
+        };
+        // Unknown tones fall back to the muted styling.
+        const toneClass = toneClasses[type] || toneClasses.muted;
+        target.textContent = message || '';
+        target.className = `preference-status small mt-3 ${toneClass}`;
+    }
+}
+
+function formatFactMemoryTimestamp(value) {
+    // Format a timestamp for display as a locale date-and-time string.
+    if (value) {
+        const asDate = new Date(value);
+        const isValidDate = !Number.isNaN(asDate.getTime());
+        // Unparseable input is echoed back unchanged rather than hidden.
+        return isValidDate ? asDate.toLocaleString() : value;
+    }
+
+    // Empty, null, or undefined input has no time to show.
+    return 'Unknown time';
+}
+
+function formatFactMemoryDate(value) {
+    // Format a timestamp for display as a locale date-only string.
+    if (value) {
+        const asDate = new Date(value);
+        const isValidDate = !Number.isNaN(asDate.getTime());
+        // Unparseable input is echoed back unchanged rather than hidden.
+        return isValidDate ? asDate.toLocaleDateString() : value;
+    }
+
+    // Empty, null, or undefined input has no date to show.
+    return 'Unknown date';
+}
+
+function normalizeFactMemoryType(memoryType) {
+    // Collapse any stored memory type onto one of the two supported kinds.
+    //
+    // Only the instruction type survives as-is. The legacy describer type
+    // (FACT_MEMORY_TYPE_LEGACY_DESCRIBER) and every other value, including a
+    // missing one, are treated as a plain fact.
+    const isInstruction = memoryType === FACT_MEMORY_TYPE_INSTRUCTION;
+    return isInstruction ? FACT_MEMORY_TYPE_INSTRUCTION : FACT_MEMORY_TYPE_FACT;
+}
+
+function getFactMemoryTypeLabel(memoryType) { // Human-readable label for a (possibly legacy) memory type.
+    return normalizeFactMemoryType(memoryType) !== FACT_MEMORY_TYPE_INSTRUCTION ? 'Fact' : 'Instruction';
+}
+
+function getFactMemoryTypeBadgeClass(memoryType) { // Badge colour classes for a (possibly legacy) memory type.
+    return normalizeFactMemoryType(memoryType) !== FACT_MEMORY_TYPE_INSTRUCTION ? 'bg-secondary-subtle text-secondary-emphasis' : 'bg-primary-subtle text-primary-emphasis';
+}
+
+function buildFactMemoryMetaText(fact) { // Caption shown under a memory card: its type label and last-updated date.
+    return [`${getFactMemoryTypeLabel(fact.memory_type)} memory`, `Updated ${formatFactMemoryDate(fact.updated_at)}`].join(' · ');
+}
+
+function updateFactMemorySummary() {
+    // Refresh the summary card and the manager modal header from the in-memory lists.
+    const instructionCount = factMemoryEntries
+        .filter((entry) => normalizeFactMemoryType(entry.memory_type) === FACT_MEMORY_TYPE_INSTRUCTION)
+        .length;
+    const factCount = factMemoryEntries.length - instructionCount;
+    const matchCount = filteredFactMemoryEntries.length;
+    const pluralSuffix = (count) => (count === 1 ? '' : 's');
+    const typeBreakdown = [
+        `${instructionCount} instruction${pluralSuffix(instructionCount)}`,
+        `${factCount} fact${pluralSuffix(factCount)}`
+    ];
+    const setText = (elementId, text) => {
+        // Each target is optional; skip silently when it is not on the page.
+        const element = document.getElementById(elementId);
+        if (element) {
+            element.textContent = text;
+        }
+    };
+
+    setText('fact-memory-count', String(factMemoryEntries.length));
+    setText('fact-memory-last-updated', factMemoryEntries.length
+        ? `Last updated ${formatFactMemoryDate(factMemoryEntries[0].updated_at)}.`
+        : 'No memories saved yet.');
+    setText('fact-memory-type-summary', typeBreakdown.join(', '));
+    setText('fact-memory-modal-summary',
+        `${matchCount} matching mem${matchCount === 1 ? 'ory' : 'ories'} · ${typeBreakdown.join(' · ')}`);
+}
+
+function getFilteredFactMemoryEntries() {
+    // Apply the modal's search box and type dropdown to the loaded memories.
+    const searchBox = document.getElementById('fact-memory-search-input');
+    const typeDropdown = document.getElementById('fact-memory-type-filter');
+    const needle = searchBox ? searchBox.value.trim().toLowerCase() : '';
+    const wantedType = typeDropdown ? typeDropdown.value : 'all';
+    return factMemoryEntries.filter((entry) => {
+        const entryType = normalizeFactMemoryType(entry.memory_type);
+        if (wantedType !== 'all' && entryType !== wantedType) {
+            return false;
+        }
+        return !needle || String(entry.value || '').toLowerCase().includes(needle) || entryType.includes(needle);
+    });
+}
+
+function renderFactMemoryModalList() { // Re-render the manager modal: one page of filtered memory cards plus pager state.
+    const listElement = document.getElementById('fact-memory-modal-list');
+    const emptyState = document.getElementById('fact-memory-modal-empty-state');
+    const paginationSummary = document.getElementById('fact-memory-pagination-summary');
+    const previousPageButton = document.getElementById('fact-memory-prev-page');
+    const nextPageButton = document.getElementById('fact-memory-next-page');
+    if (!listElement || !emptyState) { // the list and empty-state nodes are required; pager nodes are optional
+        return;
+    }
+    // Re-apply the search/type filters, then clamp the page in case the result set shrank.
+    filteredFactMemoryEntries = getFilteredFactMemoryEntries();
+    const totalPages = Math.max(1, Math.ceil(filteredFactMemoryEntries.length / FACT_MEMORY_PAGE_SIZE));
+    if (factMemoryCurrentPage > totalPages) {
+        factMemoryCurrentPage = totalPages;
+    }
+    // Pages are 1-based; slice out the entries that belong to the current page.
+    const startIndex = (factMemoryCurrentPage - 1) * FACT_MEMORY_PAGE_SIZE;
+    const endIndex = startIndex + FACT_MEMORY_PAGE_SIZE;
+    const currentPageItems = filteredFactMemoryEntries.slice(startIndex, endIndex);
+    // Drop the previously rendered cards before rebuilding.
+    listElement.innerHTML = '';
+    // Nothing matches: show the empty state, disable paging, and still refresh the summary counts.
+    if (!filteredFactMemoryEntries.length) {
+        emptyState.classList.remove('d-none');
+        if (paginationSummary) {
+            paginationSummary.textContent = 'No results';
+        }
+        if (previousPageButton) {
+            previousPageButton.disabled = true;
+        }
+        if (nextPageButton) {
+            nextPageButton.disabled = true;
+        }
+        updateFactMemorySummary();
+        return;
+    }
+
+    emptyState.classList.add('d-none');
+    if (paginationSummary) {
+        paginationSummary.textContent = `Page ${factMemoryCurrentPage} of ${totalPages}`;
+    }
+    if (previousPageButton) {
+        previousPageButton.disabled = factMemoryCurrentPage <= 1;
+    }
+    if (nextPageButton) {
+        nextPageButton.disabled = factMemoryCurrentPage >= totalPages;
+    }
+    // Build one editable card per memory on this page.
+    currentPageItems.forEach((fact) => {
+        const itemContainer = document.createElement('div');
+        itemContainer.className = 'fact-memory-modal-card';
+        itemContainer.dataset.factMemoryId = fact.id;
+
+        const headerRow = document.createElement('div');
+        headerRow.className = 'd-flex flex-column flex-md-row justify-content-between align-items-md-center gap-2 mb-2';
+
+        const typeBadge = document.createElement('span');
+        typeBadge.className = `badge ${getFactMemoryTypeBadgeClass(fact.memory_type)}`;
+        typeBadge.textContent = getFactMemoryTypeLabel(fact.memory_type);
+        // The select's id embeds the memory id — presumably so the save handler can look it up; verify in saveFactMemory.
+        const typeSelect = document.createElement('select');
+        typeSelect.className = 'form-select form-select-sm';
+        typeSelect.id = `fact-memory-type-${fact.id}`;
+        typeSelect.style.maxWidth = '220px';
+        typeSelect.setAttribute('aria-label', 'Fact memory type');
+
+        const instructionOption = document.createElement('option');
+        instructionOption.value = FACT_MEMORY_TYPE_INSTRUCTION;
+        instructionOption.textContent = 'Instruction';
+
+        const factOption = document.createElement('option');
+        factOption.value = FACT_MEMORY_TYPE_FACT;
+        factOption.textContent = 'Fact';
+        // Options must be attached before the value is set so the matching option becomes selected.
+        typeSelect.appendChild(instructionOption);
+        typeSelect.appendChild(factOption);
+        typeSelect.value = normalizeFactMemoryType(fact.memory_type);
+        typeSelect.addEventListener('change', () => { // only restyles the badge; nothing is sent to the server here
+            typeBadge.className = `badge ${getFactMemoryTypeBadgeClass(typeSelect.value)}`;
+            typeBadge.textContent = getFactMemoryTypeLabel(typeSelect.value);
+        });
+
+        headerRow.appendChild(typeBadge);
+        headerRow.appendChild(typeSelect);
+        // Memory text is assigned through .value/.textContent, so it is never parsed as HTML.
+        const textArea = document.createElement('textarea');
+        textArea.className = 'form-control';
+        textArea.id = `fact-memory-value-${fact.id}`;
+        textArea.rows = 3;
+        textArea.value = fact.value || '';
+        textArea.setAttribute('aria-label', 'Fact memory value');
+
+        const metaText = document.createElement('div');
+        metaText.className = 'small text-muted mt-2';
+        metaText.textContent = buildFactMemoryMetaText(fact);
+
+        const actions = document.createElement('div');
+        actions.className = 'd-flex flex-wrap gap-2 mt-3';
+        // innerHTML is used below only with fixed icon markup, never with memory content.
+        const saveButton = document.createElement('button');
+        saveButton.type = 'button';
+        saveButton.className = 'btn btn-primary btn-sm';
+        saveButton.setAttribute('aria-label', 'Save memory');
+        saveButton.innerHTML = '<i class="bi bi-save me-1"></i>Save';
+        saveButton.addEventListener('click', () => saveFactMemory(fact.id));
+
+        const deleteButton = document.createElement('button');
+        deleteButton.type = 'button';
+        deleteButton.className = 'btn btn-outline-danger btn-sm';
+        deleteButton.setAttribute('aria-label', 'Delete memory');
+        deleteButton.innerHTML = '<i class="bi bi-trash me-1"></i>Delete';
+        deleteButton.addEventListener('click', () => requestFactMemoryDelete(fact.id));
+
+        actions.appendChild(saveButton);
+        actions.appendChild(deleteButton);
+
+        itemContainer.appendChild(headerRow);
+        itemContainer.appendChild(textArea);
+        itemContainer.appendChild(metaText);
+        itemContainer.appendChild(actions);
+        listElement.appendChild(itemContainer);
+    });
+    // Keep the summary counts in step with the freshly filtered list.
+    updateFactMemorySummary();
+}
+
+function openFactMemoryModal() {
+    // Show the memory manager modal, starting from the first page of results.
+    // Does nothing when the modal markup or the Bootstrap global is missing.
+    const managerModal = document.getElementById('factMemoryManagerModal');
+    const bootstrapMissing = typeof bootstrap === 'undefined';
+    if (managerModal && !bootstrapMissing) {
+        // Reuse a single Bootstrap modal instance across openings.
+        factMemoryManagerModalInstance = factMemoryManagerModalInstance || new bootstrap.Modal(managerModal);
+
+        factMemoryCurrentPage = 1;
+        renderFactMemoryModalList();
+        factMemoryManagerModalInstance.show();
+    }
+}
+
+async function loadFactMemory() { // Fetch the saved memories, re-render the modal list, and refresh the status/badge UI.
+    const refreshButton = document.getElementById('fact-memory-refresh-btn');
+    const originalButtonHtml = refreshButton ? refreshButton.innerHTML : '';
+    if (refreshButton) {
+        refreshButton.disabled = true;
+        refreshButton.innerHTML = '<i class="bi bi-hourglass-split me-1"></i>Loading...';
+    }
+    // NOTE(review): an overlapping second call would capture the temporary "Loading..." markup above as the original — confirm callers cannot overlap.
+    updateFactMemoryStatus('Loading saved fact memories...', 'info');
+
+    try {
+        const response = await fetch('/api/profile/fact-memory');
+        const data = await response.json(); // parsed before the ok check so an error payload's message can be read; a non-JSON body throws into catch
+        if (!response.ok) {
+            throw new Error(data.error || 'Failed to load fact memory');
+        }
+        // Tolerate a missing/non-array `facts` field and normalise each entry's type up front.
+        factMemoryEntries = (Array.isArray(data.facts) ? data.facts : []).map((fact) => ({
+            ...fact,
+            memory_type: normalizeFactMemoryType(fact.memory_type)
+        }));
+        factMemoryCurrentPage = 1;
+        renderFactMemoryModalList();
+        // Bring the badge in line with the `enabled` flag from the response.
+        const enabledBadge = document.getElementById('fact-memory-enabled-badge');
+        if (enabledBadge) {
+            enabledBadge.textContent = data.enabled ? 'Enabled by admin' : 'Disabled by admin';
+            enabledBadge.className = `badge ${data.enabled ? 'bg-success' : 'bg-secondary'}`;
+        }
+
+        updateFactMemoryStatus(
+            data.enabled
+                ? 'Fact memory is enabled for supported chats and mini-SK analysis.'
+                : 'Fact memory is currently disabled by admin. You can still edit your saved memories here.',
+            'muted'
+        );
+    } catch (error) { // network failure, non-JSON body, or non-2xx response
+        console.error('Error loading fact memory:', error);
+        updateFactMemoryStatus('Unable to load fact memory right now. Please try again.', 'danger');
+    } finally { // always restore the refresh button, whether the load succeeded or failed
+        if (refreshButton) {
+            refreshButton.disabled = false;
+            refreshButton.innerHTML = originalButtonHtml;
+        }
+    }
+}
+
+async function createFactMemory() {
+    const textArea = document.getElementById('fact-memory-new-value');
+    const typeSelect = document.getElementById('fact-memory-new-type');
+    const addButton = document.getElementById('fact-memory-add-btn');
+    if (!textArea || !typeSelect || !addButton) {
+        return;
+    }
+
+    const value = textArea.value.trim();
+    const memoryType = normalizeFactMemoryType(typeSelect.value);
+    if (!value) {
+        updateFactMemoryStatus('Enter a memory before saving.', 'danger');
+        return;
+    }
+
+    const originalButtonHtml = addButton.innerHTML;
+    addButton.disabled = true;
+    addButton.innerHTML = '<i class="bi bi-hourglass-split me-1"></i>Saving...';
+    updateFactMemoryStatus('Saving new fact memory...', 'info');
+
+    try {
+        const response = await fetch('/api/profile/fact-memory', {
+            method: 'POST',
+            headers: {
+                'Content-Type': 'application/json'
+            },
+            body: JSON.stringify({ value, memory_type: memoryType })
+        });
+        const data = await response.json();
+        if (!response.ok) {
+            throw new Error(data.error || 'Failed to create fact memory');
+        }
+
+        textArea.value = '';
+        typeSelect.value = FACT_MEMORY_TYPE_FACT;
+        await loadFactMemory();
+        updateFactMemoryStatus('Fact memory saved.', 'success');
+        if (typeof showToastMessage === 'function') {
+            showToastMessage('Fact memory saved successfully', 'success');
+        }
+    } catch (error) {
+        console.error('Error creating fact memory:', error);
+        updateFactMemoryStatus(error.message || 'Failed to save fact memory.', 'danger');
+    } finally {
+        addButton.disabled = false;
+        addButton.innerHTML = originalButtonHtml;
+    }
+}
+
+async function saveFactMemory(factId) {
+    const textArea = document.getElementById(`fact-memory-value-${factId}`);
+    const typeSelect = document.getElementById(`fact-memory-type-${factId}`);
+    const itemContainer = document.querySelector(`[data-fact-memory-id="${factId}"]`);
+    if (!textArea || !typeSelect || !itemContainer) {
+        return;
+    }
+
+    const value = textArea.value.trim();
+    const memoryType = normalizeFactMemoryType(typeSelect.value);
+    if (!value) {
+        updateFactMemoryStatus('Memory text cannot be empty.', 'danger');
+        return;
+    }
+
+    const saveButton = itemContainer.querySelector('button[aria-label="Save memory"]');
+    const originalButtonHtml = saveButton ? saveButton.innerHTML : '';
+    if (saveButton) {
+        saveButton.disabled = true;
+        saveButton.innerHTML = '<i class="bi bi-hourglass-split me-1"></i>Saving...';
+    }
+
+    updateFactMemoryStatus('Updating fact memory...', 'info');
+
+    try {
+        const response = await fetch(`/api/profile/fact-memory/${encodeURIComponent(factId)}`, {
+            method: 'PUT',
+            headers: {
+                'Content-Type': 'application/json'
+            },
+            body: JSON.stringify({ value, memory_type: memoryType })
+        });
+        const data = await response.json();
+        if (!response.ok) {
+            throw new Error(data.error || 'Failed to update fact memory');
+        }
+
+        await loadFactMemory();
+        updateFactMemoryStatus('Fact memory updated.', 'success');
+        if (typeof showToastMessage === 'function') {
+            showToastMessage('Fact memory updated successfully', 'success');
+        }
+    } catch (error) {
+        console.error('Error updating fact memory:', error);
+        updateFactMemoryStatus(error.message || 'Failed to update fact memory.', 'danger');
+    } finally {
+        if (saveButton) {
+            saveButton.disabled = false;
+            saveButton.innerHTML = originalButtonHtml;
+        }
+    }
+}
+
+function requestFactMemoryDelete(factId) {
+    pendingFactMemoryDeleteId = factId;
+    const modalElement = document.getElementById('factMemoryDeleteModal');
+    if (!modalElement || typeof bootstrap === 'undefined') {
+        return;
+    }
+
+    if (!factMemoryDeleteModalInstance) {
+        factMemoryDeleteModalInstance = new bootstrap.Modal(modalElement);
+    }
+
+    factMemoryDeleteModalInstance.show();
+}
+
+async function confirmDeleteFactMemory() {
+    if (!pendingFactMemoryDeleteId) {
+        return;
+    }
+
+    const deleteButton = document.getElementById('confirm-delete-fact-memory-btn');
+    const originalButtonHtml = deleteButton ? deleteButton.innerHTML : '';
+    if (deleteButton) {
+        deleteButton.disabled = true;
+        deleteButton.innerHTML = '<i class="bi bi-hourglass-split me-1"></i>Deleting...';
+    }
+
+    updateFactMemoryStatus('Deleting fact memory...', 'info');
+
+    try {
+        const response = await fetch(`/api/profile/fact-memory/${encodeURIComponent(pendingFactMemoryDeleteId)}`, {
+            method: 'DELETE'
+        });
+        const data = await response.json();
+        if (!response.ok) {
+            throw new Error(data.error || 'Failed to delete fact memory');
+        }
+
+        if (factMemoryDeleteModalInstance) {
+            factMemoryDeleteModalInstance.hide();
+        }
+        pendingFactMemoryDeleteId = null;
+        await loadFactMemory();
+        updateFactMemoryStatus('Fact memory deleted.', 'success');
+        if (typeof showToastMessage === 'function') {
+            showToastMessage('Fact memory deleted successfully', 'success');
+        }
+    } catch (error) {
+        console.error('Error deleting fact memory:', error);
+        updateFactMemoryStatus(error.message || 'Failed to delete fact memory.', 'danger');
+    } finally {
+        if (deleteButton) {
+            deleteButton.disabled = false;
+            deleteButton.innerHTML = originalButtonHtml;
+        }
+    }
+}
+
+function loadTutorialPreferences() {
+    const toggle = document.getElementById('show-tutorial-buttons-toggle');
+    if (!toggle) {
+        return;
+    }
+
+    fetch('/api/user/settings')
+        .then(response => response.json())
+        .then(data => {
+            toggle.checked = data.settings?.showTutorialButtons !== false;
+            updateTutorialPreferenceStatus('Tutorial launchers are shown by default until you change this setting.');
+        })
+        .catch(error => {
+            console.error('Error loading tutorial preferences:', error);
+            updateTutorialPreferenceStatus('Unable to refresh tutorial preference from the server. The current page state is still shown.', 'danger');
+        });
+}
+
+function saveTutorialPreferences() {
+    const toggle = document.getElementById('show-tutorial-buttons-toggle');
+    const button = document.getElementById('save-tutorial-preferences-btn');
+    if (!toggle || !button) {
+        return;
+    }
+
+    const originalHtml = button.innerHTML;
+    button.disabled = true;
+    button.innerHTML = '<i class="bi bi-hourglass-split me-1"></i>Saving...';
+    updateTutorialPreferenceStatus('Saving your tutorial preference...', 'info');
+
+    fetch('/api/user/settings', {
+        method: 'POST',
+        headers: {
+            'Content-Type': 'application/json'
+        },
+        body: JSON.stringify({
+            settings: {
+                showTutorialButtons: toggle.checked
+            }
+        })
+    })
+        .then(response => {
+            if (!response.ok) {
+                throw new Error('Failed to save tutorial preference');
+            }
+
+            return response.json();
+        })
+        .then(() => {
+            const message = toggle.checked
+                ? 'Tutorial buttons will stay visible on Chat and Personal Workspace.'
+                : 'Tutorial buttons are now hidden for your account. You can turn them back on here any time.';
+
+            updateTutorialPreferenceStatus(message, 'success');
+            showToastMessage('Tutorial preference saved successfully', 'success');
+        })
+        .catch(error => {
+            console.error('Error saving tutorial preference:', error);
+            updateTutorialPreferenceStatus('Failed to save tutorial preference. Please try again.', 'danger');
+        })
+        .finally(() => {
+            button.disabled = false;
+            button.innerHTML = originalHtml;
+        });
+}
+
 function loadActivityChartData() {
     // Fetch activity trends data
     fetch('/api/user/activity-trends?days=30')
diff --git a/docs/explanation/fixes/v0.241.002/LEGACY_DOC_OLE_EXTRACTION_FIX.md b/docs/explanation/fixes/v0.241.002/LEGACY_DOC_OLE_EXTRACTION_FIX.md
new file mode 100644
index 00000000..85517bcc
--- /dev/null
+++ b/docs/explanation/fixes/v0.241.002/LEGACY_DOC_OLE_EXTRACTION_FIX.md
@@ -0,0 +1,51 @@
+# Legacy DOC OLE Extraction Fix
+
+Fixed/Implemented in version: **0.241.002**
+
+## Issue Description
+
+Uploading Word 97-2003 `.doc` files failed during text extraction because the code treated `.doc` and `.docm` as the same format and sent both through `docx2txt`.
+
+Binary `.doc` files are OLE compound documents, not OOXML zip archives, so `docx2txt` raised `There is no item named 'word/document.xml' in the archive`.
+
+## Root Cause Analysis
+
+- `application/single_app/functions_documents.py` used the same extraction path for `.doc` and `.docm`.
+- `application/single_app/route_frontend_chats.py` mirrored the same assumption for inline chat uploads.
+- The shared content helpers had metadata extraction for `.docx`, but no legacy `.doc` parser for OLE `WordDocument` and table streams.
+
+## Technical Details
+
+### Files Modified
+
+- `application/single_app/functions_content.py`
+- `application/single_app/functions_documents.py`
+- `application/single_app/route_frontend_chats.py`
+- `application/single_app/requirements.txt`
+- `application/single_app/config.py`
+- `functional_tests/test_legacy_doc_ole_extraction.py`
+
+### Code Changes Summary
+
+- Added an `olefile`-based legacy `.doc` extraction path that reads `WordDocument` and table streams and reconstructs text from Word piece tables.
+- Kept `.docm` on the existing OOXML `docx2txt` path.
+- Added shared Word text and metadata dispatch helpers so document processing and chat uploads use the same format-aware behavior.
+- Pinned `olefile==0.47` in the main application requirements.
+- Bumped the application version to `0.241.002`.
+
+### Testing Approach
+
+- Added `functional_tests/test_legacy_doc_ole_extraction.py` to validate compressed and UTF-16 piece-table decoding plus `.doc` versus `.docm` dispatch behavior without requiring the full Azure/Cosmos runtime.
+
+## Validation
+
+### Before
+
+- Real Word 97-2003 `.doc` uploads failed with a missing `word/document.xml` archive entry.
+- The same incorrect `.doc` assumption existed in both document ingestion and chat upload extraction.
+
+### After
+
+- Legacy `.doc` files are parsed through an OLE-aware extraction path.
+- `.docm` files continue to use the OOXML extraction path they already required.
+- The regression test suite now covers the binary Word piece-table parser and extension dispatch split.
\ No newline at end of file
diff --git a/docs/explanation/fixes/v0.241.003/CHAT_HISTORY_GROUNDED_FOLLOW_UP_FIX.md b/docs/explanation/fixes/v0.241.003/CHAT_HISTORY_GROUNDED_FOLLOW_UP_FIX.md
new file mode 100644
index 00000000..0be381fc
--- /dev/null
+++ b/docs/explanation/fixes/v0.241.003/CHAT_HISTORY_GROUNDED_FOLLOW_UP_FIX.md
@@ -0,0 +1,52 @@
+# Chat History Grounded Follow-Up Fix (v0.241.003)
+
+Fixed/Implemented in version: **0.241.003**
+
+## Header Information
+
+### Issue Description
+
+The grounded follow-up fallback correctly kept later turns bounded to previously grounded documents, but the final no-search grounding prompt also ran for brand-new conversations and never-grounded conversations.
+
+That caused general model-only questions to fail closed with messages such as "I do not have enough grounded information from the prior conversation sources" even when no workspace knowledge had been used in the conversation.
+
+### Root Cause Analysis
+
+`application/single_app/route_backend_chats.py` injected the no-search history grounding system message whenever workspace search was disabled for the current turn.
+
+The prompt insertion did not check whether `last_grounded_document_refs` actually existed for the conversation, so the bounded grounded-follow-up rule leaked into ordinary model-only turns.
+
+### Version Implemented
+
+`0.241.003`
+
+## Technical Details
+
+### Files Modified
+
+- `application/single_app/route_backend_chats.py`
+- `application/single_app/config.py`
+- `functional_tests/test_chat_history_grounded_follow_up_fix.py`
+
+### Code Changes Summary
+
+- Added a dedicated guard in `application/single_app/route_backend_chats.py` so the no-search grounding prompt is only inserted when the conversation already has `last_grounded_document_refs`.
+- Kept the existing `last_grounded_document_refs` persistence contract from `application/single_app/functions_conversation_metadata.py` as the anchor for bounded grounded follow-up behavior.
+- Preserved the existing bounded fallback behavior that reuses prior citations and searches only previously grounded documents when history alone is insufficient.
+- Left explicit workspace search behavior unchanged when the user turns search back on.
+- Updated the grounded follow-up functional test to verify the narrower prompt-insertion contract in both standard and streaming chat paths.
+- Bumped the application version to `0.241.003`.
+
+## Validation
+
+### Testing Approach
+
+- Updated `functional_tests/test_chat_history_grounded_follow_up_fix.py`.
+- Verified the grounding prompt helper returns `False` for new or never-grounded conversations and `True` only when prior grounded refs exist.
+- Verified both standard and streaming chat paths still keep the bounded follow-up fallback logic while applying the final grounding prompt only behind the new guard.
+
+### Impact Analysis
+
+- New conversations without prior grounded document refs now answer normally from model knowledge when workspace search is off.
+- Conversations that already used workspace grounding still retain the bounded follow-up behavior and can reuse past citations to search only the previously grounded documents.
+- The fail-closed "select a workspace or document" behavior remains available, but only for the grounded follow-up scenarios it was designed to protect.
\ No newline at end of file
diff --git a/docs/explanation/fixes/v0.241.004/LEGACY_DOC_PROCESSING_PARITY_FIX.md b/docs/explanation/fixes/v0.241.004/LEGACY_DOC_PROCESSING_PARITY_FIX.md
new file mode 100644
index 00000000..051086b4
--- /dev/null
+++ b/docs/explanation/fixes/v0.241.004/LEGACY_DOC_PROCESSING_PARITY_FIX.md
@@ -0,0 +1,54 @@
+# Legacy DOC Processing Parity Fix
+
+Fixed/Implemented in version: **0.241.004**
+
+## Issue Description
+
+Legacy Word 97-2003 `.doc` files could be uploaded and chunked after the OLE extraction fix, but they still did not follow the same higher-level processing workflow as other Word documents.
+
+That left two visible gaps:
+
+- metadata extraction did not populate the document record consistently
+- enhanced citations did not report the same processing state as the shared document pipeline
+
+## Root Cause Analysis
+
+- `application/single_app/functions_documents.py` still routed `.doc` files through `process_doc(...)`, which is a direct text-extraction path.
+- The richer `process_di_document(...)` path is where the app performs initial metadata updates, sets the `enhanced_citations` state, and runs final metadata extraction.
+- Legacy OLE metadata values can be stored as byte strings, so title/author values also needed normalization before being written back to the document record.
+
+## Technical Details
+
+### Files Modified
+
+- `application/single_app/functions_content.py`
+- `application/single_app/functions_documents.py`
+- `application/single_app/config.py`
+- `functional_tests/test_legacy_doc_ole_extraction.py`
+
+### Code Changes Summary
+
+- Routed `.doc` files through the shared document-processing pipeline instead of the direct `process_doc(...)` branch.
+- Added a legacy `.doc` branch inside the shared pipeline that uses `extract_word_text(..., '.doc')` instead of Azure Document Intelligence.
+- Normalized OLE metadata byte values before applying title/author updates.
+- Updated document creation logging to fall back to the stored `authors` list when a singular `author` field is not present.
+- Bumped the application version to `0.241.004`.
+
+### Testing Approach
+
+- Extended `functional_tests/test_legacy_doc_ole_extraction.py` to cover metadata normalization and verify that `.doc` now routes through the shared document-processing workflow while `.docm` stays on the direct OOXML path.
+
+## Validation
+
+### Before
+
+- `.doc` files extracted text, but skipped the shared processing workflow used by richer document types.
+- Metadata fields could remain blank because the direct `.doc` path did not run the same metadata lifecycle.
+- Enhanced citations did not surface the same state transitions as the shared Word document path.
+
+### After
+
+- `.doc` files use the shared document-processing workflow while still relying on OLE extraction for their content.
+- Initial metadata values from OLE are normalized before they are saved.
+- Final metadata extraction can run through the same workflow used by other document types.
+- Document creation logging now records author information from the saved `authors` list when available.
\ No newline at end of file
diff --git a/docs/explanation/fixes/v0.241.005/LEGACY_PPT_OLE_SUPPORT_FIX.md b/docs/explanation/fixes/v0.241.005/LEGACY_PPT_OLE_SUPPORT_FIX.md
new file mode 100644
index 00000000..6129cf93
--- /dev/null
+++ b/docs/explanation/fixes/v0.241.005/LEGACY_PPT_OLE_SUPPORT_FIX.md
@@ -0,0 +1,55 @@
+# Legacy PPT OLE Support Fix
+
+Fixed/Implemented in version: **0.241.005**
+
+## Issue Description
+
+Legacy PowerPoint `.ppt` files were allowed by the upload pipeline, but they did not have a native OLE extraction path.
+
+That caused two practical gaps compared with the richer PowerPoint workflow:
+
+- binary `.ppt` content still relied on the same Azure Document Intelligence path used for `.pptx`
+- PowerPoint metadata was not populated from either `.ppt` or `.pptx` files during the initial metadata update
+
+## Root Cause Analysis
+
+- `application/single_app/functions_documents.py` treated `.ppt` and `.pptx` as the same presentation type during processing.
+- The shared content helpers had no PowerPoint-specific metadata helpers.
+- Legacy `.ppt` files store slide text and summary information inside OLE streams rather than OOXML parts.
+
+## Technical Details
+
+### Files Modified
+
+- `application/single_app/functions_content.py`
+- `application/single_app/functions_documents.py`
+- `application/single_app/config.py`
+- `functional_tests/test_legacy_ppt_ole_extraction.py`
+
+### Code Changes Summary
+
+- Added `.pptx` metadata extraction from `docProps/core.xml`.
+- Added legacy `.ppt` metadata extraction from OLE `SummaryInformation`.
+- Added legacy `.ppt` slide extraction from the `PowerPoint Document` stream by walking slide containers and text atom records.
+- Routed `.ppt` through the shared document-processing workflow so enhanced-citation state updates and final metadata extraction still behave like `.pptx` uploads.
+- Bumped the application version to `0.241.005`.
+
+### Testing Approach
+
+- Added `functional_tests/test_legacy_ppt_ole_extraction.py`.
+- Validated `.pptx` metadata parsing with a synthetic OOXML `core.xml` payload.
+- Validated legacy `.ppt` metadata and slide extraction against `artifacts/UCCSChapter2_Spring2012.ppt`.
+- Verified the shared upload pipeline now calls the legacy `.ppt` extractor.
+
+## Validation
+
+### Before
+
+- `.ppt` uploads did not have a PowerPoint-specific OLE extraction path.
+- Initial metadata updates did not populate PowerPoint title, author, subject, or keywords.
+
+### After
+
+- Legacy `.ppt` files extract slide text directly from OLE PowerPoint records.
+- `.ppt` and `.pptx` both populate initial presentation metadata when available.
+- `.ppt` stays on the shared upload workflow, so enhanced citations and final metadata extraction continue to work through the same higher-level path as `.pptx`.
\ No newline at end of file
diff --git a/docs/explanation/release_notes.md b/docs/explanation/release_notes.md
index 9fb7967b..4acc038d 100644
--- a/docs/explanation/release_notes.md
+++ b/docs/explanation/release_notes.md
@@ -271,6 +271,13 @@ For feature-focused and fix-focused drill-downs by version, see [Features by Ver
     *   Updated `deep_merge_dicts()` to return a boolean `changed` flag and wired `get_settings()` to call `upsert_item()` when `settings_changed` is `True`, so missing default keys correctly trigger persistence back to Cosmos DB.
     *   Added a functional regression test to validate the merge detection and persistence markers.
     *   (Ref: `application/single_app/functions_settings.py`, `application/single_app/config.py`, `functional_tests/test_settings_deep_merge_persistence_fix.py`)
+
+*   **Legacy Office Binary Upload Support**
+    *   Added native OLE-based support for older Word `.doc` and PowerPoint `.ppt` files instead of relying on OOXML-only assumptions during processing.
+    *   Legacy `.doc` uploads now extract available metadata and follow the same shared document-processing workflow used for richer Office files, so enhanced citations and final metadata extraction stay consistent when those features are enabled.
+    *   Legacy `.ppt` uploads now extract slide text and available summary metadata from the OLE presentation streams while keeping the same enhanced-citation and final-metadata workflow used by `.pptx` uploads.
+    *   `.pptx` uploads now also populate presentation metadata such as title, author, subject, and keywords during the initial metadata update when metadata extraction is enabled.
+    *   (Ref: `functions_content.py`, `functions_documents.py`, `test_legacy_doc_ole_extraction.py`, `test_legacy_ppt_ole_extraction.py`, legacy Office OLE support and metadata parity)
     
 *   **Pillow PSD Upload Hardening**
     *   Updated the application to use `pillow==12.1.1`, moving the app off the vulnerable Pillow range for specially crafted PSD image parsing.
diff --git a/functional_tests/test_chat_history_grounded_follow_up_fix.py b/functional_tests/test_chat_history_grounded_follow_up_fix.py
index dc5b09a2..2ef9290a 100644
--- a/functional_tests/test_chat_history_grounded_follow_up_fix.py
+++ b/functional_tests/test_chat_history_grounded_follow_up_fix.py
@@ -1,12 +1,13 @@
 # test_chat_history_grounded_follow_up_fix.py
 """
 Functional test for grounded follow-up chat fallback.
-Version: 0.240.055
-Implemented in: 0.240.054; Updated in: 0.240.055
+Version: 0.241.003
+Implemented in: 0.240.054; Updated in: 0.241.003
 
 This test ensures follow-up turns with workspace search disabled can reuse
 prior grounded document refs, derive bounded fallback search parameters, and
-preserve the no-search grounding contract in both chat execution paths.
+preserve the no-search grounding contract only for conversations that already
+have grounded document history.
 """
 
 import ast
@@ -22,6 +23,7 @@
     'docs',
     'explanation',
     'fixes',
+    'v0.241.003',
     'CHAT_HISTORY_GROUNDED_FOLLOW_UP_FIX.md',
 )
 ROUTE_TARGET_FUNCTIONS = {
@@ -29,6 +31,7 @@
     'build_prior_grounded_document_search_parameters',
     'build_history_only_assessment_messages',
     'build_history_grounding_system_message',
+    'should_apply_history_grounding_message',
 }
 METADATA_TARGET_FUNCTIONS = {
     '_extract_document_id_from_search_result',
@@ -266,6 +269,7 @@ def test_history_only_prompt_contract_is_explicit():
     namespace, _ = load_route_helpers()
     build_assessment_messages = namespace['build_history_only_assessment_messages']
     build_grounding_message = namespace['build_history_grounding_system_message']
+    should_apply_grounding_message = namespace['should_apply_history_grounding_message']
 
     assessment_messages = build_assessment_messages(
         {
@@ -289,6 +293,11 @@ def test_history_only_prompt_contract_is_explicit():
     assert 'Workspace search is disabled for this turn.' in grounding_message['content']
     assert 'ask the user to select a workspace or document' in grounding_message['content']
 
+    assert should_apply_grounding_message(False, []) is False
+    assert should_apply_grounding_message(False, None) is False
+    assert should_apply_grounding_message(True, [{'document_id': 'doc-1'}]) is False
+    assert should_apply_grounding_message(False, [{'document_id': 'doc-1'}]) is True
+
     print('✅ History-only prompt contract passed')
     return True
 
@@ -306,6 +315,8 @@ def test_route_and_metadata_wiring_cover_both_chat_paths():
     assert route_source.count('Conversation context alone was insufficient; searching previously grounded documents') == 2
     assert route_source.count('No prior grounded documents were available; using conversation history only') == 2
     assert route_source.count("'history_grounded_fallback'") == 2
+    assert route_source.count('if not original_hybrid_search_enabled:') == 2
+    assert route_source.count('if should_apply_history_grounding_message(') == 2
     assert route_source.count('history_grounding_message = build_history_grounding_system_message()') == 2
 
     print('✅ Grounded follow-up wiring passed')
@@ -318,10 +329,11 @@ def test_version_and_fix_documentation_alignment():
 
     fix_doc_content = read_file_text(FIX_DOC)
 
-    assert read_config_version() == '0.240.055'
-    assert 'Fixed/Implemented in version: **0.240.055**' in fix_doc_content
+    assert read_config_version() == '0.241.003'
+    assert 'Fixed/Implemented in version: **0.241.003**' in fix_doc_content
     assert 'last_grounded_document_refs' in fix_doc_content
     assert 'previously grounded documents' in fix_doc_content.lower()
+    assert 'new conversations without prior grounded document refs now answer normally' in fix_doc_content.lower()
     assert 'application/single_app/route_backend_chats.py' in fix_doc_content
     assert 'application/single_app/functions_conversation_metadata.py' in fix_doc_content
 
diff --git a/functional_tests/test_legacy_doc_ole_extraction.py b/functional_tests/test_legacy_doc_ole_extraction.py
new file mode 100644
index 00000000..479a52d4
--- /dev/null
+++ b/functional_tests/test_legacy_doc_ole_extraction.py
@@ -0,0 +1,276 @@
+# test_legacy_doc_ole_extraction.py
+"""
+Functional test for legacy .doc OLE extraction.
+Version: 0.241.004
+Implemented in: 0.241.004
+
+This test ensures Word 97-2003 .doc files are parsed through the OLE piece-table
+path and routed through the shared document-processing workflow instead of the
+OOXML archive path that expects word/document.xml.
+"""
+
+import importlib.util
+import os
+import re
+import struct
+import sys
+import types
+from unittest.mock import patch
+
+
+FUNCTIONS_CONTENT_PATH = os.path.join(
+    os.path.dirname(os.path.abspath(__file__)),
+    "..",
+    "application",
+    "single_app",
+    "functions_content.py",
+)
+
+FUNCTIONS_DOCUMENTS_PATH = os.path.join(
+    os.path.dirname(os.path.abspath(__file__)),
+    "..",
+    "application",
+    "single_app",
+    "functions_documents.py",
+)
+
+
+def _load_functions_content_module():
+    """Return functions_content executed from disk while stub project modules are installed."""
+    spec = importlib.util.spec_from_file_location(
+        "functions_content_legacy_doc_test", FUNCTIONS_CONTENT_PATH
+    )
+    loaded_module = importlib.util.module_from_spec(spec)
+
+    fake_config = types.ModuleType("config")
+    fake_config.os = os
+    fake_config.re = re
+    fake_config.CLIENTS = {}
+    fake_config.AZURE_ENVIRONMENT = "public"
+    fake_config.WORD_CHUNK_SIZE = 400
+
+    fake_debug = types.ModuleType("functions_debug")
+    fake_debug.debug_print = lambda *args, **kwargs: None
+
+    # Stub modules keyed by the sys.modules name they temporarily occupy.
+    stubs = {
+        "config": fake_config,
+        "functions_debug": fake_debug,
+        "functions_settings": types.ModuleType("functions_settings"),
+        "functions_logging": types.ModuleType("functions_logging"),
+    }
+
+    # Snapshot the current entries first so the finally block can undo the swap.
+    previous = {name: sys.modules.get(name) for name in stubs}
+    sys.modules.update(stubs)
+
+    try:
+        spec.loader.exec_module(loaded_module)
+    finally:
+        # Reinstate whatever was registered before, or drop the stub if nothing was.
+        for name, saved in previous.items():
+            if saved is not None:
+                sys.modules[name] = saved
+            else:
+                sys.modules.pop(name, None)
+
+    return loaded_module
+
+
+FUNCTIONS_CONTENT = _load_functions_content_module()
+
+
+def _build_piece_table_stream(text, compressed=True):
+    """Return (word_stream, table_stream) bytes describing one legacy Word text piece."""
+    # Compressed pieces carry cp1252 text and an fc value with bit 30 (0x40000000) set.
+    encoding, fc_value = ("cp1252", 0x40000000) if compressed else ("utf-16le", 0)
+    word_stream = text.encode(encoding)
+
+    # 8-byte descriptor: two zero bytes, the little-endian fc value, two zero bytes.
+    piece_descriptor = struct.pack("<HIH", 0, fc_value, 0)
+    piece_table = struct.pack("<II", 0, len(text)) + piece_descriptor
+
+    header = b"\x01\x00\x00" + b"\x02" + struct.pack("<I", len(piece_table))
+    table_stream = header + piece_table
+    return word_stream, table_stream
+
+
+def test_compressed_legacy_doc_piece_table():
+    """Check that a cp1252 (compressed) piece round-trips through the table-stream parser."""
+    print("🔍 Testing compressed legacy .doc piece-table extraction...")
+
+    try:
+        expected_text = "Hello legacy doc"
+        word_stream, table_stream = _build_piece_table_stream(expected_text, compressed=True)
+        extracted_text = FUNCTIONS_CONTENT._extract_legacy_doc_text_from_table_stream(word_stream, table_stream)
+
+        if extracted_text != expected_text:
+            print(f"❌ Expected '{expected_text}' but got '{extracted_text}'")
+            return False
+
+        print("✅ Compressed legacy piece-table extraction works")
+        return True
+    except Exception as exc:  # reported as False so the __main__ runner can keep tallying
+        print(f"❌ Test failed: {exc}")
+        import traceback
+        traceback.print_exc()
+        return False
+
+
+def test_unicode_legacy_doc_piece_table():
+    """Check that a UTF-16LE (uncompressed) piece round-trips through the table-stream parser."""
+    print("🔍 Testing Unicode legacy .doc piece-table extraction...")
+
+    try:
+        expected_text = "Unicode legacy"
+        word_stream, table_stream = _build_piece_table_stream(expected_text, compressed=False)
+        extracted_text = FUNCTIONS_CONTENT._extract_legacy_doc_text_from_table_stream(word_stream, table_stream)
+
+        if extracted_text != expected_text:
+            print(f"❌ Expected '{expected_text}' but got '{extracted_text}'")
+            return False
+
+        print("✅ Unicode legacy piece-table extraction works")
+        return True
+    except Exception as exc:  # reported as False so the __main__ runner can keep tallying
+        print(f"❌ Test failed: {exc}")
+        import traceback
+        traceback.print_exc()
+        return False
+
+
+def test_word_dispatch_uses_ole_for_doc_only():
+    """Verify extract_word_text sends .doc to the legacy extractor and .docm to the OOXML one."""
+    print("🔍 Testing .doc/.docm dispatch behavior...")
+
+    try:
+        with patch.object(FUNCTIONS_CONTENT.olefile, "isOleFile", return_value=True), patch.object(
+            FUNCTIONS_CONTENT,
+            "extract_legacy_doc_text",
+            return_value="legacy text",
+        ) as legacy_extract, patch.object(
+            FUNCTIONS_CONTENT,
+            "extract_docx_text",
+            return_value="ooxml text",
+        ) as ooxml_extract:
+            doc_result = FUNCTIONS_CONTENT.extract_word_text("sample.doc", ".doc")
+            docm_result = FUNCTIONS_CONTENT.extract_word_text("sample.docm", ".docm")
+
+        if doc_result != "legacy text":
+            print(f"❌ Expected .doc dispatch to return legacy text, got '{doc_result}'")
+            return False
+
+        if docm_result != "ooxml text":
+            print(f"❌ Expected .docm dispatch to return ooxml text, got '{docm_result}'")
+            return False
+
+        if legacy_extract.call_count != 1:  # each mocked extractor must serve exactly one call
+            print(f"❌ Expected one legacy extractor call, got {legacy_extract.call_count}")
+            return False
+
+        if ooxml_extract.call_count != 1:
+            print(f"❌ Expected one OOXML extractor call, got {ooxml_extract.call_count}")
+            return False
+
+        print("✅ Word dispatch separates .doc and .docm correctly")
+        return True
+    except Exception as exc:  # reported as False so the __main__ runner can keep tallying
+        print(f"❌ Test failed: {exc}")
+        import traceback
+        traceback.print_exc()
+        return False
+
+
+def test_field_code_cleanup_keeps_display_text():
+    """Verify field instructions are dropped by normalization while the field result text stays."""
+    print("🔍 Testing legacy field-code cleanup...")
+
+    try:
+        # \x13 opens a field, \x14 switches from field code to result, \x15 closes the field.
+        raw_text = 'See \x13 HYPERLINK "https://example.com" \x14example link\x15 now'
+        cleaned_text = FUNCTIONS_CONTENT._normalize_legacy_doc_text(raw_text)
+        if cleaned_text != 'See example link now':
+            print(f"❌ Expected cleaned field text, got '{cleaned_text}'")
+            return False
+
+        print("✅ Field-code cleanup keeps visible link text only")
+        return True
+    except Exception as exc:  # reported as False so the __main__ runner can keep tallying
+        print(f"❌ Test failed: {exc}")
+        import traceback
+        traceback.print_exc()
+        return False
+
+
+def test_legacy_doc_metadata_normalization():
+    """Verify NUL-terminated metadata bytes become plain strings and a lone NUL becomes ''."""
+    print("🔍 Testing legacy .doc metadata normalization...")
+
+    try:
+        normalized_author = FUNCTIONS_CONTENT._normalize_legacy_doc_metadata_value(b"Paul Lizer\x00")
+        normalized_blank = FUNCTIONS_CONTENT._normalize_legacy_doc_metadata_value(b"\x00")
+
+        if normalized_author != "Paul Lizer":
+            print(f"❌ Expected decoded author metadata, got '{normalized_author}'")
+            return False
+
+        if normalized_blank != "":
+            print(f"❌ Expected blank metadata to normalize to empty string, got '{normalized_blank}'")
+            return False
+
+        print("✅ Legacy metadata values normalize correctly")
+        return True
+    except Exception as exc:  # reported as False so the __main__ runner can keep tallying
+        print(f"❌ Test failed: {exc}")
+        import traceback
+        traceback.print_exc()
+        return False
+
+
+def test_legacy_doc_dispatch_uses_shared_document_pipeline():
+    """Check functions_documents.py source text for the snippets that route .doc and .docm files."""
+    print("🔍 Testing legacy .doc processing dispatch...")
+
+    try:
+        with open(FUNCTIONS_DOCUMENTS_PATH, "r", encoding="utf-8") as source_file:
+            source_text = source_file.read()
+
+        required_snippets = [  # matched as exact substrings of the source file
+            "is_legacy_doc = file_ext == '.doc'",
+            "extract_word_text(chunk_path, file_ext)",
+            "elif file_ext == '.docm':",
+            "elif file_ext in di_supported_extensions or file_ext == '.doc':",
+        ]
+
+        missing_snippets = [snippet for snippet in required_snippets if snippet not in source_text]
+        if missing_snippets:
+            print(f"❌ Missing expected shared-pipeline snippets: {missing_snippets}")
+            return False
+
+        print("✅ Legacy .doc files use the shared document-processing flow")
+        return True
+    except Exception as exc:  # reported as False so the __main__ runner can keep tallying
+        print(f"❌ Test failed: {exc}")
+        import traceback
+        traceback.print_exc()
+        return False
+
+
+if __name__ == "__main__":
+    # Script entry point: run every check in order and exit non-zero on any failure.
+    # Each test reports failure by returning False rather than raising.
+    outcomes = []
+    for check in (
+        test_compressed_legacy_doc_piece_table,
+        test_unicode_legacy_doc_piece_table,
+        test_word_dispatch_uses_ole_for_doc_only,
+        test_field_code_cleanup_keeps_display_text,
+        test_legacy_doc_metadata_normalization,
+        test_legacy_doc_dispatch_uses_shared_document_pipeline,
+    ):
+        print(f"\n🧪 Running {check.__name__}...")
+        outcomes.append(check())
+
+    passed = sum(outcomes)
+    print(f"\n📊 Results: {passed}/{len(outcomes)} tests passed")
+    sys.exit(0 if all(outcomes) else 1)
\ No newline at end of file
diff --git a/functional_tests/test_legacy_ppt_ole_extraction.py b/functional_tests/test_legacy_ppt_ole_extraction.py
new file mode 100644
index 00000000..f8e90e1d
--- /dev/null
+++ b/functional_tests/test_legacy_ppt_ole_extraction.py
@@ -0,0 +1,251 @@
+# test_legacy_ppt_ole_extraction.py
+"""
+Functional test for legacy .ppt OLE extraction.
+Version: 0.241.005
+Implemented in: 0.241.005
+
+This test ensures legacy PowerPoint .ppt files use OLE metadata and slide-text
+extraction while remaining on the shared document-processing workflow for
+enhanced citations and final metadata extraction.
+"""
+
+import importlib.util
+import os
+import re
+import sys
+import tempfile
+import types
+import zipfile
+
+
+FUNCTIONS_CONTENT_PATH = os.path.join(
+    os.path.dirname(os.path.abspath(__file__)),
+    "..",
+    "application",
+    "single_app",
+    "functions_content.py",
+)
+
+FUNCTIONS_DOCUMENTS_PATH = os.path.join(
+    os.path.dirname(os.path.abspath(__file__)),
+    "..",
+    "application",
+    "single_app",
+    "functions_documents.py",
+)
+
+SAMPLE_PPT_PATH = os.path.join(
+    os.path.dirname(os.path.abspath(__file__)),
+    "..",
+    "artifacts",
+    "UCCSChapter2_Spring2012.ppt",
+)
+
+
+def _load_functions_content_module():
+    """Return functions_content executed from disk while stub project modules are installed."""
+    spec = importlib.util.spec_from_file_location(
+        "functions_content_legacy_ppt_test", FUNCTIONS_CONTENT_PATH
+    )
+    loaded_module = importlib.util.module_from_spec(spec)
+
+    fake_config = types.ModuleType("config")
+    fake_config.os = os
+    fake_config.re = re
+    fake_config.CLIENTS = {}
+    fake_config.AZURE_ENVIRONMENT = "public"
+    fake_config.WORD_CHUNK_SIZE = 400
+
+    fake_debug = types.ModuleType("functions_debug")
+    fake_debug.debug_print = lambda *args, **kwargs: None
+
+    # Stub modules keyed by the sys.modules name they temporarily occupy.
+    stubs = {
+        "config": fake_config,
+        "functions_debug": fake_debug,
+        "functions_settings": types.ModuleType("functions_settings"),
+        "functions_logging": types.ModuleType("functions_logging"),
+    }
+
+    # Snapshot the current entries first so the finally block can undo the swap.
+    previous = {name: sys.modules.get(name) for name in stubs}
+    sys.modules.update(stubs)
+
+    try:
+        spec.loader.exec_module(loaded_module)
+    finally:
+        # Reinstate whatever was registered before, or drop the stub if nothing was.
+        for name, saved in previous.items():
+            if saved is not None:
+                sys.modules[name] = saved
+            else:
+                sys.modules.pop(name, None)
+
+    return loaded_module
+
+
+FUNCTIONS_CONTENT = _load_functions_content_module()
+
+
+def test_pptx_metadata_extraction_from_core_properties():
+    """Verify title, author, subject and keywords are read from docProps/core.xml in a .pptx zip."""
+    print("🔍 Testing .pptx metadata extraction...")
+
+    temp_path = None  # assigned once the temp file exists so the finally block can remove it
+    try:
+        core_xml = """<?xml version='1.0' encoding='UTF-8'?>
+<cp:coreProperties xmlns:cp='http://schemas.openxmlformats.org/package/2006/metadata/core-properties'
+    xmlns:dc='http://purl.org/dc/elements/1.1/'>
+    <dc:title>Quarterly Review</dc:title>
+    <dc:creator>Jane Doe</dc:creator>
+    <dc:subject>Finance Update</dc:subject>
+    <cp:keywords>finance; accounting</cp:keywords>
+</cp:coreProperties>
+"""
+
+        with tempfile.NamedTemporaryFile(suffix=".pptx", delete=False) as temp_file:
+            temp_path = temp_file.name
+
+        with zipfile.ZipFile(temp_path, "w") as archive:  # archive holds only the core properties part
+            archive.writestr("docProps/core.xml", core_xml)
+
+        title, author, subject, keywords = FUNCTIONS_CONTENT.extract_presentation_metadata(temp_path, ".pptx")
+
+        if title != "Quarterly Review":
+            print(f"❌ Expected PPTX title metadata, got '{title}'")
+            return False
+
+        if author != "Jane Doe":
+            print(f"❌ Expected PPTX author metadata, got '{author}'")
+            return False
+
+        if subject != "Finance Update":
+            print(f"❌ Expected PPTX subject metadata, got '{subject}'")
+            return False
+
+        if keywords != ["finance", "accounting"]:
+            print(f"❌ Expected PPTX keyword metadata, got '{keywords}'")
+            return False
+
+        print("✅ PPTX metadata extraction works")
+        return True
+    except Exception as exc:  # reported as False so the __main__ runner can keep tallying
+        print(f"❌ Test failed: {exc}")
+        import traceback
+        traceback.print_exc()
+        return False
+    finally:
+        if temp_path and os.path.exists(temp_path):
+            os.remove(temp_path)
+
+
+def test_legacy_ppt_metadata_from_sample():
+    """Verify extract_presentation_metadata returns the expected fields for the sample .ppt."""
+    print("🔍 Testing legacy .ppt metadata extraction...")
+
+    try:
+        title, author, subject, keywords = FUNCTIONS_CONTENT.extract_presentation_metadata(SAMPLE_PPT_PATH, ".ppt")
+
+        if title != "Chapter 2 Transaction Analysis":
+            print(f"❌ Expected PPT title metadata, got '{title}'")
+            return False
+
+        if author != "Cheryl L. Prachyl":
+            print(f"❌ Expected PPT author metadata, got '{author}'")
+            return False
+
+        if subject not in ("", None):  # either empty representation is accepted
+            print(f"❌ Expected empty PPT subject metadata, got '{subject}'")
+            return False
+
+        if keywords not in ([], None):  # either empty representation is accepted
+            print(f"❌ Expected empty PPT keyword metadata, got '{keywords}'")
+            return False
+
+        print("✅ Legacy .ppt metadata extraction works")
+        return True
+    except Exception as exc:  # reported as False so the __main__ runner can keep tallying
+        print(f"❌ Test failed: {exc}")
+        import traceback
+        traceback.print_exc()
+        return False
+
+
+def test_legacy_ppt_slide_extraction_from_sample():
+    """Verify the sample .ppt yields at least 21 pages with the expected text on slides 1, 2, 21."""
+    print("🔍 Testing legacy .ppt slide extraction...")
+
+    try:
+        pages = FUNCTIONS_CONTENT.extract_legacy_ppt_pages(SAMPLE_PPT_PATH)
+        slide_map = {page["page_number"]: page.get("content", "") for page in pages}  # number -> text
+
+        if len(pages) < 21:
+            print(f"❌ Expected at least 21 slide entries, got {len(pages)}")
+            return False
+
+        slide_one_text = slide_map.get(1, "")
+        if "Chapter 2" not in slide_one_text or "Accounting for Business Transactions:" not in slide_one_text:
+            print(f"❌ Expected slide 1 title text, got '{slide_one_text}'")
+            return False
+
+        if "Transactions" not in slide_map.get(2, ""):
+            print(f"❌ Expected slide 2 transaction text, got '{slide_map.get(2, '')}'")
+            return False
+
+        if "Do NOT proceed until you learn these rules!" not in slide_map.get(21, ""):
+            print(f"❌ Expected slide 21 warning text, got '{slide_map.get(21, '')}'")
+            return False
+
+        print("✅ Legacy .ppt slide extraction works")
+        return True
+    except Exception as exc:  # reported as False so the __main__ runner can keep tallying
+        print(f"❌ Test failed: {exc}")
+        import traceback
+        traceback.print_exc()
+        return False
+
+
+def test_legacy_ppt_uses_shared_document_pipeline():
+    """Check functions_documents.py source text for the snippets that wire in legacy .ppt handling."""
+    print("🔍 Testing legacy .ppt shared processing pipeline...")
+
+    try:
+        with open(FUNCTIONS_DOCUMENTS_PATH, "r", encoding="utf-8") as source_file:
+            source_text = source_file.read()
+
+        required_snippets = [  # matched as exact substrings of the source file
+            "is_legacy_ppt = file_ext == '.ppt'",
+            "extract_presentation_metadata(temp_file_path, file_ext)",
+            "extract_legacy_ppt_pages(chunk_path)",
+        ]
+
+        missing_snippets = [snippet for snippet in required_snippets if snippet not in source_text]
+        if missing_snippets:
+            print(f"❌ Missing expected shared-pipeline snippets: {missing_snippets}")
+            return False
+
+        print("✅ Legacy .ppt files use the shared document-processing flow")
+        return True
+    except Exception as exc:  # reported as False so the __main__ runner can keep tallying
+        print(f"❌ Test failed: {exc}")
+        import traceback
+        traceback.print_exc()
+        return False
+
+
+if __name__ == "__main__":
+    # Script entry point: run every check in order and exit non-zero on any failure.
+    # Each test reports failure by returning False rather than raising.
+    outcomes = []
+    for check in (
+        test_pptx_metadata_extraction_from_core_properties,
+        test_legacy_ppt_metadata_from_sample,
+        test_legacy_ppt_slide_extraction_from_sample,
+        test_legacy_ppt_uses_shared_document_pipeline,
+    ):
+        print(f"\n🧪 Running {check.__name__}...")
+        outcomes.append(check())
+
+    passed = sum(outcomes)
+    print(f"\n📊 Results: {passed}/{len(outcomes)} tests passed")
+    sys.exit(0 if all(outcomes) else 1)
\ No newline at end of file