From 9037dba705b0d728a1b31e6a8d76c9d6b1bd7485 Mon Sep 17 00:00:00 2001
From: Jason McCartney <jmac@theroot.org>
Date: Sat, 20 Dec 2025 20:26:45 -0700
Subject: [PATCH 1/5] feat: add short-lived URL generation for converted files

- Implemented functionality to generate short-lived URLs for converted files during the upload process.
- Updated response structure to include `shortLivedUrl` for both original and converted files, allowing for flexible use cases.
- Enhanced error handling to log warnings if short-lived URL generation fails, ensuring fallback to regular URLs.
- Added comprehensive tests to verify the presence and correctness of `shortLivedUrl` in upload responses, ensuring robust functionality for file handling.
---
 .../cortex-file-handler/src/blobHandler.js    |  62 +++++++
 .../tests/shortLivedUrlConversion.test.js     |  93 ++++++++++
 lib/fileUtils.js                              | 168 ++++++++++++------
 .../system/entity/tools/sys_tool_editfile.js  |   9 +-
 .../system/entity/tools/sys_tool_readfile.js  |  83 ++++++---
 .../features/tools/fileCollection.test.js     | 154 ++++++++++++++++
 tests/unit/core/fileCollection.test.js        | 128 +++++++++++++
 7 files changed, 613 insertions(+), 84 deletions(-)

diff --git a/helper-apps/cortex-file-handler/src/blobHandler.js b/helper-apps/cortex-file-handler/src/blobHandler.js
index eeae97a4..5fa04fe3 100644
--- a/helper-apps/cortex-file-handler/src/blobHandler.js
+++ b/helper-apps/cortex-file-handler/src/blobHandler.js
@@ -598,11 +598,42 @@ function uploadBlob(
                         );
                     }
 
+                    // Generate shortLivedUrl for converted file
+                    let convertedShortLivedUrl = convertedSaveResult.url; // Fallback to regular URL
+                    try {
+                      const storageFactory = StorageFactory.getInstance();
+                      const primaryProvider = await storageFactory.getAzureProvider();
+                      if (primaryProvider.generateShortLivedSASToken && primaryProvider.extractBlobNameFromUrl) {
+                        const convertedBlobName = primaryProvider.extractBlobNameFromUrl(convertedSaveResult.url);
+                        if (convertedBlobName) {
+                          const { containerClient } = await primaryProvider.getBlobClient();
+                          const convertedShortLivedSasToken = primaryProvider.generateShortLivedSASToken(
+                            containerClient,
+                            convertedBlobName,
+                            5
+                          );
+                          const convertedUrlObj = new URL(convertedSaveResult.url);
+                          const convertedBaseUrl = `${convertedUrlObj.protocol}//${convertedUrlObj.host}${convertedUrlObj.pathname}`;
+                          convertedShortLivedUrl = `${convertedBaseUrl}?${convertedShortLivedSasToken}`;
+                          context.log("Generated shortLivedUrl for converted file (busboy)");
+                        }
+                      }
+                    } catch (error) {
+                      context.log(`Warning: Could not generate shortLivedUrl for converted file: ${error.message}`);
+                      // Fallback to regular URL
+                    }
+
                     // Attach to response body
                     result.converted = {
                       url: convertedSaveResult.url,
+                      shortLivedUrl: convertedShortLivedUrl,
                       gcs: convertedGcsUrl,
                     };
+                    
+                    // Note: result.shortLivedUrl remains pointing to the original file
+                    // result.converted.shortLivedUrl points to the converted file
+                    // Both are available for different use cases
+                    
                     context.log(
                       "Conversion process (busboy) completed successfully",
                     );
@@ -895,11 +926,42 @@ async function uploadFile(
             context.log("Converted file saved to GCS");
           }
 
+          // Generate shortLivedUrl for converted file
+          let convertedShortLivedUrl = convertedSaveResult.url; // Fallback to regular URL
+          try {
+            const storageFactory = StorageFactory.getInstance();
+            const primaryProvider = await storageFactory.getAzureProvider();
+            if (primaryProvider.generateShortLivedSASToken && primaryProvider.extractBlobNameFromUrl) {
+              const convertedBlobName = primaryProvider.extractBlobNameFromUrl(convertedSaveResult.url);
+              if (convertedBlobName) {
+                const { containerClient } = await primaryProvider.getBlobClient();
+                const convertedShortLivedSasToken = primaryProvider.generateShortLivedSASToken(
+                  containerClient,
+                  convertedBlobName,
+                  5
+                );
+                const convertedUrlObj = new URL(convertedSaveResult.url);
+                const convertedBaseUrl = `${convertedUrlObj.protocol}//${convertedUrlObj.host}${convertedUrlObj.pathname}`;
+                convertedShortLivedUrl = `${convertedBaseUrl}?${convertedShortLivedSasToken}`;
+                context.log("Generated shortLivedUrl for converted file");
+              }
+            }
+          } catch (error) {
+            context.log(`Warning: Could not generate shortLivedUrl for converted file: ${error.message}`);
+            // Fallback to regular URL
+          }
+
           // Add converted file info to result
           result.converted = {
             url: convertedSaveResult.url,
+            shortLivedUrl: convertedShortLivedUrl,
             gcs: convertedGcsUrl,
           };
+          
+          // Note: result.shortLivedUrl remains pointing to the original file
+          // result.converted.shortLivedUrl points to the converted file
+          // Both are available for different use cases
+          
           context.log("Conversion process completed successfully");
         }
       } catch (error) {
diff --git a/helper-apps/cortex-file-handler/tests/shortLivedUrlConversion.test.js b/helper-apps/cortex-file-handler/tests/shortLivedUrlConversion.test.js
index 1e8c2cb2..24dbb890 100644
--- a/helper-apps/cortex-file-handler/tests/shortLivedUrlConversion.test.js
+++ b/helper-apps/cortex-file-handler/tests/shortLivedUrlConversion.test.js
@@ -162,6 +162,99 @@ test.serial(
   },
 );
 
+/**
+ * Test that initial upload response includes shortLivedUrl for converted files
+ */
+test.serial(
+  "upload should generate shortLivedUrl for converted file in initial response",
+  async (t) => {
+    // Create a minimal XLSX workbook in-memory
+    const workbook = XLSX.utils.book_new();
+    const worksheet = XLSX.utils.aoa_to_sheet([
+      ["Product", "Price"],
+      ["Apple", 1.50],
+      ["Banana", 0.75],
+    ]);
+    XLSX.utils.book_append_sheet(workbook, worksheet, "Products");
+
+    // Write it to a temp file
+    const filePath = path.join(t.context.testDir, `${uuidv4()}.xlsx`);
+    XLSX.writeFile(workbook, filePath);
+
+    const hash = `test-upload-converted-${uuidv4()}`;
+    let uploadedUrl;
+    let convertedUrl;
+
+    try {
+      // Upload the XLSX file (conversion should run automatically)
+      const uploadResponse = await uploadFile(filePath, null, hash);
+      t.is(uploadResponse.status, 200, "Upload should succeed");
+      t.truthy(uploadResponse.data.converted, "Upload response must contain converted info");
+      t.truthy(uploadResponse.data.converted.url, "Converted URL should be present");
+
+      uploadedUrl = uploadResponse.data.url;
+      convertedUrl = uploadResponse.data.converted.url;
+
+      // Verify the original file is .xlsx and converted file is .csv
+      t.is(getFileExtensionFromUrl(uploadedUrl), '.xlsx', "Original URL should point to .xlsx file");
+      t.is(getFileExtensionFromUrl(convertedUrl), '.csv', "Converted URL should point to .csv file");
+
+      // CRITICAL TEST: Verify converted.shortLivedUrl is present in upload response
+      t.truthy(
+        uploadResponse.data.converted.shortLivedUrl,
+        "Upload response should include converted.shortLivedUrl"
+      );
+
+      // Verify converted.shortLivedUrl points to the converted file (.csv)
+      const convertedShortLivedBase = uploadResponse.data.converted.shortLivedUrl.split('?')[0];
+      const convertedUrlBase = convertedUrl.split('?')[0];
+      t.is(
+        convertedShortLivedBase,
+        convertedUrlBase,
+        "converted.shortLivedUrl should be based on converted file URL (.csv)"
+      );
+
+      // Verify converted.shortLivedUrl points to CSV, not XLSX
+      t.true(
+        uploadResponse.data.converted.shortLivedUrl.includes('.csv'),
+        "converted.shortLivedUrl should point to .csv file"
+      );
+      t.false(
+        uploadResponse.data.converted.shortLivedUrl.includes('.xlsx'),
+        "converted.shortLivedUrl should NOT contain .xlsx extension"
+      );
+
+      // Verify main shortLivedUrl points to original file (for reference)
+      // The converted file's shortLivedUrl is in result.converted.shortLivedUrl
+      if (uploadResponse.data.shortLivedUrl) {
+        const mainShortLivedBase = uploadResponse.data.shortLivedUrl.split('?')[0];
+        const originalUrlBase = uploadedUrl.split('?')[0];
+        t.is(
+          mainShortLivedBase,
+          originalUrlBase,
+          "Main shortLivedUrl should point to original file (.xlsx)"
+        );
+        t.true(
+          uploadResponse.data.shortLivedUrl.includes('.xlsx'),
+          "Main shortLivedUrl should point to .xlsx file (original)"
+        );
+        t.false(
+          uploadResponse.data.shortLivedUrl.includes('.csv'),
+          "Main shortLivedUrl should NOT point to .csv file (that's in converted.shortLivedUrl)"
+        );
+      }
+
+    } finally {
+      // Clean up
+      fs.unlinkSync(filePath);
+      await cleanupHashAndFile(hash, uploadedUrl, baseUrl);
+      if (convertedUrl) {
+        await cleanupHashAndFile(null, convertedUrl, baseUrl);
+      }
+    }
+  },
+);
+
 /**
  * Test that short-lived URLs fallback to original files when no conversion exists
  */
diff --git a/lib/fileUtils.js b/lib/fileUtils.js
index b3657263..f4ee8892 100644
--- a/lib/fileUtils.js
+++ b/lib/fileUtils.js
@@ -13,6 +13,7 @@ import path from 'path';
 import FormData from 'form-data';
 import xxhash from 'xxhash-wasm';
 import mime from 'mime-types';
+import mimeDb from 'mime-db';
 import { encrypt, decrypt } from './crypto.js';
 
 const pipeline = promisify(stream.pipeline);
@@ -1007,11 +1008,16 @@ async function addFileToCollection(contextId, contextKey, url, gcs, filename, ta
         throw new Error("url or fileUrl is required");
     }
 
-    // Determine MIME type from URL (preferring converted URL if available)
-    const mimeType = determineMimeTypeFromUrl(finalUrl, finalGcs, filename);
+    // Determine MIME type from URL (the actual stored content, which may be converted)
+    // E.g., if user uploaded foo.docx but it was converted to foo.md, MIME type should be text/markdown
+    const mimeType = determineMimeTypeFromUrl(finalUrl, finalGcs, null);
     
-    // Ensure filename has correct extension based on MIME type
-    const correctedFilename = ensureFilenameExtension(filename, mimeType);
+    // IMPORTANT: Keep the original user-provided filename as displayFilename
+    // Do NOT "correct" the extension based on MIME type
+    // The user's original filename (e.g., "foo.docx") should be preserved even if the
+    // stored content is a converted format (e.g., "foo.md")
+    // This allows users to recognize their files by original name while tools
+    // use the actual URL to determine content type for operations
 
     // If no hash, generate one from URL for storage key (needed for Redis hash map)
     const storageHash = finalHash || await computeBufferHash(Buffer.from(finalUrl));
@@ -1021,8 +1027,8 @@ async function addFileToCollection(contextId, contextKey, url, gcs, filename, ta
         id: `${Date.now()}-${Math.random().toString(36).substring(2, 9)}`,
         url: finalUrl,
         gcs: finalGcs || null,
-        displayFilename: correctedFilename, // Store user-provided filename as displayFilename (filename is managed by CFH)
-        mimeType: mimeType,
+        displayFilename: filename, // Keep original user-provided filename as displayFilename (NOT corrected by MIME type)
+        mimeType: mimeType, // MIME type from actual URL content (may differ from displayFilename extension)
         tags: Array.isArray(tags) ? tags : [],
         notes: notes || '',
         hash: storageHash, // Use storageHash (actual hash or generated from URL)
@@ -1058,10 +1064,11 @@ async function addFileToCollection(contextId, contextKey, url, gcs, filename, ta
                     url: finalUrl, // Use new URL (guaranteed to be truthy at this point)
                     gcs: finalGcs || existingData.gcs || null, // Use new GCS if provided, otherwise keep existing
                     // Preserve CFH's filename (managed by CFH), store user-provided filename as displayFilename
-                    displayFilename: correctedFilename, // Store user-provided filename as displayFilename
+                    // Keep original user-provided filename (do NOT correct based on MIME type)
+                    displayFilename: filename,
                     tags: fileEntry.tags.length > 0 ? fileEntry.tags : (existingData.tags || []), // Merge tags if new ones provided
                     notes: fileEntry.notes || existingData.notes || '', // Keep existing notes if new ones empty
-                    mimeType: fileEntry.mimeType || existingData.mimeType || null,
+                    mimeType: fileEntry.mimeType || existingData.mimeType || null, // MIME type from URL (actual content type)
                     inCollection: ['*'], // Mark as global chat file (available to all chats)
                     addedDate: existingData.addedDate || fileEntry.addedDate, // Keep earliest addedDate
                     lastAccessed: new Date().toISOString(), // Always update lastAccessed
@@ -1194,6 +1201,33 @@ function determineMimeTypeFromUrl(url, gcs = null, filename = null) {
     return 'application/octet-stream';
 }
 
+/**
+ * Get the actual content MIME type from a file object.
+ * This determines the MIME type from the actual stored content (URL), not the displayFilename.
+ * 
+ * Use this for operations that need to know the actual content type (e.g., reading, editing),
+ * not for display purposes where displayFilename should be used.
+ * 
+ * Example: A file with displayFilename="report.docx" but url="...report.md" 
+ * will return "text/markdown" because that's what the actual content is.
+ * 
+ * @param {Object} file - File object with url, gcs, and optionally mimeType fields
+ * @returns {string} MIME type of the actual content
+ */
+function getActualContentMimeType(file) {
+    if (!file) {
+        return 'application/octet-stream';
+    }
+    
+    // If mimeType is already stored and valid, use it (it was computed from URL at add time)
+    if (file.mimeType && file.mimeType !== 'application/octet-stream') {
+        return file.mimeType;
+    }
+    
+    // Determine MIME type from URL (the actual stored content)
+    // Do NOT use displayFilename as it may have a different extension (e.g., docx for an md file)
+    return determineMimeTypeFromUrl(file.url, file.gcs, null);
+}
 
 /**
  * Sync files from chat history to file collection
@@ -1263,7 +1297,9 @@ async function syncFilesToCollection(chatHistory, contextId, contextKey = null)
                     // CFH already has this file - merge CFH data with Cortex metadata
                     // Only set Cortex-managed fields (tags, notes, id, dates), preserve all CFH data
                     // Ensure mimeType is set (CFH doesn't store it, so we need to determine it)
-                    const mimeType = existingData.mimeType || determineMimeTypeFromUrl(existingData.url, existingData.gcs, existingData.displayFilename);
+                    // IMPORTANT: Determine MIME type from URL (actual content), not displayFilename
+                    // displayFilename may have original extension (e.g., .docx) while URL points to converted content (e.g., .md)
+                    const mimeType = existingData.mimeType || determineMimeTypeFromUrl(existingData.url, existingData.gcs, null);
                     
                     const fileData = {
                         ...existingData, // Preserve all CFH data (url, gcs, filename, displayFilename, permanent, etc.)
@@ -1827,14 +1863,14 @@ async function checkHashExists(hash, fileHandlerUrl, pathwayResolver = null, con
         // If file exists (200), return URLs with short-lived URL preferred
         if (checkResponse.status === 200 && checkResponse.data && checkResponse.data.url) {
             const data = checkResponse.data;
-            // shortLivedUrl automatically prefers converted URL if it exists
-            // Use shortLivedUrl if available, otherwise fall back to regular URL
-            // For GCS, always use the GCS URL from checkHash (no short-lived for GCS)
-            const url = data.shortLivedUrl || data.converted?.url || data.url;
+            // Prefer converted file's shortLivedUrl if available (that's what we want to send to LLMs)
+            // Otherwise use converted URL, then original shortLivedUrl, then original URL
+            // For GCS, always use the GCS URL from checkHash (prefers converted, no short-lived for GCS)
+            const url = data.converted?.shortLivedUrl || data.converted?.url || data.shortLivedUrl || data.url;
             const gcs = data.converted?.gcs || data.gcs || null;
             
             return {
-                url: url, // shortLivedUrl if available (prefers converted), otherwise regular URL
+                url: url, // Prefers converted.shortLivedUrl (for LLM processing), then converted.url, then original shortLivedUrl/url
                 gcs: gcs, // GCS URL (prefers converted, no short-lived version for GCS)
                 hash: data.hash || hash,
                 filename: data.filename || null // Include filename from response
@@ -1884,13 +1920,13 @@ async function ensureShortLivedUrl(fileObject, fileHandlerUrl, contextId = null,
     try {
         const resolved = await checkHashExists(fileObject.hash, fileHandlerUrl, null, contextId, shortLivedMinutes);
         if (resolved && resolved.url) {
-            // Return file object with url replaced by shortLivedUrl (or fallback to regular url)
-            // GCS URL comes from checkHash (no short-lived version for GCS)
+            // Return file object with url replaced by shortLivedUrl (prefers converted.shortLivedUrl if available)
+            // GCS URL comes from checkHash (prefers converted, no short-lived version for GCS)
             // Preserve filename from original, but use resolved filename if original doesn't have one
             return {
                 ...fileObject,
-                url: resolved.url, // shortLivedUrl (or fallback)
-                gcs: resolved.gcs || fileObject.gcs || null, // GCS from checkHash
+                url: resolved.url, // Prefers converted.shortLivedUrl (for LLM processing), then converted.url, then original shortLivedUrl/url
+                gcs: resolved.gcs || fileObject.gcs || null, // GCS from checkHash (prefers converted)
                 filename: fileObject.filename || resolved.filename || fileObject.filename // Preserve original, fallback to resolved
             };
         }
@@ -2097,11 +2133,18 @@ async function uploadFileToCloud(fileInput, mimeType = null, filename = null, pa
         });
         
         if (uploadResponse.data && uploadResponse.data.url) {
+            const data = uploadResponse.data;
+            // Prefer converted file's shortLivedUrl if available (that's what we want to send to LLMs)
+            // Otherwise use converted URL, then original shortLivedUrl, then original URL
+            // For GCS, prefer converted GCS URL if available
+            const url = data.converted?.shortLivedUrl || data.converted?.url || data.shortLivedUrl || data.url;
+            const gcs = data.converted?.gcs || data.gcs || null;
+            
             // Return both url and gcs if available
             return {
-                url: uploadResponse.data.url,
-                gcs: uploadResponse.data.gcs || null,
-                hash: uploadResponse.data.hash || fileHash
+                url: url, // Prefers converted.shortLivedUrl (for LLM processing), then converted.url, then original shortLivedUrl/url
+                gcs: gcs, // GCS URL (prefers converted if available)
+                hash: data.hash || fileHash
             };
         } else {
             throw new Error('No URL returned from file handler');
@@ -2179,12 +2222,12 @@ async function resolveFileHashesToContent(fileHashes, config, contextId = null)
                 const existingFile = await checkHashExists(hash, fileHandlerUrl, null, contextId, 5);
                 if (existingFile) {
                     // checkHashExists already returns:
-                    // - shortLivedUrl (prefers converted) in url field
+                    // - converted.shortLivedUrl (preferred) or converted.url or original shortLivedUrl/url in url field
                     // - GCS URL (prefers converted) in gcs field
                     // - filename in filename field
                             return JSON.stringify({
                                 type: "image_url",
-                        url: existingFile.url, // Already has shortLivedUrl (prefers converted)
+                        url: existingFile.url, // Prefers converted.shortLivedUrl (for LLM processing)
                         image_url: { url: existingFile.url },
                         gcs: existingFile.gcs || null, // GCS URL (prefers converted, no short-lived)
                         hash: hash
@@ -2259,54 +2302,66 @@ function isTextMimeType(mimeType) {
     // e.g., "application/json; charset=utf-8" -> "application/json"
     const baseMimeType = mimeType.split(';')[0].trim().toLowerCase();
     
-    // All text/* types
+    // 1. All text/* types are text (text/plain, text/html, text/css, text/csv, text/markdown, etc.)
     if (baseMimeType.startsWith('text/')) {
         return true;
     }
     
-    // Text-based application types (consistent with sys_tool_writefile and sys_tool_editfile)
-    const textApplicationTypes = [
-        'application/json',
-        'application/javascript',
-        'application/x-javascript',
-        'application/typescript',
+    // 2. Structured text formats with +json, +xml, +yaml suffix
+    // (application/ld+json, application/rss+xml, application/vnd.api+json, etc.)
+    if (baseMimeType.endsWith('+json') || baseMimeType.endsWith('+xml') || baseMimeType.endsWith('+yaml')) {
+        return true;
+    }
+    
+    // 3. Check mime-db for charset property - if a MIME type has a default charset, it's text
+    // This catches application/json, application/javascript, etc.
+    const dbEntry = mimeDb[baseMimeType];
+    if (dbEntry && dbEntry.charset) {
+        return true;
+    }
+    
+    // 4. Well-known text-based application/* types not in mime-db with charset
+    // These are common formats that are definitely text but don't have charset in the database
+    const knownTextTypes = new Set([
         'application/xml',
         'application/x-yaml',
         'application/yaml',
-        'application/x-python',
+        'application/toml',
+        'application/x-toml',
         'application/x-sh',
         'application/x-shellscript',
-        'application/x-c',
-        'application/x-c++',
-        'application/x-cpp',
-        'application/x-java',
-        'application/x-go',
-        'application/x-rust',
-        'application/x-ruby',
-        'application/x-php',
+        'application/x-httpd-php',
         'application/x-perl',
-        'application/x-lua',
+        'application/x-python',
         'application/x-sql',
-        'application/x-toml',
-        'application/x-ini',
-        'application/x-config',
-    ];
-    
-    // Also check for common code file patterns in application types
-    if (baseMimeType.startsWith('application/x-') || baseMimeType.startsWith('application/vnd.')) {
-        // Check if it's a known text-based subtype
-        const knownTextSubtypes = [
-            'json', 'javascript', 'typescript', 'xml', 'yaml', 'python', 'sh', 'shellscript',
-            'c', 'cpp', 'c++', 'java', 'go', 'rust', 'ruby', 'php', 'perl', 'lua', 'sql',
-            'toml', 'ini', 'config', 'csv', 'tsv', 'plain', 'text', 'source', 'script'
-        ];
-        const subtype = baseMimeType.split('/').pop().split('+')[0];
-        if (knownTextSubtypes.some(known => subtype.includes(known))) {
+        'application/sql',
+        'application/graphql',
+        'application/x-tex',
+        'application/x-latex',
+        'application/rtf',
+    ]);
+    
+    if (knownTextTypes.has(baseMimeType)) {
+        return true;
+    }
+    
+    // 5. Check for source code patterns in application/x-* types
+    if (baseMimeType.startsWith('application/x-')) {
+        const subtype = baseMimeType.substring('application/x-'.length);
+        // Common patterns indicating source code
+        if (subtype.includes('source') || subtype.includes('script') || 
+            subtype.includes('src') || subtype.includes('code')) {
             return true;
         }
     }
     
-    return textApplicationTypes.includes(baseMimeType);
+    // 6. Check if charset parameter is present in original MIME string
+    // e.g., "application/octet-stream; charset=utf-8" is likely text
+    if (mimeType.toLowerCase().includes('charset=')) {
+        return true;
+    }
+    
+    return false;
 }
 
 export { 
@@ -2346,6 +2401,7 @@ export {
     isTextMimeType,
     isFileInCollection,
     writeFileDataToRedis,
+    getActualContentMimeType,
     // isYoutubeUrl is exported inline above
     // Exported for testing
     extractFilenameFromUrl,
diff --git a/pathways/system/entity/tools/sys_tool_editfile.js b/pathways/system/entity/tools/sys_tool_editfile.js
index ba171c18..82be8795 100644
--- a/pathways/system/entity/tools/sys_tool_editfile.js
+++ b/pathways/system/entity/tools/sys_tool_editfile.js
@@ -2,7 +2,7 @@
 // Entity tool that modifies existing files by replacing line ranges or exact string matches
 import logger from '../../../../lib/logger.js';
 import { axios } from '../../../../lib/requestExecutor.js';
-import { uploadFileToCloud, findFileInCollection, loadFileCollection, getMimeTypeFromFilename, resolveFileParameter, deleteFileByHash, isTextMimeType, updateFileMetadata, writeFileDataToRedis, invalidateFileCollectionCache } from '../../../../lib/fileUtils.js';
+import { uploadFileToCloud, findFileInCollection, loadFileCollection, getMimeTypeFromFilename, resolveFileParameter, deleteFileByHash, isTextMimeType, updateFileMetadata, writeFileDataToRedis, invalidateFileCollectionCache, getActualContentMimeType } from '../../../../lib/fileUtils.js';
 
 // Maximum file size for editing (50MB) - prevents memory blowup on huge files
 const MAX_EDITABLE_FILE_SIZE = 50 * 1024 * 1024;
@@ -398,10 +398,11 @@ export default {
                     }
                 }
 
-                // Determine MIME type from filename using utility function
-                // Use displayFilename (user-friendly) if available, otherwise fall back to filename (CFH-managed)
+                // Determine MIME type from actual stored content (URL), not displayFilename
+                // displayFilename may have a different extension than the actual content
+                // (e.g., displayFilename="report.docx" but content is markdown after conversion)
                 const filename = currentFile.displayFilename || currentFile.filename || 'modified.txt';
-                let mimeType = getMimeTypeFromFilename(filename, 'text/plain');
+                let mimeType = getActualContentMimeType(currentFile) || getMimeTypeFromFilename(filename, 'text/plain');
                 
                 // Add charset=utf-8 for text-based MIME types
                 if (isTextMimeType(mimeType)) {
diff --git a/pathways/system/entity/tools/sys_tool_readfile.js b/pathways/system/entity/tools/sys_tool_readfile.js
index ed8cce58..64759afd 100644
--- a/pathways/system/entity/tools/sys_tool_readfile.js
+++ b/pathways/system/entity/tools/sys_tool_readfile.js
@@ -4,8 +4,53 @@ import logger from '../../../../lib/logger.js';
 import { axios } from '../../../../lib/requestExecutor.js';
 import { resolveFileParameter, getMimeTypeFromFilename, isTextMimeType } from '../../../../lib/fileUtils.js';
 
+// Code/text file extensions that mime-types doesn't recognize or misidentifies
+// The mime-types library lacks coverage for many programming languages
+// See: https://github.com/jshttp/mime-types (uses mime-db which is IANA-focused)
+//
+// We only list extensions where mime.lookup() returns false or a wrong type
+// (e.g., .ts -> video/mp2t instead of TypeScript)
+const KNOWN_TEXT_EXTENSIONS = new Set([
+    // TypeScript (mime-types returns video/mp2t for .ts!)
+    'ts', 'tsx', 'mts', 'cts',
+    // Languages not in mime-db
+    'py', 'pyw', 'pyi',         // Python
+    'go',                        // Go  
+    'rs',                        // Rust
+    'rb', 'rake', 'gemspec',     // Ruby
+    'swift',                     // Swift
+    'kt', 'kts',                 // Kotlin
+    'scala', 'sbt',              // Scala
+    'clj', 'cljs', 'cljc', 'edn',// Clojure
+    'elm',                       // Elm
+    'ex', 'exs',                 // Elixir
+    'erl', 'hrl',                // Erlang
+    'hs', 'lhs',                 // Haskell
+    'ml', 'mli',                 // OCaml
+    'fs', 'fsi', 'fsx',          // F#
+    'r', 'rmd',                  // R
+    'jl',                        // Julia
+    'nim',                       // Nim
+    'zig',                       // Zig
+    'v',                         // V
+    'cr',                        // Crystal
+    // Modern web frameworks
+    'vue', 'svelte', 'astro',
+    // Shell variants (only bash/zsh/fish not recognized)
+    'bash', 'zsh', 'fish',
+    // Config files (dotfiles without extension)
+    'env', 'envrc', 'editorconfig', 'gitignore', 'dockerignore',
+    // Data/config formats not in mime-db
+    'prisma', 'graphql', 'gql',
+    'tf', 'tfvars', 'hcl',       // Terraform
+    'proto',                     // Protocol Buffers
+    'diff', 'patch',
+]);
+
 /**
  * Check if a file is a text file type that can be read
+ * Uses MIME type detection via the mime-types library, with fallback for
+ * common code file extensions that the library doesn't recognize
  * @param {string} url - File URL or path
  * @returns {boolean} - Returns true if it's a text file, false otherwise
  */
@@ -16,39 +61,29 @@ function isTextFile(url) {
 
     // Extract filename from URL (remove query string and fragment)
     const urlPath = url.split('?')[0].split('#')[0];
+    const extension = urlPath.split('.').pop()?.toLowerCase();
     
-    // Use existing MIME utility to get MIME type
+    // Use MIME library to get MIME type from filename/extension
     const mimeType = getMimeTypeFromFilename(urlPath);
     
-    // If we have a valid MIME type (not unknown/octet-stream), check it
+    // If MIME library returns a valid type, check it (but watch for misidentifications)
     if (mimeType && mimeType !== 'application/octet-stream') {
+        // Handle known misidentifications from mime-types library
+        // .ts -> video/mp2t (wrong, it's TypeScript)
+        // .rs -> application/rls-services+xml (wrong, it's Rust)
+        if (extension && KNOWN_TEXT_EXTENSIONS.has(extension)) {
+            return true; // Trust our extension list over wrong MIME type
+        }
         return isTextMimeType(mimeType);
     }
     
-    // Fallback: Check file extension for common text/code file types
-    // This handles cases where MIME type detection fails
-    const extension = urlPath.split('.').pop()?.toLowerCase();
-    if (extension) {
-        const textExtensions = [
-            // Code files
-            'json', 'js', 'jsx', 'ts', 'tsx', 'py', 'go', 'rs', 'java', 'c', 'cpp', 'cc', 'cxx', 'h', 'hpp', 'hxx',
-            'rb', 'php', 'pl', 'lua', 'sql', 'sh', 'bash', 'zsh', 'fish', 'ps1', 'bat', 'cmd',
-            // Config/data files
-            'yaml', 'yml', 'toml', 'ini', 'cfg', 'conf', 'config', 'xml', 'csv', 'tsv',
-            // Markup/documentation
-            'md', 'markdown', 'txt', 'text', 'log', 'diff', 'patch',
-            // Web
-            'html', 'htm', 'css', 'scss', 'sass', 'less', 'vue', 'svelte',
-            // Other text formats
-            'rtf', 'rtfd', 'tex', 'latex', 'bib', 'r', 'm', 'swift', 'kt', 'scala', 'clj', 'cljs', 'edn',
-            'dart', 'elm', 'ex', 'exs', 'fs', 'fsx', 'ml', 'mli', 'hs', 'lhs', 'vim', 'vimrc'
-        ];
-        if (textExtensions.includes(extension)) {
-            return true;
-        }
+    // Fallback: Check our list of known text/code extensions
+    // This handles cases where the MIME library doesn't recognize the extension
+    if (extension && KNOWN_TEXT_EXTENSIONS.has(extension)) {
+        return true;
     }
     
-    // Unknown type, reject to be safe
+    // For truly unknown extensions, we can't reliably determine if it's text
     return false;
 }
 
diff --git a/tests/integration/features/tools/fileCollection.test.js b/tests/integration/features/tools/fileCollection.test.js
index fe2bef03..6f2f2f54 100644
--- a/tests/integration/features/tools/fileCollection.test.js
+++ b/tests/integration/features/tools/fileCollection.test.js
@@ -2055,3 +2055,157 @@ test('Analyzer tool: Returns error JSON format when file not found', async t =>
         await cleanup(contextId);
     }
 });
+
+// ============================================
+// Converted Files Tests (displayFilename != URL extension)
+// ============================================
+
+test('Converted files: displayFilename .docx but URL .md - MIME type from URL', async t => {
+    const contextId = createTestContext();
+    
+    try {
+        // Add a file where displayFilename is .docx but URL points to converted .md file
+        // This simulates the case where a docx file was converted to markdown
+        const addResult = await callPathway('sys_tool_file_collection', {
+            contextId,
+            url: 'https://example.com/converted-document.md', // Converted to markdown
+            gcs: 'gs://bucket/converted-document.md',
+            filename: 'original-document.docx', // Original filename preserved
+            userMessage: 'Add converted file'
+        });
+        
+        const addParsed = JSON.parse(addResult);
+        t.is(addParsed.success, true);
+        
+        // Verify file was stored correctly
+        const collection = await loadFileCollection(contextId, null, false);
+        t.is(collection.length, 1);
+        
+        const file = collection[0];
+        // displayFilename should preserve original user filename
+        t.is(file.displayFilename, 'original-document.docx', 'displayFilename should preserve original filename');
+        // mimeType should be determined from URL (actual content), not displayFilename
+        t.is(file.mimeType, 'text/markdown', 'mimeType should be from URL (.md), not displayFilename (.docx)');
+        t.is(file.url, 'https://example.com/converted-document.md', 'URL should point to converted file');
+    } finally {
+        await cleanup(contextId);
+    }
+});
+
+test('Converted files: EditFile should use URL MIME type, not displayFilename', async t => {
+    const contextId = createTestContext();
+    
+    try {
+        // Add a converted file: displayFilename is .docx but URL is .md
+        const addResult = await callPathway('sys_tool_file_collection', {
+            contextId,
+            url: 'https://example.com/report.md', // Converted markdown
+            gcs: 'gs://bucket/report.md',
+            filename: 'report.docx', // Original filename
+            userMessage: 'Add converted file'
+        });
+        
+        const addParsed = JSON.parse(addResult);
+        t.is(addParsed.success, true);
+        
+        // Verify the file has correct MIME type from URL
+        const collection = await loadFileCollection(contextId, null, false);
+        const file = collection[0];
+        t.is(file.mimeType, 'text/markdown', 'MIME type should be from URL');
+        t.is(file.displayFilename, 'report.docx', 'displayFilename should be original');
+        
+        // Note: We can't actually test EditFile here without a real file handler,
+        // but we can verify the file is set up correctly for editing
+        // The EditFile tool should use getActualContentMimeType() which uses URL
+        const { getActualContentMimeType } = await import('../../../../lib/fileUtils.js');
+        const actualMimeType = getActualContentMimeType(file);
+        t.is(actualMimeType, 'text/markdown', 'getActualContentMimeType should return URL MIME type');
+    } finally {
+        await cleanup(contextId);
+    }
+});
+
+test('Converted files: ReadFile should accept text files based on URL, not displayFilename', async t => {
+    const contextId = createTestContext();
+    
+    try {
+        // Add a converted file: displayFilename is .docx but URL is .md
+        const addResult = await callPathway('sys_tool_file_collection', {
+            contextId,
+            url: 'https://example.com/document.md', // Converted markdown (text file)
+            gcs: 'gs://bucket/document.md',
+            filename: 'document.docx', // Original filename (would be binary if checked)
+            userMessage: 'Add converted text file'
+        });
+        
+        const addParsed = JSON.parse(addResult);
+        t.is(addParsed.success, true);
+        
+        // Verify file setup
+        const collection = await loadFileCollection(contextId, null, false);
+        const file = collection[0];
+        t.is(file.displayFilename, 'document.docx');
+        t.is(file.mimeType, 'text/markdown');
+        
+        // ReadFile should use resolveFileParameter which returns the URL
+        // The URL (.md) should be recognized as text, not the displayFilename (.docx)
+        const { resolveFileParameter } = await import('../../../../lib/fileUtils.js');
+        const resolvedUrl = await resolveFileParameter('document.docx', contextId);
+        t.is(resolvedUrl, 'https://example.com/document.md', 'Should resolve to URL');
+        
+        // The isTextFile function in ReadFile should check the URL, not displayFilename
+        // Since the URL is .md, it should be recognized as text
+        const { getMimeTypeFromFilename, isTextMimeType } = await import('../../../../lib/fileUtils.js');
+        const urlMimeType = getMimeTypeFromFilename(resolvedUrl);
+        t.is(urlMimeType, 'text/markdown', 'URL should be recognized as markdown');
+        t.true(isTextMimeType(urlMimeType), 'Markdown should be recognized as text type');
+    } finally {
+        await cleanup(contextId);
+    }
+});
+
+test('Converted files: Multiple converted files with different extensions', async t => {
+    const contextId = createTestContext();
+    
+    try {
+        // Add multiple converted files
+        await callPathway('sys_tool_file_collection', {
+            contextId,
+            url: 'https://example.com/doc1.md', // docx -> md
+            filename: 'document1.docx',
+            userMessage: 'Add docx->md'
+        });
+        
+        await callPathway('sys_tool_file_collection', {
+            contextId,
+            url: 'https://example.com/doc2.txt', // xlsx -> txt (CSV)
+            filename: 'spreadsheet.xlsx',
+            userMessage: 'Add xlsx->txt'
+        });
+        
+        await callPathway('sys_tool_file_collection', {
+            contextId,
+            url: 'https://example.com/doc3.json', // pptx -> json (structured data)
+            filename: 'presentation.pptx',
+            userMessage: 'Add pptx->json'
+        });
+        
+        // Verify all files have correct MIME types from URLs
+        const collection = await loadFileCollection(contextId, null, false);
+        t.is(collection.length, 3);
+        
+        const doc1 = collection.find(f => f.displayFilename === 'document1.docx');
+        t.truthy(doc1);
+        t.is(doc1.mimeType, 'text/markdown', 'docx->md should have markdown MIME type');
+        
+        const doc2 = collection.find(f => f.displayFilename === 'spreadsheet.xlsx');
+        t.truthy(doc2);
+        t.is(doc2.mimeType, 'text/plain', 'xlsx->txt should have text/plain MIME type');
+        
+        const doc3 = collection.find(f => f.displayFilename === 'presentation.pptx');
+        t.truthy(doc3);
+        t.is(doc3.mimeType, 'application/json', 'pptx->json should have JSON MIME type');
+    } finally {
+        await cleanup(contextId);
+    }
+});
diff --git a/tests/unit/core/fileCollection.test.js b/tests/unit/core/fileCollection.test.js
index 19c2eefd..cd476df2 100644
--- a/tests/unit/core/fileCollection.test.js
+++ b/tests/unit/core/fileCollection.test.js
@@ -411,3 +411,131 @@ test('isTextMimeType should identify text MIME types', async t => {
     t.false(isTextMimeType(''));
 });
 
+// Test converted files: displayFilename has different MIME type than URL
+test('determineMimeTypeFromUrl should use URL extension, not displayFilename', async t => {
+    const { determineMimeTypeFromUrl } = await import('../../../lib/fileUtils.js');
+    
+    // Simulate converted file: displayFilename is .docx but URL is .md
+    const url = 'https://example.com/converted-file.md';
+    const gcs = 'gs://bucket/converted-file.md';
+    const displayFilename = 'original-document.docx';
+    
+    // MIME type should be determined from URL (.md), not displayFilename (.docx)
+    const mimeType = determineMimeTypeFromUrl(url, gcs, null);
+    t.is(mimeType, 'text/markdown', 'Should use URL extension (.md) for MIME type');
+    
+    // Even if displayFilename is provided, URL takes precedence
+    const mimeType2 = determineMimeTypeFromUrl(url, gcs, displayFilename);
+    t.is(mimeType2, 'text/markdown', 'Should still use URL extension even with displayFilename');
+});
+
+test('getActualContentMimeType should use URL, not displayFilename', async t => {
+    const { getActualContentMimeType } = await import('../../../lib/fileUtils.js');
+    
+    // Simulate converted file: displayFilename is .docx but URL is .md
+    const file = {
+        url: 'https://example.com/converted-file.md',
+        gcs: 'gs://bucket/converted-file.md',
+        displayFilename: 'original-document.docx',
+        mimeType: null // Not set yet
+    };
+    
+    const mimeType = getActualContentMimeType(file);
+    t.is(mimeType, 'text/markdown', 'Should determine MIME type from URL, not displayFilename');
+    
+    // If mimeType is already set (from URL), use it
+    const fileWithMimeType = {
+        ...file,
+        mimeType: 'text/markdown'
+    };
+    const mimeType2 = getActualContentMimeType(fileWithMimeType);
+    t.is(mimeType2, 'text/markdown', 'Should use stored mimeType if available');
+});
+
+test('addFileToCollection should preserve original displayFilename for converted files', async t => {
+    const { addFileToCollection } = await import('../../../lib/fileUtils.js');
+    
+    // Simulate adding a file where URL points to converted content (.md) 
+    // but user wants to keep original filename (.docx)
+    const contextId = `test-converted-${Date.now()}`;
+    const url = 'https://example.com/converted-file.md'; // Converted to markdown
+    const gcs = 'gs://bucket/converted-file.md';
+    const originalFilename = 'original-document.docx'; // User's original filename
+    
+    try {
+        const fileEntry = await addFileToCollection(
+            contextId,
+            null,
+            url,
+            gcs,
+            originalFilename, // This should be preserved as displayFilename
+            [],
+            '',
+            null,
+            null,
+            null,
+            false
+        );
+        
+        // displayFilename should be the original user-provided filename
+        t.is(fileEntry.displayFilename, 'original-document.docx', 'displayFilename should preserve original filename');
+        
+        // mimeType should be determined from URL (actual content)
+        t.is(fileEntry.mimeType, 'text/markdown', 'mimeType should be from URL, not displayFilename');
+        
+        // Verify it was saved correctly
+        const { loadFileCollection } = await import('../../../lib/fileUtils.js');
+        const collection = await loadFileCollection(contextId, null, false);
+        t.is(collection.length, 1);
+        t.is(collection[0].displayFilename, 'original-document.docx');
+        t.is(collection[0].mimeType, 'text/markdown');
+        t.is(collection[0].url, url);
+    } finally {
+        // Cleanup
+        const { getRedisClient } = await import('../../../lib/fileUtils.js');
+        const redisClient = await getRedisClient();
+        if (redisClient) {
+            await redisClient.del(`FileStoreMap:ctx:${contextId}`);
+        }
+    }
+});
+
+test('syncFilesToCollection should determine MIME type from URL, not displayFilename', async t => {
+    const { syncFilesToCollection } = await import('../../../lib/fileUtils.js');
+    
+    // Simulate chat history with converted file
+    const contextId = `test-sync-converted-${Date.now()}`;
+    const chatHistory = [
+        {
+            role: 'user',
+            content: [
+                {
+                    type: 'file',
+                    url: 'https://example.com/converted-report.md', // Converted to markdown
+                    gcs: 'gs://bucket/converted-report.md',
+                    hash: 'hash123'
+                }
+            ]
+        }
+    ];
+    
+    try {
+        await syncFilesToCollection(chatHistory, contextId, null);
+        
+        const { loadFileCollection } = await import('../../../lib/fileUtils.js');
+        const collection = await loadFileCollection(contextId, null, false);
+        t.is(collection.length, 1);
+        
+        // MIME type should be from URL (.md), not from any displayFilename
+        t.is(collection[0].mimeType, 'text/markdown', 'MIME type should be determined from URL');
+        t.is(collection[0].url, 'https://example.com/converted-report.md');
+    } finally {
+        // Cleanup
+        const { getRedisClient } = await import('../../../lib/fileUtils.js');
+        const redisClient = await getRedisClient();
+        if (redisClient) {
+            await redisClient.del(`FileStoreMap:ctx:${contextId}`);
+        }
+    }
+});
+

From ed4d656af4d435e3629bb3548fc95bdc82aa0e6f Mon Sep 17 00:00:00 2001
From: Jason McCartney <jmac@theroot.org>
Date: Sat, 20 Dec 2025 20:27:25 -0700
Subject: [PATCH 2/5] chore: update version to 2.8.0 in package.json and
 package-lock.json

---
 helper-apps/cortex-file-handler/package-lock.json | 4 ++--
 helper-apps/cortex-file-handler/package.json      | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/helper-apps/cortex-file-handler/package-lock.json b/helper-apps/cortex-file-handler/package-lock.json
index ac4a2558..646374d2 100644
--- a/helper-apps/cortex-file-handler/package-lock.json
+++ b/helper-apps/cortex-file-handler/package-lock.json
@@ -1,12 +1,12 @@
 {
   "name": "@aj-archipelago/cortex-file-handler",
-  "version": "2.7.0",
+  "version": "2.8.0",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "@aj-archipelago/cortex-file-handler",
-      "version": "2.7.0",
+      "version": "2.8.0",
       "dependencies": {
         "@azure/storage-blob": "^12.13.0",
         "@distube/ytdl-core": "^4.14.3",
diff --git a/helper-apps/cortex-file-handler/package.json b/helper-apps/cortex-file-handler/package.json
index 39ce23de..88a26399 100644
--- a/helper-apps/cortex-file-handler/package.json
+++ b/helper-apps/cortex-file-handler/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@aj-archipelago/cortex-file-handler",
-  "version": "2.7.0",
+  "version": "2.8.0",
   "description": "File handling service for Cortex - handles file uploads, media chunking, and document processing",
   "type": "module",
   "main": "src/index.js",

From f075838ba934642e48b1b02d5cfc9a3f46d6d9e9 Mon Sep 17 00:00:00 2001
From: Jason McCartney <jmac@theroot.org>
Date: Sat, 20 Dec 2025 21:43:03 -0700
Subject: [PATCH 3/5] feat: enhance MIME type handling for converted files

- Added functionality to determine and attach the MIME type of converted files based on their URLs during the upload process.
- Updated response structure to include `mimeType` for both original and converted files, ensuring accurate content type representation.
- Improved error handling for URL parsing to ensure robust MIME type extraction, enhancing overall file handling capabilities.
---
 .../cortex-file-handler/src/blobHandler.js    |  32 ++++
 helper-apps/cortex-file-handler/src/index.js  | 173 ++++++++++--------
 helper-apps/cortex-file-handler/src/redis.js  |   9 +
 .../src/services/storage/StorageService.js    |   3 +-
 4 files changed, 142 insertions(+), 75 deletions(-)

diff --git a/helper-apps/cortex-file-handler/src/blobHandler.js b/helper-apps/cortex-file-handler/src/blobHandler.js
index 5fa04fe3..0d9f2032 100644
--- a/helper-apps/cortex-file-handler/src/blobHandler.js
+++ b/helper-apps/cortex-file-handler/src/blobHandler.js
@@ -623,11 +623,27 @@ function uploadBlob(
                       // Fallback to regular URL
                     }
 
+                    // Determine MIME type of converted file from its URL
+                    let convertedMimeType = 'application/octet-stream';
+                    try {
+                      const convertedUrlObj = new URL(convertedSaveResult.url);
+                      const convertedPathname = convertedUrlObj.pathname;
+                      const convertedExtension = path.extname(convertedPathname);
+                      convertedMimeType = mime.lookup(convertedExtension) || 'application/octet-stream';
+                    } catch (e) {
+                      // If URL parsing fails, try to extract extension from URL string
+                      const urlMatch = convertedSaveResult.url.match(/\.([a-zA-Z0-9]+)(?:\?|$)/);
+                      if (urlMatch) {
+                        convertedMimeType = mime.lookup(urlMatch[1]) || 'application/octet-stream';
+                      }
+                    }
+
                     // Attach to response body
                     result.converted = {
                       url: convertedSaveResult.url,
                       shortLivedUrl: convertedShortLivedUrl,
                       gcs: convertedGcsUrl,
+                      mimeType: convertedMimeType,
                     };
                     
                     // Note: result.shortLivedUrl remains pointing to the original file
@@ -951,11 +967,27 @@ async function uploadFile(
             // Fallback to regular URL
           }
 
+          // Determine MIME type of converted file from its URL
+          let convertedMimeType = 'application/octet-stream';
+          try {
+            const convertedUrlObj = new URL(convertedSaveResult.url);
+            const convertedPathname = convertedUrlObj.pathname;
+            const convertedExtension = path.extname(convertedPathname);
+            convertedMimeType = mime.lookup(convertedExtension) || 'application/octet-stream';
+          } catch (e) {
+            // If URL parsing fails, try to extract extension from URL string
+            const urlMatch = convertedSaveResult.url.match(/\.([a-zA-Z0-9]+)(?:\?|$)/);
+            if (urlMatch) {
+              convertedMimeType = mime.lookup(urlMatch[1]) || 'application/octet-stream';
+            }
+          }
+
           // Add converted file info to result
           result.converted = {
             url: convertedSaveResult.url,
             shortLivedUrl: convertedShortLivedUrl,
             gcs: convertedGcsUrl,
+            mimeType: convertedMimeType,
           };
           
           // Note: result.shortLivedUrl remains pointing to the original file
diff --git a/helper-apps/cortex-file-handler/src/index.js b/helper-apps/cortex-file-handler/src/index.js
index d00db8d5..443280fc 100644
--- a/helper-apps/cortex-file-handler/src/index.js
+++ b/helper-apps/cortex-file-handler/src/index.js
@@ -2,6 +2,7 @@ import fs from "fs";
 import os from "os";
 import path from "path";
 import { v4 as uuidv4 } from "uuid";
+import mime from "mime-types";
 
 import { DOC_EXTENSIONS, AZURITE_ACCOUNT_NAME } from "./constants.js";
 import { easyChunker } from "./docHelper.js";
@@ -526,93 +527,117 @@ async function CortexFileHandler(context, req) {
           context.log(`Error ensuring converted version: ${error}`);
         }
 
-        // Attach converted info to response if present
-        if (hashResult.converted) {
-          response.converted = {
-            url: hashResult.converted.url,
-            gcs: hashResult.converted.gcs,
-          };
-        }
-
-        // Always generate short-lived URL for checkHash operations
-        // Use converted URL if available, otherwise use original URL
-        const urlForShortLived = hashResult.converted?.url || hashResult.url;
-        try {
-          // Extract blob name from the URL to generate new SAS token
-          // Container parameter is ignored - always uses default container from env var
-          let blobName;
+        // Add mimeType to converted block if it exists but doesn't have mimeType yet
+        if (hashResult.converted && !hashResult.converted.mimeType) {
+          let convertedMimeType = 'application/octet-stream';
           try {
-            const url = new URL(urlForShortLived);
-            // Extract blob name from the URL path (remove leading slash)
-            let path = url.pathname.substring(1);
-            
-            // For Azurite URLs, the path includes account name: devstoreaccount1/container/blob
-            // For real Azure URLs, the path is: container/blob
-            // Check if this is an Azurite URL (contains devstoreaccount1)
-            if (path.startsWith(`${AZURITE_ACCOUNT_NAME}/`)) {
-              path = path.substring(`${AZURITE_ACCOUNT_NAME}/`.length); // Remove account prefix
-            }
-            
-            // Extract blob name from path (skip container name, always use default container)
-            const pathSegments = path.split('/').filter(segment => segment.length > 0);
-            if (pathSegments.length >= 2) {
-              // Skip container name (first segment), get blob name (remaining segments)
-              blobName = pathSegments.slice(1).join('/');
-            } else if (pathSegments.length === 1) {
-              // Fallback: assume it's just the blob name in default container
-              blobName = pathSegments[0];
+            const convertedUrlObj = new URL(hashResult.converted.url);
+            const convertedPathname = convertedUrlObj.pathname;
+            const convertedExtension = path.extname(convertedPathname);
+            convertedMimeType = mime.lookup(convertedExtension) || 'application/octet-stream';
+          } catch (e) {
+            // If URL parsing fails, try to extract extension from URL string
+            const urlMatch = hashResult.converted.url.match(/\.([a-zA-Z0-9]+)(?:\?|$)/);
+            if (urlMatch) {
+              convertedMimeType = mime.lookup(urlMatch[1]) || 'application/octet-stream';
             }
-            
-          } catch (urlError) {
-            context.log(`Error parsing URL for short-lived generation: ${urlError}`);
           }
+          hashResult.converted.mimeType = convertedMimeType;
+        }
 
-          // Generate short-lived SAS token
-          // Container parameter is ignored - always uses default container from env var
-          if (blobName) {
-            const provider = storageService.primaryProvider;
-            
-            if (provider && provider.generateShortLivedSASToken) {
-              const blobClientResult = await provider.getBlobClient();
-              const containerClient = blobClientResult.containerClient;
-              
-              const sasToken = provider.generateShortLivedSASToken(
-                containerClient, 
-                blobName, 
-                shortLivedDuration
-              );
+        // Generate short-lived URLs for both original and converted files (if converted exists)
+        // Helper function to generate short-lived URL for a given URL
+        const generateShortLivedUrlForUrl = async (urlToProcess) => {
+          if (!urlToProcess) return null;
+          
+          try {
+            // Extract blob name from the URL to generate new SAS token
+            let blobName;
+            try {
+              const url = new URL(urlToProcess);
+              let path = url.pathname.substring(1);
               
-              // Construct new URL with short-lived SAS token
-              const baseUrl = urlForShortLived.split('?')[0]; // Remove existing SAS token
-              const shortLivedUrl = `${baseUrl}?${sasToken}`;
+              // For Azurite URLs, the path includes account name: devstoreaccount1/container/blob
+              // For real Azure URLs, the path is: container/blob
+              if (path.startsWith(`${AZURITE_ACCOUNT_NAME}/`)) {
+                path = path.substring(`${AZURITE_ACCOUNT_NAME}/`.length);
+              }
               
-              // Add short-lived URL to response
-              response.shortLivedUrl = shortLivedUrl;
-              response.expiresInMinutes = shortLivedDuration;
+              const pathSegments = path.split('/').filter(segment => segment.length > 0);
+              if (pathSegments.length >= 2) {
+                blobName = pathSegments.slice(1).join('/');
+              } else if (pathSegments.length === 1) {
+                blobName = pathSegments[0];
+              }
+            } catch (urlError) {
+              context.log(`Error parsing URL for short-lived generation: ${urlError}`);
+              return null;
+            }
+
+            if (blobName) {
+              const provider = storageService.primaryProvider;
               
-              const urlType = hashResult.converted?.url ? 'converted' : 'original';
-              context.log(`Generated short-lived URL for hash: ${hash} using ${urlType} URL (expires in ${shortLivedDuration} minutes)`);
-            } else {
-              // Fallback for storage providers that don't support short-lived tokens
-              response.shortLivedUrl = urlForShortLived;
-              response.expiresInMinutes = shortLivedDuration;
-              const urlType = hashResult.converted?.url ? 'converted' : 'original';
-              context.log(`Storage provider doesn't support short-lived tokens, using ${urlType} URL`);
+              if (provider && provider.generateShortLivedSASToken) {
+                const blobClientResult = await provider.getBlobClient();
+                const containerClient = blobClientResult.containerClient;
+                
+                const sasToken = provider.generateShortLivedSASToken(
+                  containerClient, 
+                  blobName, 
+                  shortLivedDuration
+                );
+                
+                const baseUrl = urlToProcess.split('?')[0];
+                return `${baseUrl}?${sasToken}`;
+              }
             }
-          } else {
-            // If we couldn't extract blob name, use original URL
-            response.shortLivedUrl = urlForShortLived;
-            response.expiresInMinutes = shortLivedDuration;
-            context.log(`Could not extract blob name from URL, using original URL for short-lived`);
+          } catch (error) {
+            context.log(`Error generating short-lived URL: ${error}`);
           }
-        } catch (error) {
-          context.log(`Error generating short-lived URL: ${error}`);
-          // Provide fallback even on error
+          
+          return null;
+        };
+
+        // Generate short-lived URLs for response (not stored in Redis)
+        // Generate short-lived URL for converted file if it exists
+        let convertedShortLivedUrl = null;
+        if (hashResult.converted?.url) {
+          convertedShortLivedUrl = await generateShortLivedUrlForUrl(hashResult.converted.url);
+          if (!convertedShortLivedUrl) {
+            // Fallback to regular URL
+            convertedShortLivedUrl = hashResult.converted.url;
+          }
+          context.log(`Generated shortLivedUrl for converted file`);
+        }
+
+        // Generate short-lived URL for original file (for main response)
+        const urlForShortLived = hashResult.converted?.url || hashResult.url;
+        const mainShortLivedUrl = await generateShortLivedUrlForUrl(urlForShortLived);
+        if (mainShortLivedUrl) {
+          response.shortLivedUrl = mainShortLivedUrl;
+          response.expiresInMinutes = shortLivedDuration;
+          const urlType = hashResult.converted?.url ? 'converted' : 'original';
+          context.log(`Generated short-lived URL for hash: ${hash} using ${urlType} URL (expires in ${shortLivedDuration} minutes)`);
+        } else {
+          // Fallback for storage providers that don't support short-lived tokens
           response.shortLivedUrl = urlForShortLived;
           response.expiresInMinutes = shortLivedDuration;
+          const urlType = hashResult.converted?.url ? 'converted' : 'original';
+          context.log(`Storage provider doesn't support short-lived tokens, using ${urlType} URL`);
         }
 
-        //update redis timestamp with current time
+        // Attach converted info to response if present (include shortLivedUrl in response only)
+        if (hashResult.converted) {
+          response.converted = {
+            url: hashResult.converted.url,
+            shortLivedUrl: convertedShortLivedUrl || hashResult.converted.url,
+            gcs: hashResult.converted.gcs,
+            mimeType: hashResult.converted.mimeType || null,
+          };
+        }
+
+        // Update redis timestamp with current time
+        // Note: setFileStoreMap will remove shortLivedUrl fields before storing
         await setFileStoreMap(hash, hashResult, resolvedContextId);
 
         context.res = {
diff --git a/helper-apps/cortex-file-handler/src/redis.js b/helper-apps/cortex-file-handler/src/redis.js
index f2d6796b..114b8493 100644
--- a/helper-apps/cortex-file-handler/src/redis.js
+++ b/helper-apps/cortex-file-handler/src/redis.js
@@ -238,6 +238,15 @@ const setFileStoreMap = async (hash, value, contextId = null) => {
     // Remove 'message' field - it's only for the upload response, not for persistence
     delete valueToStore.message;
     
+    // Remove shortLivedUrl fields - they're only for responses, not for persistence
+    // Store only permanent URLs (url, gcs, converted.url, converted.gcs)
+    delete valueToStore.shortLivedUrl;
+    if (valueToStore.converted) {
+      const convertedCopy = { ...valueToStore.converted };
+      delete convertedCopy.shortLivedUrl;
+      valueToStore.converted = convertedCopy;
+    }
+    
     // Only set timestamp if one doesn't already exist
     if (!valueToStore.timestamp) {
       valueToStore.timestamp = new Date().toISOString();
diff --git a/helper-apps/cortex-file-handler/src/services/storage/StorageService.js b/helper-apps/cortex-file-handler/src/services/storage/StorageService.js
index 7edb1d16..ddb4f56c 100644
--- a/helper-apps/cortex-file-handler/src/services/storage/StorageService.js
+++ b/helper-apps/cortex-file-handler/src/services/storage/StorageService.js
@@ -350,7 +350,8 @@ export class StorageService {
             convertedResult = {
               url: hashResult.converted.url,
               shortLivedUrl: convertedShortLivedUrl,
-              gcs: hashResult.converted.gcs
+              gcs: hashResult.converted.gcs,
+              mimeType: hashResult.converted.mimeType || null
             };
           } catch (error) {
             context.log?.(`Warning: Failed to update converted file tag: ${error.message}`);

From be9e1aa9c1bbbefdc353392f3bed6b58acbce391 Mon Sep 17 00:00:00 2001
From: Jason McCartney <jmac@theroot.org>
Date: Sat, 20 Dec 2025 22:13:31 -0700
Subject: [PATCH 4/5] feat: enhance file handling for converted files

- Updated `parseRawFileData` to prioritize converted values (URL, GCS, mimeType) when available, ensuring accurate representation of processed content.
- Implemented checks to prevent editing of converted files, providing clear error messages to users attempting to modify them.
- Added tests to verify that file collection correctly uses converted values and enforces editing restrictions on converted files, enhancing overall file management capabilities.
---
 lib/fileUtils.js                              | 38 ++++----
 .../system/entity/tools/sys_tool_editfile.js  | 11 +++
 .../features/tools/fileCollection.test.js     | 86 +++++++++++++++++++
 tests/unit/core/shortLivedUrl.test.js         |  8 +-
 4 files changed, 117 insertions(+), 26 deletions(-)

diff --git a/lib/fileUtils.js b/lib/fileUtils.js
index f4ee8892..0e17d769 100644
--- a/lib/fileUtils.js
+++ b/lib/fileUtils.js
@@ -670,19 +670,28 @@ function parseRawFileData(allFiles, contextKey = null) {
             return null;
         }
         
+        // Use converted URL, GCS, and mimeType if converted block exists
+        // This ensures we use the converted file (the actual processable content) as the primary values
+        // Keep displayFilename as the original (e.g., "foo.docx" even if URL is "foo.md")
+        const url = decryptedData.converted?.url || decryptedData.url;
+        const gcs = decryptedData.converted?.gcs || decryptedData.gcs || null;
+        const mimeType = decryptedData.converted?.mimeType || decryptedData.mimeType || null;
+        
         // Return parsed file data with hash and inCollection preserved for filtering
         return {
             id: decryptedData.id || `${Date.now()}-${Math.random().toString(36).substring(2, 9)}`,
-            url: decryptedData.url,
-            gcs: decryptedData.gcs || null,
+            url: url, // Use converted URL if available
+            gcs: gcs, // Use converted GCS if available
             displayFilename: decryptedData.displayFilename || decryptedData.filename || null,
-            mimeType: decryptedData.mimeType || null,
+            mimeType: mimeType, // Use converted mimeType if available
             tags: decryptedData.tags || [],
             notes: decryptedData.notes || '',
             hash: hash,
             permanent: decryptedData.permanent || false,
             addedDate: decryptedData.addedDate || decryptedData.timestamp || new Date().toISOString(),
             lastAccessed: decryptedData.lastAccessed || decryptedData.timestamp || new Date().toISOString(),
+            // Mark as converted if converted block exists (for edit prevention)
+            ...(decryptedData.converted && { _isConverted: true }),
             // Preserve inCollection for filtering
             inCollection: decryptedData.inCollection
         };
@@ -1614,15 +1623,6 @@ function findFileInCollection(fileParam, collection) {
                 return file;
             }
         }
-        
-        // Also check by CFH-managed filename (case-insensitive, using basename)
-        if (file.filename) {
-            const normalizedFilename = file.filename.toLowerCase();
-            const fileFilename = path.basename(normalizedFilename);
-            if (fileFilename === paramFilename) {
-                return file;
-            }
-        }
     }
 
     // If no exact match, try simple "contains" matches on displayFilename, filename, url, and gcs
@@ -1637,14 +1637,6 @@ function findFileInCollection(fileParam, collection) {
                 }
             }
             
-            // Check if CFH-managed filename contains the parameter
-            if (file.filename) {
-                const normalizedFilename = file.filename.toLowerCase();
-                if (normalizedFilename.includes(normalizedParam)) {
-                    return file;
-                }
-            }
-            
             // Check if URL contains the parameter
             if (file.url) {
                 const normalizedUrl = file.url.toLowerCase();
@@ -1871,7 +1863,7 @@ async function checkHashExists(hash, fileHandlerUrl, pathwayResolver = null, con
             
             return {
                 url: url, // Prefers converted.shortLivedUrl (for LLM processing), then converted.url, then original shortLivedUrl/url
-                gcs: gcs, // GCS URL (prefers converted, no short-lived version for GCS)
+                gcs: gcs, // GCS URL (prefers converted if available, no short-lived version for GCS)
                 hash: data.hash || hash,
                 filename: data.filename || null // Include filename from response
             };
@@ -1926,7 +1918,7 @@ async function ensureShortLivedUrl(fileObject, fileHandlerUrl, contextId = null,
             return {
                 ...fileObject,
                 url: resolved.url, // Prefers converted.shortLivedUrl (for LLM processing), then converted.url, then original shortLivedUrl/url
-                gcs: resolved.gcs || fileObject.gcs || null, // GCS from checkHash (prefers converted)
+                gcs: resolved.gcs || fileObject.gcs || null, // GCS from checkHash (prefers converted if available)
                 filename: fileObject.filename || resolved.filename || fileObject.filename // Preserve original, fallback to resolved
             };
         }
@@ -2223,7 +2215,7 @@ async function resolveFileHashesToContent(fileHashes, config, contextId = null)
                 if (existingFile) {
                     // checkHashExists already returns:
                     // - converted.shortLivedUrl (preferred) or converted.url or original shortLivedUrl/url in url field
-                    // - GCS URL (prefers converted) in gcs field
+                    // - GCS URL (prefers converted if available) in gcs field
                     // - filename in filename field
                             return JSON.stringify({
                                 type: "image_url",
diff --git a/pathways/system/entity/tools/sys_tool_editfile.js b/pathways/system/entity/tools/sys_tool_editfile.js
index 82be8795..067d3104 100644
--- a/pathways/system/entity/tools/sys_tool_editfile.js
+++ b/pathways/system/entity/tools/sys_tool_editfile.js
@@ -255,6 +255,16 @@ export default {
                 return JSON.stringify(errorResult);
             }
             
+            // Prevent editing converted files (they can be read but not edited)
+            if (foundFile._isConverted) {
+                const errorResult = {
+                    success: false,
+                    error: `Cannot edit converted files. The file "${foundFile.displayFilename || file}" is a converted version and cannot be edited. You can read it using ReadTextFile, but to edit it you would need to edit the original file.`
+                };
+                resolver.tool = JSON.stringify({ toolUsed: toolName });
+                return JSON.stringify(errorResult);
+            }
+            
             const fileId = foundFile.id;
             
             // Serialize edits to this file (prevents concurrent edits on same instance)
@@ -283,6 +293,7 @@ export default {
                         return { jsonResult: JSON.stringify(errorResult) };
                     }
                     
+                    // Use the file URL (already uses converted URL if it exists)
                     const fileUrl = currentFile.url;
                 
                     if (!fileUrl) {
diff --git a/tests/integration/features/tools/fileCollection.test.js b/tests/integration/features/tools/fileCollection.test.js
index 6f2f2f54..f813cc1a 100644
--- a/tests/integration/features/tools/fileCollection.test.js
+++ b/tests/integration/features/tools/fileCollection.test.js
@@ -2209,3 +2209,89 @@ test('Converted files: Multiple converted files with different extensions', asyn
         await cleanup(contextId);
     }
 });
+
+test('Converted files: loadFileCollection should use converted values as primary (no converted block in response)', async t => {
+    const contextId = createTestContext();
+    
+    try {
+        // Write a file with converted block directly to Redis (simulating CFH behavior)
+        // The converted block exists in Redis, but is not exposed in the file collection response
+        const { getRedisClient, writeFileDataToRedis } = await import('../../../../lib/fileUtils.js');
+        const redisClient = await getRedisClient();
+        
+        if (!redisClient) {
+            t.skip('Redis not available');
+            return;
+        }
+        
+        const hash = 'test-converted-hash-123';
+        const fileData = {
+            id: 'test-file-id',
+            url: 'https://example.com/original.docx',
+            gcs: 'gs://bucket/original.docx',
+            filename: 'original.docx',
+            displayFilename: 'original.docx',
+            mimeType: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
+            hash: hash,
+            permanent: false,
+            timestamp: new Date().toISOString(),
+            // Include converted block in Redis (as CFH would write it)
+            // This will be used to set primary values, but the block itself is not exposed
+            converted: {
+                url: 'https://example.com/converted.md',
+                gcs: 'gs://bucket/converted.md',
+                mimeType: 'text/markdown'
+            },
+            inCollection: ['*']
+        };
+        
+        const contextMapKey = `FileStoreMap:ctx:${contextId}`;
+        await writeFileDataToRedis(redisClient, contextMapKey, hash, fileData, null);
+        
+        // Load the collection
+        const collection = await loadFileCollection(contextId, null, false);
+        t.is(collection.length, 1);
+        
+        const file = collection[0];
+        
+        // CRITICAL: Main URL, GCS, and mimeType should use converted values as primary
+        // The converted block is NOT included in the response - only the _isConverted flag
+        t.is(file.url, 'https://example.com/converted.md', 'Main URL should be converted URL');
+        t.is(file.gcs, 'gs://bucket/converted.md', 'Main GCS should be converted GCS');
+        t.is(file.mimeType, 'text/markdown', 'Main mimeType should be converted mimeType');
+        t.is(file.displayFilename, 'original.docx', 'displayFilename should preserve original filename');
+        
+        // Verify converted block is NOT included in file collection response
+        t.falsy(file.converted, 'File should NOT have converted block in collection response');
+        t.truthy(file._isConverted, 'File should be marked as converted');
+        
+        // Verify we can match by displayFilename (original filename)
+        const { findFileInCollection } = await import('../../../../lib/fileUtils.js');
+        const matchedByDisplayFilename = findFileInCollection('original.docx', collection);
+        t.truthy(matchedByDisplayFilename, 'Should match file by original displayFilename');
+        
+        // Verify resolveFileParameter returns converted URL (now the main URL)
+        const { resolveFileParameter } = await import('../../../../lib/fileUtils.js');
+        const resolvedUrl = await resolveFileParameter('original.docx', contextId);
+        t.is(resolvedUrl, 'https://example.com/converted.md', 'Should resolve to converted URL (now main URL)');
+        
+        // Verify converted files can be read (text type)
+        const { isTextMimeType } = await import('../../../../lib/fileUtils.js');
+        t.true(isTextMimeType(file.mimeType), 'Converted file should be recognized as text type');
+        
+        // Verify converted files cannot be edited
+        const editResult = await callPathway('sys_tool_editfile', {
+            contextId,
+            file: 'original.docx',
+            startLine: 1,
+            endLine: 1,
+            content: 'test',
+            userMessage: 'Try to edit converted file'
+        });
+        const editParsed = JSON.parse(editResult);
+        t.is(editParsed.success, false, 'Should not allow editing converted files');
+        t.true(editParsed.error.includes('converted') || editParsed.error.includes('Cannot edit'), 'Error should mention converted files cannot be edited');
+    } finally {
+        await cleanup(contextId);
+    }
+});
diff --git a/tests/unit/core/shortLivedUrl.test.js b/tests/unit/core/shortLivedUrl.test.js
index 9bbe9f7c..9cff7a0d 100644
--- a/tests/unit/core/shortLivedUrl.test.js
+++ b/tests/unit/core/shortLivedUrl.test.js
@@ -74,9 +74,10 @@ test('checkHashExists should prefer converted URL in shortLivedUrl', async t =>
         status: 200,
         data: {
             url: 'https://storage.example.com/file.xlsx?sv=2023-11-03&se=2025-01-01T00:00:00Z&sig=long-lived',
-            shortLivedUrl: 'https://storage.example.com/file.csv?sv=2023-11-03&se=2024-01-01T10:15:00Z&sig=short-lived',
+            shortLivedUrl: 'https://storage.example.com/file.xlsx?sv=2023-11-03&se=2024-01-01T10:15:00Z&sig=short-lived',
             converted: {
                 url: 'https://storage.example.com/file.csv?sv=2023-11-03&se=2025-01-01T00:00:00Z&sig=long-lived',
+                shortLivedUrl: 'https://storage.example.com/file.csv?sv=2023-11-03&se=2024-01-01T10:15:00Z&sig=short-lived',
                 gcs: 'gs://bucket/file.csv'
             },
             gcs: 'gs://bucket/file.xlsx',
@@ -89,8 +90,9 @@ test('checkHashExists should prefer converted URL in shortLivedUrl', async t =>
     const result = await checkHashExists(hash, fileHandlerUrl);
 
     t.truthy(result);
-    // shortLivedUrl should be based on converted file
-    t.is(result.url, mockResponse.data.shortLivedUrl, 'Should use shortLivedUrl (which prefers converted)');
+    // Should prefer converted.shortLivedUrl first, then converted.url, then original shortLivedUrl/url
+    // Since converted.shortLivedUrl exists, it should be used
+    t.is(result.url, mockResponse.data.converted.shortLivedUrl, 'Should prefer converted.shortLivedUrl');
     // GCS should prefer converted
     t.is(result.gcs, mockResponse.data.converted.gcs, 'Should prefer converted GCS URL');
 });

From 679ba5169d3402aa612e8f00be0c17ef9f0ab372 Mon Sep 17 00:00:00 2001
From: Jason McCartney <jmac@theroot.org>
Date: Sun, 21 Dec 2025 08:41:53 -0700
Subject: [PATCH 5/5] CR feedback

---
 .../cortex-file-handler/src/blobHandler.js    | 142 +++++++++---------
 helper-apps/cortex-file-handler/src/index.js  |  17 +--
 .../tests/setRetention.test.js                |   6 +-
 .../system/entity/tools/sys_tool_editfile.js  |   2 +-
 4 files changed, 77 insertions(+), 90 deletions(-)

diff --git a/helper-apps/cortex-file-handler/src/blobHandler.js b/helper-apps/cortex-file-handler/src/blobHandler.js
index 0d9f2032..07becac5 100644
--- a/helper-apps/cortex-file-handler/src/blobHandler.js
+++ b/helper-apps/cortex-file-handler/src/blobHandler.js
@@ -177,6 +177,64 @@ async function downloadFromGCS(gcsUrl, destinationPath) {
   }
 }
 
+/**
+ * Extracts MIME type from a URL based on file extension
+ * @param {string} url - The URL to extract MIME type from
+ * @returns {string} The MIME type or 'application/octet-stream' as fallback
+ */
+function getMimeTypeFromUrl(url) {
+  const defaultMimeType = 'application/octet-stream';
+  if (!url) return defaultMimeType;
+
+  try {
+    const urlObj = new URL(url);
+    const pathname = urlObj.pathname;
+    const extension = path.extname(pathname);
+    return mime.lookup(extension) || defaultMimeType;
+  } catch (e) {
+    // If URL parsing fails, try to extract extension from URL string
+    const urlMatch = url.match(/\.([a-zA-Z0-9]+)(?:\?|$)/);
+    if (urlMatch) {
+      return mime.lookup(urlMatch[1]) || defaultMimeType;
+    }
+    return defaultMimeType;
+  }
+}
+
+/**
+ * Generates a short-lived SAS URL for a converted file
+ * @param {object} context - The request context for logging
+ * @param {string} convertedUrl - The URL of the converted file
+ * @param {string} [logSuffix=''] - Optional suffix for log messages
+ * @returns {Promise<string>} The short-lived URL or the original URL as fallback
+ */
+async function generateShortLivedUrlForConvertedFile(context, convertedUrl, logSuffix = '') {
+  let shortLivedUrl = convertedUrl; // Fallback to regular URL
+  try {
+    const storageFactory = StorageFactory.getInstance();
+    const primaryProvider = await storageFactory.getAzureProvider();
+    if (primaryProvider.generateShortLivedSASToken && primaryProvider.extractBlobNameFromUrl) {
+      const blobName = primaryProvider.extractBlobNameFromUrl(convertedUrl);
+      if (blobName) {
+        const { containerClient } = await primaryProvider.getBlobClient();
+        const sasToken = primaryProvider.generateShortLivedSASToken(
+          containerClient,
+          blobName,
+          5
+        );
+        const urlObj = new URL(convertedUrl);
+        const baseUrl = `${urlObj.protocol}//${urlObj.host}${urlObj.pathname}`;
+        shortLivedUrl = `${baseUrl}?${sasToken}`;
+        context.log(`Generated shortLivedUrl for converted file${logSuffix}`);
+      }
+    }
+  } catch (error) {
+    context.log(`Warning: Could not generate shortLivedUrl for converted file: ${error.message}`);
+    // Fallback to regular URL
+  }
+  return shortLivedUrl;
+}
+
 export const getBlobClient = async () => {
   const connectionString = process.env.AZURE_STORAGE_CONNECTION_STRING;
   // Always use default container from env var
@@ -599,44 +657,14 @@ function uploadBlob(
                     }
 
                     // Generate shortLivedUrl for converted file
-                    let convertedShortLivedUrl = convertedSaveResult.url; // Fallback to regular URL
-                    try {
-                      const storageFactory = StorageFactory.getInstance();
-                      const primaryProvider = await storageFactory.getAzureProvider();
-                      if (primaryProvider.generateShortLivedSASToken && primaryProvider.extractBlobNameFromUrl) {
-                        const convertedBlobName = primaryProvider.extractBlobNameFromUrl(convertedSaveResult.url);
-                        if (convertedBlobName) {
-                          const { containerClient } = await primaryProvider.getBlobClient();
-                          const convertedShortLivedSasToken = primaryProvider.generateShortLivedSASToken(
-                            containerClient,
-                            convertedBlobName,
-                            5
-                          );
-                          const convertedUrlObj = new URL(convertedSaveResult.url);
-                          const convertedBaseUrl = `${convertedUrlObj.protocol}//${convertedUrlObj.host}${convertedUrlObj.pathname}`;
-                          convertedShortLivedUrl = `${convertedBaseUrl}?${convertedShortLivedSasToken}`;
-                          context.log("Generated shortLivedUrl for converted file (busboy)");
-                        }
-                      }
-                    } catch (error) {
-                      context.log(`Warning: Could not generate shortLivedUrl for converted file: ${error.message}`);
-                      // Fallback to regular URL
-                    }
+                    const convertedShortLivedUrl = await generateShortLivedUrlForConvertedFile(
+                      context,
+                      convertedSaveResult.url,
+                      ' (busboy)'
+                    );
 
                     // Determine MIME type of converted file from its URL
-                    let convertedMimeType = 'application/octet-stream';
-                    try {
-                      const convertedUrlObj = new URL(convertedSaveResult.url);
-                      const convertedPathname = convertedUrlObj.pathname;
-                      const convertedExtension = path.extname(convertedPathname);
-                      convertedMimeType = mime.lookup(convertedExtension) || 'application/octet-stream';
-                    } catch (e) {
-                      // If URL parsing fails, try to extract extension from URL string
-                      const urlMatch = convertedSaveResult.url.match(/\.([a-zA-Z0-9]+)(?:\?|$)/);
-                      if (urlMatch) {
-                        convertedMimeType = mime.lookup(urlMatch[1]) || 'application/octet-stream';
-                      }
-                    }
+                    const convertedMimeType = getMimeTypeFromUrl(convertedSaveResult.url);
 
                     // Attach to response body
                     result.converted = {
@@ -943,44 +971,13 @@ async function uploadFile(
           }
 
           // Generate shortLivedUrl for converted file
-          let convertedShortLivedUrl = convertedSaveResult.url; // Fallback to regular URL
-          try {
-            const storageFactory = StorageFactory.getInstance();
-            const primaryProvider = await storageFactory.getAzureProvider();
-            if (primaryProvider.generateShortLivedSASToken && primaryProvider.extractBlobNameFromUrl) {
-              const convertedBlobName = primaryProvider.extractBlobNameFromUrl(convertedSaveResult.url);
-              if (convertedBlobName) {
-                const { containerClient } = await primaryProvider.getBlobClient();
-                const convertedShortLivedSasToken = primaryProvider.generateShortLivedSASToken(
-                  containerClient,
-                  convertedBlobName,
-                  5
-                );
-                const convertedUrlObj = new URL(convertedSaveResult.url);
-                const convertedBaseUrl = `${convertedUrlObj.protocol}//${convertedUrlObj.host}${convertedUrlObj.pathname}`;
-                convertedShortLivedUrl = `${convertedBaseUrl}?${convertedShortLivedSasToken}`;
-                context.log("Generated shortLivedUrl for converted file");
-              }
-            }
-          } catch (error) {
-            context.log(`Warning: Could not generate shortLivedUrl for converted file: ${error.message}`);
-            // Fallback to regular URL
-          }
+          const convertedShortLivedUrl = await generateShortLivedUrlForConvertedFile(
+            context,
+            convertedSaveResult.url
+          );
 
           // Determine MIME type of converted file from its URL
-          let convertedMimeType = 'application/octet-stream';
-          try {
-            const convertedUrlObj = new URL(convertedSaveResult.url);
-            const convertedPathname = convertedUrlObj.pathname;
-            const convertedExtension = path.extname(convertedPathname);
-            convertedMimeType = mime.lookup(convertedExtension) || 'application/octet-stream';
-          } catch (e) {
-            // If URL parsing fails, try to extract extension from URL string
-            const urlMatch = convertedSaveResult.url.match(/\.([a-zA-Z0-9]+)(?:\?|$)/);
-            if (urlMatch) {
-              convertedMimeType = mime.lookup(urlMatch[1]) || 'application/octet-stream';
-            }
-          }
+          const convertedMimeType = getMimeTypeFromUrl(convertedSaveResult.url);
 
           // Add converted file info to result
           result.converted = {
@@ -1282,6 +1279,7 @@ export {
   gcs,
   uploadChunkToGCS,
   downloadFromGCS,
+  getMimeTypeFromUrl,
   // Re-export container constants
   getDefaultContainerName,
   GCS_BUCKETNAME,
diff --git a/helper-apps/cortex-file-handler/src/index.js b/helper-apps/cortex-file-handler/src/index.js
index 443280fc..c7c12344 100644
--- a/helper-apps/cortex-file-handler/src/index.js
+++ b/helper-apps/cortex-file-handler/src/index.js
@@ -18,7 +18,7 @@ import {
 } from "./redis.js";
 import { FileConversionService } from "./services/FileConversionService.js";
 import { StorageService } from "./services/storage/StorageService.js";
-import { uploadBlob } from "./blobHandler.js";
+import { uploadBlob, getMimeTypeFromUrl } from "./blobHandler.js";
 import { generateShortId } from "./utils/filenameUtils.js";
 import { redactContextId, redactSasToken, sanitizeForLogging } from "./utils/logSecurity.js";
 
@@ -529,20 +529,7 @@ async function CortexFileHandler(context, req) {
 
         // Add mimeType to converted block if it exists but doesn't have mimeType yet
         if (hashResult.converted && !hashResult.converted.mimeType) {
-          let convertedMimeType = 'application/octet-stream';
-          try {
-            const convertedUrlObj = new URL(hashResult.converted.url);
-            const convertedPathname = convertedUrlObj.pathname;
-            const convertedExtension = path.extname(convertedPathname);
-            convertedMimeType = mime.lookup(convertedExtension) || 'application/octet-stream';
-          } catch (e) {
-            // If URL parsing fails, try to extract extension from URL string
-            const urlMatch = hashResult.converted.url.match(/\.([a-zA-Z0-9]+)(?:\?|$)/);
-            if (urlMatch) {
-              convertedMimeType = mime.lookup(urlMatch[1]) || 'application/octet-stream';
-            }
-          }
-          hashResult.converted.mimeType = convertedMimeType;
+          hashResult.converted.mimeType = getMimeTypeFromUrl(hashResult.converted.url);
         }
 
         // Generate short-lived URLs for both original and converted files (if converted exists)
diff --git a/helper-apps/cortex-file-handler/tests/setRetention.test.js b/helper-apps/cortex-file-handler/tests/setRetention.test.js
index 22cdf418..356017e0 100644
--- a/helper-apps/cortex-file-handler/tests/setRetention.test.js
+++ b/helper-apps/cortex-file-handler/tests/setRetention.test.js
@@ -313,7 +313,8 @@ test.serial("should update Redis map with retention information", async (t) => {
     const newEntry = await getFileStoreMap(testHash);
     t.truthy(newEntry, "Redis entry should still exist after setting retention");
     t.is(newEntry.url, retentionResponse.data.url, "Entry should have correct URL");
-    t.truthy(newEntry.shortLivedUrl, "Entry should have shortLivedUrl");
+    // Note: shortLivedUrl is intentionally NOT stored in Redis (stripped before persistence)
+    // It's only returned in the response, which is checked above
     t.is(newEntry.permanent, true, "Entry should have permanent=true in Redis (matches file collection logic)");
 
   } finally {
@@ -551,7 +552,8 @@ test.serial("should set retention for context-scoped file", async (t) => {
     // Verify Redis entry was updated with context-scoped key
     const updatedEntry = await getFileStoreMap(testHash, false, contextId);
     t.truthy(updatedEntry, "Should have updated entry in Redis");
-    t.truthy(updatedEntry.shortLivedUrl, "Should have shortLivedUrl in Redis entry");
+    // Note: shortLivedUrl is intentionally NOT stored in Redis (stripped before persistence)
+    // It's only returned in the response, which is checked above
     t.is(updatedEntry.permanent, true, "Entry should have permanent=true in Redis (matches file collection logic)");
 
     // Wait for operations to complete
diff --git a/pathways/system/entity/tools/sys_tool_editfile.js b/pathways/system/entity/tools/sys_tool_editfile.js
index 067d3104..193dae9c 100644
--- a/pathways/system/entity/tools/sys_tool_editfile.js
+++ b/pathways/system/entity/tools/sys_tool_editfile.js
@@ -2,7 +2,7 @@
 // Entity tool that modifies existing files by replacing line ranges or exact string matches
 import logger from '../../../../lib/logger.js';
 import { axios } from '../../../../lib/requestExecutor.js';
-import { uploadFileToCloud, findFileInCollection, loadFileCollection, getMimeTypeFromFilename, resolveFileParameter, deleteFileByHash, isTextMimeType, updateFileMetadata, writeFileDataToRedis, invalidateFileCollectionCache, getActualContentMimeType } from '../../../../lib/fileUtils.js';
+import { uploadFileToCloud, findFileInCollection, loadFileCollection, getMimeTypeFromFilename, deleteFileByHash, isTextMimeType, updateFileMetadata, writeFileDataToRedis, invalidateFileCollectionCache, getActualContentMimeType } from '../../../../lib/fileUtils.js';
 
 // Maximum file size for editing (50MB) - prevents memory blowup on huge files
 const MAX_EDITABLE_FILE_SIZE = 50 * 1024 * 1024;