diff --git a/openspec/specs/telegram-markdown-rendering/spec.md b/openspec/specs/telegram-markdown-rendering/spec.md new file mode 100644 index 00000000..57544a68 --- /dev/null +++ b/openspec/specs/telegram-markdown-rendering/spec.md @@ -0,0 +1,76 @@ +## Purpose + +Convert standard Markdown output from AI providers to Telegram-compatible HTML so that messages rendered in Telegram display proper formatting (bold headers, italic text, code blocks, links) instead of raw markdown syntax. Includes a fallback to plain text if HTML parsing fails. + +## Requirements + +### Requirement: Markdown-to-Telegram-HTML converter +The system SHALL provide a `markdownToTelegramHtml(md: string): string` function in `src/connectors/telegram/markdown-html.ts` that converts standard Markdown to Telegram HTML format. + +Supported conversions: +- `# Header` through `###### Header` → `<b>Header</b>` +- `**bold**` and `__bold__` → `<b>bold</b>` +- `*italic*` and `_italic_` → `<i>italic</i>` +- `~~strikethrough~~` → `<s>strikethrough</s>` +- `` `inline code` `` → `<code>inline code</code>` +- Fenced code blocks (` ```lang\n...\n``` `) → `<pre>...</pre>` +- `[text](url)` → `<a href="url">text</a>` + +#### Scenario: Headers converted to bold +- **WHEN** input contains `## Market Summary` +- **THEN** output SHALL contain `<b>Market Summary</b>` + +#### Scenario: Bold and italic preserved +- **WHEN** input contains `**important** and *emphasis*` +- **THEN** output SHALL contain `<b>important</b> and <i>emphasis</i>` + +#### Scenario: Code blocks preserved +- **WHEN** input contains a fenced code block with content `const x = 1` +- **THEN** output SHALL contain `<pre>const x = 1</pre>` with HTML entities escaped inside + +#### Scenario: Links converted +- **WHEN** input contains `[Google](https://google.com)` +- **THEN** output SHALL contain `<a href="https://google.com">Google</a>` + +### Requirement: HTML entity escaping +The converter SHALL escape `<`, `>`, and `&` in non-code text to `&lt;`, `&gt;`, and `&amp;` before applying formatting transformations. Code blocks and inline code SHALL also have their content escaped to prevent HTML injection. + +#### Scenario: Angle brackets escaped +- **WHEN** input contains `price < 100 && price > 50` +- **THEN** output SHALL contain `price &lt; 100 &amp;&amp; price &gt; 50` + +#### Scenario: Code content escaped +- **WHEN** input contains `` `<div>hello</div>` `` +- **THEN** output SHALL contain `&lt;div&gt;hello&lt;/div&gt;` + +### Requirement: TelegramConnector uses HTML parse mode +`TelegramConnector.send()` SHALL convert message text through `markdownToTelegramHtml()` and send with `parse_mode: 'HTML'`. If the Telegram API rejects the HTML (parse error), the system SHALL fall back to sending as plain text without `parse_mode`. + +#### Scenario: Formatted message sent +- **WHEN** `send({ text: '## Title\n**bold**' })` is called +- **THEN** `bot.api.sendMessage` SHALL be called with `parse_mode: 'HTML'` and converted HTML content + +#### Scenario: HTML parse failure fallback +- **WHEN** Telegram API rejects the HTML content +- **THEN** the system SHALL retry `sendMessage` without `parse_mode` (plain text) + +### Requirement: TelegramPlugin uses HTML parse mode +All outbound message methods in `TelegramPlugin` SHALL use the converter: +- `sendReply(chatId, text)` — converts text to HTML, sends with `parse_mode: 'HTML'` +- `sendReplyWithPlaceholder(chatId, text, media, placeholderId)` — converts text, edits placeholder with HTML, sends remaining chunks with HTML +- Both methods SHALL fall back to plain text on parse errors + +#### Scenario: Direct chat reply formatted +- **WHEN** the AI responds with markdown in a Telegram chat +- **THEN** the reply SHALL be sent with `parse_mode: 'HTML'` and proper formatting + +#### Scenario: Placeholder edit with HTML +- **WHEN** `sendReplyWithPlaceholder` edits the `...` placeholder message +- **THEN** `editMessageText` SHALL include `parse_mode: 'HTML'` + +### Requirement: Chunking compatibility +The `splitMessage()` function SHALL work correctly with HTML-formatted text. The system SHALL convert markdown to HTML before chunking, so that HTML tags are not split across chunks. 
+ +#### Scenario: Long HTML message chunked +- **WHEN** a converted HTML message exceeds 4096 characters +- **THEN** the system SHALL split at newlines or spaces (not inside HTML tags) and send each chunk with `parse_mode: 'HTML'` diff --git a/src/connectors/telegram/markdown-html.ts b/src/connectors/telegram/markdown-html.ts new file mode 100644 index 00000000..a30fd965 --- /dev/null +++ b/src/connectors/telegram/markdown-html.ts @@ -0,0 +1,71 @@ +/** + * Convert standard Markdown to Telegram-compatible HTML. + * + * Telegram HTML supports: <b>, <i>, <u>, <s>, <code>, <pre>, <a>.
+ * Headers, tables, and other advanced markdown are converted to approximate
+ * equivalents (bold headers, preformatted tables).
+ */
+
+const PLACEHOLDER = '\x00CB'
+
+interface CodeBlock {
+  placeholder: string
+  html: string
+}
+
+/**
+ * Convert markdown text to Telegram HTML format.
+ * Returns a string safe to use with parse_mode: 'HTML'.
+ */
+export function markdownToTelegramHtml(md: string): string {
+  const blocks: CodeBlock[] = []
+  let blockIdx = 0
+
+  // 1. Extract fenced code blocks before any escaping
+  let text = md.replace(/```(\w*)\n([\s\S]*?)```/g, (_match, _lang, code) => {
+    const ph = `${PLACEHOLDER}${blockIdx++}${PLACEHOLDER}`
+    blocks.push({ placeholder: ph, html: `<pre>${escapeHtml(code.trimEnd())}</pre>` })
+    return ph
+  })
+
+  // 2. Extract inline code
+  text = text.replace(/`([^`\n]+)`/g, (_match, code) => {
+    const ph = `${PLACEHOLDER}${blockIdx++}${PLACEHOLDER}`
+    blocks.push({ placeholder: ph, html: `<code>${escapeHtml(code)}</code>` })
+    return ph
+  })
+
+  // 3. Escape HTML entities in remaining text
+  text = escapeHtml(text)
+
+  // 4. Headers → bold (## Header → \n<b>Header</b>\n)
+  text = text.replace(/^#{1,6}\s+(.+)$/gm, '<b>$1</b>')
+
+  // 5. Bold: **text** or __text__
+  text = text.replace(/\*\*(.+?)\*\*/g, '<b>$1</b>')
+  text = text.replace(/__(.+?)__/g, '<b>$1</b>')
+
+  // 6. Italic: *text* or _text_ (not inside bold markers)
+  text = text.replace(/(?<!\*)\*(?!\*)(.+?)(?<!\*)\*(?!\*)/g, '<i>$1</i>')
+  text = text.replace(/(?<!_)_(?!_)(.+?)(?<!_)_(?!_)/g, '<i>$1</i>')
+
+  // 7. Strikethrough: ~~text~~
+  text = text.replace(/~~(.+?)~~/g, '<s>$1</s>')
+
+  // 8. Links: [text](url)
+  text = text.replace(/\[([^\]]+)\]\(([^)]+)\)/g, '<a href="$2">$1</a>')
+
+  // 9. Restore code blocks
+  for (const block of blocks) {
+    text = text.replace(block.placeholder, block.html)
+  }
+
+  return text.trim()
+}
+
+function escapeHtml(s: string): string {
+  return s
+    .replace(/&/g, '&amp;')
+    .replace(/</g, '&lt;')
+    .replace(/>/g, '&gt;')
+}
diff --git a/src/connectors/telegram/telegram-connector.ts b/src/connectors/telegram/telegram-connector.ts index 90ca452d..a451d604 100644 --- a/src/connectors/telegram/telegram-connector.ts +++ b/src/connectors/telegram/telegram-connector.ts @@ -12,6 +12,7 @@ import { readFile } from 'node:fs/promises' import { Bot, InputFile } from 'grammy' import type { Connector, ConnectorCapabilities, SendPayload, SendResult } from '../types.js' +import { markdownToTelegramHtml } from './markdown-html.js' export const MAX_MESSAGE_LENGTH = 4096 @@ -40,11 +41,17 @@ export class TelegramConnector implements Connector { } } - // Send text with chunking + // Send text with chunking + HTML formatting if (payload.text) { - const chunks = splitMessage(payload.text, MAX_MESSAGE_LENGTH) + const html = markdownToTelegramHtml(payload.text) + const chunks = splitMessage(html, MAX_MESSAGE_LENGTH) for (const chunk of chunks) { - await this.bot.api.sendMessage(this.chatId, chunk) + try { + await this.bot.api.sendMessage(this.chatId, chunk, { parse_mode: 'HTML' }) + } catch { + // Fallback to plain text if HTML parsing fails + await this.bot.api.sendMessage(this.chatId, chunk) + } } } diff --git a/src/connectors/telegram/telegram-plugin.ts b/src/connectors/telegram/telegram-plugin.ts index ad6756d7..bcdccf7d 100644 --- a/src/connectors/telegram/telegram-plugin.ts +++ b/src/connectors/telegram/telegram-plugin.ts @@ -13,6 +13,7 @@ import { forceCompact } from '../../core/compaction' import { readAIBackend, writeAIBackend, type AIBackend } from '../../core/config' 
import type { ConnectorCenter } from '../../core/connector-center.js' import { TelegramConnector, splitMessage, MAX_MESSAGE_LENGTH } from './telegram-connector.js' +import { markdownToTelegramHtml } from './markdown-html.js' const BACKEND_LABELS: Record<AIBackend, string> = { 'claude-code': 'Claude Code', @@ -372,16 +373,26 @@ export class TelegramPlugin implements Plugin { // Send text — edit placeholder for first chunk, send the rest as new messages if (text) { - const chunks = splitMessage(text, MAX_MESSAGE_LENGTH) + const html = markdownToTelegramHtml(text) + const chunks = splitMessage(html, MAX_MESSAGE_LENGTH) let startIdx = 0 if (placeholderMsgId && chunks.length > 0) { - const edited = await this.bot!.api.editMessageText(chatId, placeholderMsgId, chunks[0]).then(() => true).catch(() => false) + const edited = await this.bot!.api.editMessageText(chatId, placeholderMsgId, chunks[0], { parse_mode: 'HTML' }) + .then(() => true) + .catch(async () => { + // Fallback: try without parse_mode + return this.bot!.api.editMessageText(chatId, placeholderMsgId!, chunks[0]).then(() => true).catch(() => false) + }) if (edited) startIdx = 1 } for (let i = startIdx; i < chunks.length; i++) { - await this.bot!.api.sendMessage(chatId, chunks[i]) + try { + await this.bot!.api.sendMessage(chatId, chunks[i], { parse_mode: 'HTML' }) + } catch { + await this.bot!.api.sendMessage(chatId, chunks[i]) + } } // Placeholder was edited — done @@ -396,9 +407,14 @@ export class TelegramPlugin implements Plugin { private async sendReply(chatId: number, text: string) { if (text) { - const chunks = splitMessage(text, MAX_MESSAGE_LENGTH) + const html = markdownToTelegramHtml(text) + const chunks = splitMessage(html, MAX_MESSAGE_LENGTH) for (const chunk of chunks) { - await this.bot!.api.sendMessage(chatId, chunk) + try { + await this.bot!.api.sendMessage(chatId, chunk, { parse_mode: 'HTML' }) + } catch { + await this.bot!.api.sendMessage(chatId, chunk) + } } } }