Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 76 additions & 0 deletions openspec/specs/telegram-markdown-rendering/spec.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
## Purpose

Convert standard Markdown output from AI providers to Telegram-compatible HTML so that messages rendered in Telegram display proper formatting (bold headers, italic text, code blocks, links) instead of raw markdown syntax. Includes a fallback to plain text if HTML parsing fails.

## Requirements

### Requirement: Markdown-to-Telegram-HTML converter
The system SHALL provide a `markdownToTelegramHtml(md: string): string` function in `src/connectors/telegram/markdown-html.ts` that converts standard Markdown to Telegram HTML format.

Supported conversions:
- `# Header` through `###### Header` → `<b>Header</b>`
- `**bold**` and `__bold__` → `<b>bold</b>`
- `*italic*` and `_italic_` → `<i>italic</i>`
- `~~strikethrough~~` → `<s>strikethrough</s>`
- `` `inline code` `` → `<code>inline code</code>`
- Fenced code blocks (` ```lang\n...\n``` `) → `<pre>...</pre>`
- `[text](url)` → `<a href="url">text</a>`

#### Scenario: Headers converted to bold
- **WHEN** input contains `## Market Summary`
- **THEN** output SHALL contain `<b>Market Summary</b>`

#### Scenario: Bold and italic preserved
- **WHEN** input contains `**important** and *emphasis*`
- **THEN** output SHALL contain `<b>important</b> and <i>emphasis</i>`

#### Scenario: Code blocks preserved
- **WHEN** input contains a fenced code block with content `const x = 1`
- **THEN** output SHALL contain `<pre>const x = 1</pre>` with HTML entities escaped inside

#### Scenario: Links converted
- **WHEN** input contains `[Google](https://google.com)`
- **THEN** output SHALL contain `<a href="https://google.com">Google</a>`

### Requirement: HTML entity escaping
The converter SHALL escape `<`, `>`, and `&` in non-code text to `&lt;`, `&gt;`, and `&amp;` before applying formatting transformations. Code blocks and inline code SHALL also have their content escaped to prevent HTML injection.

#### Scenario: Angle brackets escaped
- **WHEN** input contains `price < 100 && price > 50`
- **THEN** output SHALL contain `price &lt; 100 &amp;&amp; price &gt; 50`

#### Scenario: Code content escaped
- **WHEN** input contains `` `<div>hello</div>` ``
- **THEN** output SHALL contain `<code>&lt;div&gt;hello&lt;/div&gt;</code>`

### Requirement: TelegramConnector uses HTML parse mode
`TelegramConnector.send()` SHALL convert message text through `markdownToTelegramHtml()` and send with `parse_mode: 'HTML'`. If the Telegram API rejects the HTML (parse error), the system SHALL fall back to sending as plain text without `parse_mode`.

#### Scenario: Formatted message sent
- **WHEN** `send({ text: '## Title\n**bold**' })` is called
- **THEN** `bot.api.sendMessage` SHALL be called with `parse_mode: 'HTML'` and converted HTML content

#### Scenario: HTML parse failure fallback
- **WHEN** Telegram API rejects the HTML content
- **THEN** the system SHALL retry `sendMessage` without `parse_mode` (plain text)

### Requirement: TelegramPlugin uses HTML parse mode
All outbound message methods in `TelegramPlugin` SHALL use the converter:
- `sendReply(chatId, text)` — converts text to HTML, sends with `parse_mode: 'HTML'`
- `sendReplyWithPlaceholder(chatId, text, media, placeholderId)` — converts text, edits placeholder with HTML, sends remaining chunks with HTML
- Both methods SHALL fall back to plain text on parse errors

#### Scenario: Direct chat reply formatted
- **WHEN** the AI responds with markdown in a Telegram chat
- **THEN** the reply SHALL be sent with `parse_mode: 'HTML'` and proper formatting

#### Scenario: Placeholder edit with HTML
- **WHEN** `sendReplyWithPlaceholder` edits the `...` placeholder message
- **THEN** `editMessageText` SHALL include `parse_mode: 'HTML'`

### Requirement: Chunking compatibility
The `splitMessage()` function SHALL work correctly with HTML-formatted text. The system SHALL convert markdown to HTML before chunking, so that HTML tags are not split across chunks.

#### Scenario: Long HTML message chunked
- **WHEN** a converted HTML message exceeds 4096 characters
- **THEN** the system SHALL split at newlines or spaces (not inside HTML tags) and send each chunk with `parse_mode: 'HTML'`
71 changes: 71 additions & 0 deletions src/connectors/telegram/markdown-html.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
/**
* Convert standard Markdown to Telegram-compatible HTML.
*
* Telegram HTML supports: <b>, <i>, <u>, <s>, <code>, <pre>, <a href="">.
* Headers are rendered as bold text. Tables and other advanced markdown are
* not converted; they pass through as HTML-escaped plain text.
*/

/**
 * Sentinel wrapped around a numeric index to build placeholder tokens.
 * It starts with a NUL character so the markdown regexes below never match it.
 */
const PLACEHOLDER = '\x00CB'

/** A finished HTML fragment parked outside the working text while markdown rules run. */
interface CodeBlock {
  /** Unique token that stands in for the fragment inside the working text. */
  placeholder: string
  /** Final HTML that replaces the token at the end of conversion. */
  html: string
}

/**
 * Convert markdown text to Telegram HTML format.
 * Returns a string safe to use with parse_mode: 'HTML'.
 *
 * Code spans, fenced code blocks and link URLs are swapped for placeholder
 * tokens before the emphasis rules run, so their content is never re-interpreted
 * as markdown. They are restored verbatim at the end.
 *
 * @param md - Markdown source text.
 * @returns The converted HTML, trimmed of leading/trailing whitespace.
 */
export function markdownToTelegramHtml(md: string): string {
  const blocks: CodeBlock[] = []

  // Park a finished HTML fragment and return the token that represents it.
  const stash = (html: string): string => {
    const placeholder = `${PLACEHOLDER}${blocks.length}${PLACEHOLDER}`
    blocks.push({ placeholder, html })
    return placeholder
  }

  // 1. Extract fenced code blocks before any escaping
  let text = md.replace(/```(\w*)\n([\s\S]*?)```/g, (_match, _lang: string, code: string) => {
    return stash(`<pre>${escapeHtml(code.trimEnd())}</pre>`)
  })

  // 2. Extract inline code
  text = text.replace(/`([^`\n]+)`/g, (_match, code: string) => {
    return stash(`<code>${escapeHtml(code)}</code>`)
  })

  // 3. Escape HTML entities in remaining text
  text = escapeHtml(text)

  // 4. Links: [text](url)
  // Handled before the emphasis rules and with the URL stashed, so that
  // characters such as `__`, `*` or `~~` inside a URL are not turned into tags.
  // `"` is escaped because the URL is placed inside a double-quoted attribute.
  text = text.replace(/\[([^\]]+)\]\(([^)]+)\)/g, (_match, label: string, url: string) => {
    return `<a href="${stash(url.replace(/"/g, '&quot;'))}">${label}</a>`
  })

  // 5. Headers → bold (## Header → <b>Header</b>)
  text = text.replace(/^#{1,6}\s+(.+)$/gm, '<b>$1</b>')

  // 6. Bold: **text** or __text__
  text = text.replace(/\*\*(.+?)\*\*/g, '<b>$1</b>')
  text = text.replace(/__(.+?)__/g, '<b>$1</b>')

  // 7. Italic: *text* or _text_ (not inside bold markers)
  text = text.replace(/(?<!\w)\*([^*\n]+?)\*(?!\w)/g, '<i>$1</i>')
  text = text.replace(/(?<!\w)_([^_\n]+?)_(?!\w)/g, '<i>$1</i>')

  // 8. Strikethrough: ~~text~~
  text = text.replace(/~~(.+?)~~/g, '<s>$1</s>')

  // 9. Restore stashed fragments
  // Newest first: a later fragment may have captured an earlier placeholder,
  // which only becomes visible in `text` once the later one is restored.
  // A function replacer is used so `$` sequences in the fragment (e.g. `$$`,
  // `$&` in shell or template code) are inserted literally.
  for (const block of [...blocks].reverse()) {
    text = text.replace(block.placeholder, () => block.html)
  }

  return text.trim()
}

/**
 * Escape the characters that are significant in HTML text content.
 * `&` is replaced first so the entities produced for `<` and `>` are not
 * escaped a second time.
 */
function escapeHtml(s: string): string {
  return s
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
}
13 changes: 10 additions & 3 deletions src/connectors/telegram/telegram-connector.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import { readFile } from 'node:fs/promises'
import { Bot, InputFile } from 'grammy'
import type { Connector, ConnectorCapabilities, SendPayload, SendResult } from '../types.js'
import { markdownToTelegramHtml } from './markdown-html.js'

export const MAX_MESSAGE_LENGTH = 4096

Expand Down Expand Up @@ -40,11 +41,17 @@ export class TelegramConnector implements Connector {
}
}

// Send text with chunking
// Send text with chunking + HTML formatting
if (payload.text) {
const chunks = splitMessage(payload.text, MAX_MESSAGE_LENGTH)
const html = markdownToTelegramHtml(payload.text)
const chunks = splitMessage(html, MAX_MESSAGE_LENGTH)
for (const chunk of chunks) {
await this.bot.api.sendMessage(this.chatId, chunk)
try {
await this.bot.api.sendMessage(this.chatId, chunk, { parse_mode: 'HTML' })
} catch {
// Fallback to plain text if HTML parsing fails
await this.bot.api.sendMessage(this.chatId, chunk)
}
}
}

Expand Down
26 changes: 21 additions & 5 deletions src/connectors/telegram/telegram-plugin.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import { forceCompact } from '../../core/compaction'
import { readAIBackend, writeAIBackend, type AIBackend } from '../../core/config'
import type { ConnectorCenter } from '../../core/connector-center.js'
import { TelegramConnector, splitMessage, MAX_MESSAGE_LENGTH } from './telegram-connector.js'
import { markdownToTelegramHtml } from './markdown-html.js'

const BACKEND_LABELS: Record<AIBackend, string> = {
'claude-code': 'Claude Code',
Expand Down Expand Up @@ -372,16 +373,26 @@ export class TelegramPlugin implements Plugin {

// Send text — edit placeholder for first chunk, send the rest as new messages
if (text) {
const chunks = splitMessage(text, MAX_MESSAGE_LENGTH)
const html = markdownToTelegramHtml(text)
const chunks = splitMessage(html, MAX_MESSAGE_LENGTH)
let startIdx = 0

if (placeholderMsgId && chunks.length > 0) {
const edited = await this.bot!.api.editMessageText(chatId, placeholderMsgId, chunks[0]).then(() => true).catch(() => false)
const edited = await this.bot!.api.editMessageText(chatId, placeholderMsgId, chunks[0], { parse_mode: 'HTML' })
.then(() => true)
.catch(async () => {
// Fallback: try without parse_mode
return this.bot!.api.editMessageText(chatId, placeholderMsgId!, chunks[0]).then(() => true).catch(() => false)
})
if (edited) startIdx = 1
}

for (let i = startIdx; i < chunks.length; i++) {
await this.bot!.api.sendMessage(chatId, chunks[i])
try {
await this.bot!.api.sendMessage(chatId, chunks[i], { parse_mode: 'HTML' })
} catch {
await this.bot!.api.sendMessage(chatId, chunks[i])
}
}

// Placeholder was edited — done
Expand All @@ -396,9 +407,14 @@ export class TelegramPlugin implements Plugin {

private async sendReply(chatId: number, text: string) {
if (text) {
const chunks = splitMessage(text, MAX_MESSAGE_LENGTH)
const html = markdownToTelegramHtml(text)
const chunks = splitMessage(html, MAX_MESSAGE_LENGTH)
for (const chunk of chunks) {
await this.bot!.api.sendMessage(chatId, chunk)
try {
await this.bot!.api.sendMessage(chatId, chunk, { parse_mode: 'HTML' })
} catch {
await this.bot!.api.sendMessage(chatId, chunk)
}
}
}
}
Expand Down