From f0fc757df395daa3549373dc0413578494baacce Mon Sep 17 00:00:00 2001 From: restuta Date: Tue, 24 Mar 2026 13:38:42 +0700 Subject: [PATCH] feat: inline local images during publish --- src/cli/main.ts | 19 +++ src/core/contract.ts | 1 + src/core/markdown.ts | 11 +- src/core/publish-markdown.ts | 208 ++++++++++++++++++++++++++++ src/core/publish-service.ts | 5 +- src/server/app.ts | 3 + tests/integration/cli.test.ts | 57 ++++++++ tests/unit/markdown.test.ts | 8 ++ tests/unit/publish-markdown.test.ts | 53 +++++++ 9 files changed, 362 insertions(+), 3 deletions(-) create mode 100644 src/core/publish-markdown.ts create mode 100644 tests/unit/publish-markdown.test.ts diff --git a/src/cli/main.ts b/src/cli/main.ts index e3591de..8659882 100644 --- a/src/cli/main.ts +++ b/src/cli/main.ts @@ -8,6 +8,8 @@ import { ListPagesResponseSchema, PublishedPageSchema, } from "../core/contract.js"; +import { parseMarkdownDocument } from "../core/markdown.js"; +import { prepareMarkdownBodyForPublish } from "../core/publish-markdown.js"; import { loadConfig, loadMapping, @@ -117,6 +119,10 @@ async function runPublish(context: CommandContext): Promise { filePath === undefined ? await readStdin() : await readFile(path.resolve(filePath), "utf8"); + const renderMarkdown = + filePath === undefined + ? undefined + : await buildRenderMarkdown(markdown, path.resolve(filePath)); const response = await fetch( `${apiBase}/api/namespaces/${encodeURIComponent(namespace)}/pages/publish`, { @@ -127,6 +133,7 @@ async function runPublish(context: CommandContext): Promise { }, body: JSON.stringify({ markdown, + ...(renderMarkdown === undefined ? {} : { renderMarkdown }), ...(options.slug === undefined ? {} : { slug: options.slug }), ...(existingMapping?.pageId === undefined ? {} @@ -154,6 +161,18 @@ async function runPublish(context: CommandContext): Promise { console.log(published.url); } +async function buildRenderMarkdown( + markdown: string, + sourcePath: string, +): Promise { + const parsed = parseMarkdownDocument(markdown); + const renderMarkdown = await prepareMarkdownBodyForPublish(parsed.body, { + sourcePath, + }); + + return renderMarkdown === parsed.body ? undefined : renderMarkdown; +} + async function runList(context: CommandContext): Promise { const all = context.args.includes("--all"); const filtered = context.args.filter((a) => a !== "--all"); diff --git a/src/core/contract.ts b/src/core/contract.ts index 09afc3f..31923e0 100644 --- a/src/core/contract.ts +++ b/src/core/contract.ts @@ -27,6 +27,7 @@ export const ClaimNamespaceResponseSchema = z.object({ export const PublishPageRequestSchema = z.object({ markdown: z.string().min(1), + renderMarkdown: z.string().min(1).optional(), slug: NameSchema.optional(), pageId: z.string().uuid().optional(), }); diff --git a/src/core/markdown.ts b/src/core/markdown.ts index 725c846..156d652 100644 --- a/src/core/markdown.ts +++ b/src/core/markdown.ts @@ -1,6 +1,6 @@ import matter from "gray-matter"; import rehypeHighlight from "rehype-highlight"; -import rehypeSanitize from "rehype-sanitize"; +import rehypeSanitize, { defaultSchema } from "rehype-sanitize"; import rehypeStringify from "rehype-stringify"; import remarkGfm from "remark-gfm"; import remarkParse from "remark-parse"; @@ -52,12 +52,19 @@ export async function renderMarkdownToHtml( markdown: string, ): Promise { const renderedMarkdown = autolinkBareUrls(stripWikilinks(markdown.trim())); + const sanitizeSchema = { + ...defaultSchema, + protocols: { + ...defaultSchema.protocols, + src: [...(defaultSchema.protocols?.["src"] ?? []), "data"], + }, + }; const rawHtml = String( await unified() .use(remarkParse) .use(remarkGfm) .use(remarkRehype) - .use(rehypeSanitize) + .use(rehypeSanitize, sanitizeSchema) .use(rehypeHighlight) .use(rehypeStringify) .process(renderedMarkdown), diff --git a/src/core/publish-markdown.ts b/src/core/publish-markdown.ts new file mode 100644 index 0000000..6f06b19 --- /dev/null +++ b/src/core/publish-markdown.ts @@ -0,0 +1,208 @@ +import { readFile } from "node:fs/promises"; +import path from "node:path"; + +export interface PrepareMarkdownBodyOptions { + sourcePath?: string; +} + +const LOCAL_IMAGE_EXTENSIONS = new Set([ + ".png", + ".jpg", + ".jpeg", + ".gif", + ".webp", + ".svg", + ".avif", +]); + +const OBSIDIAN_IMAGE_EMBED_RE = /!\[\[([^\]\n]+)\]\]/g; +const MARKDOWN_IMAGE_RE = /!\[([^\]]*)\]\(([^)\n]+)\)/g; + +export async function prepareMarkdownBodyForPublish( + markdownBody: string, + options: PrepareMarkdownBodyOptions = {}, +): Promise { + if (options.sourcePath === undefined) { + return markdownBody; + } + + const baseDir = path.dirname(options.sourcePath); + + return transformOutsideCodeBlocks(markdownBody, async (segment) => { + const withObsidianEmbeds = await replaceAsync( + segment, + OBSIDIAN_IMAGE_EMBED_RE, + async (match, innerTarget) => { + const [rawTarget] = innerTarget.split("|"); + const target = rawTarget?.trim(); + + if (target === undefined || target.length === 0) { + return match; + } + + const asset = await resolveLocalImageAsset(baseDir, target); + + if (asset === null) { + return match; + } + + return `![${asset.alt}](${asset.dataUrl})`; + }, + ); + + return replaceAsync( + withObsidianEmbeds, + MARKDOWN_IMAGE_RE, + async (match, alt, rawTarget) => { + const target = normalizeMarkdownImageTarget(rawTarget); + + if (target === null || isExternalAssetTarget(target)) { + return match; + } + + const asset = await resolveLocalImageAsset(baseDir, target); + + if (asset === null) { + return match; + } + + const label = alt.trim().length > 0 ? alt.trim() : asset.alt; + return `![${escapeMarkdownLabel(label)}](${asset.dataUrl})`; + }, + ); + }); +} + +async function transformOutsideCodeBlocks( + markdown: string, + transform: (segment: string) => Promise, +): Promise { + const lines = markdown.split("\n"); + const result: string[] = []; + const buffer: string[] = []; + let inCodeBlock = false; + + async function flushBuffer(): Promise { + if (buffer.length === 0) { + return; + } + + result.push(await transform(buffer.join("\n"))); + buffer.length = 0; + } + + for (const line of lines) { + if (line.trimStart().startsWith("```")) { + await flushBuffer(); + inCodeBlock = !inCodeBlock; + result.push(line); + continue; + } + + if (inCodeBlock) { + result.push(line); + continue; + } + + buffer.push(line); + } + + await flushBuffer(); + + return result.join("\n"); +} + +async function replaceAsync( + text: string, + pattern: RegExp, + replacer: (...args: string[]) => Promise, +): Promise { + const matches = [...text.matchAll(pattern)]; + + if (matches.length === 0) { + return text; + } + + let result = ""; + let lastIndex = 0; + + for (const match of matches) { + const fullMatch = match[0]; + const matchIndex = match.index ?? 0; + result += text.slice(lastIndex, matchIndex); + result += await replacer(...match); + lastIndex = matchIndex + fullMatch.length; + } + + result += text.slice(lastIndex); + return result; +} + +function normalizeMarkdownImageTarget(rawTarget: string): string | null { + const trimmed = rawTarget.trim(); + + if (trimmed.length === 0) { + return null; + } + + if (trimmed.startsWith("<") && trimmed.endsWith(">")) { + return trimmed.slice(1, -1).trim(); + } + + const withOptionalTitle = trimmed.match(/^(\S+)(?:\s+["'][^"']*["'])?$/); + return withOptionalTitle?.[1] ?? trimmed; +} + +function isExternalAssetTarget(target: string): boolean { + return /^(?:[a-z]+:)?\/\//i.test(target) || target.startsWith("data:"); +} + +async function resolveLocalImageAsset( + baseDir: string, + rawTarget: string, +): Promise<{ alt: string; dataUrl: string } | null> { + const resolvedPath = path.resolve(baseDir, rawTarget); + const extension = path.extname(resolvedPath).toLowerCase(); + + if (!LOCAL_IMAGE_EXTENSIONS.has(extension)) { + return null; + } + + try { + const content = await readFile(resolvedPath); + return { + alt: escapeMarkdownLabel(path.basename(rawTarget)), + dataUrl: buildDataUrl(content, extension), + }; + } catch { + return null; + } +} + +function buildDataUrl(content: Buffer, extension: string): string { + return `data:${mimeTypeForExtension(extension)};base64,${content.toString("base64")}`; +} + +function mimeTypeForExtension(extension: string): string { + switch (extension) { + case ".png": + return "image/png"; + case ".jpg": + case ".jpeg": + return "image/jpeg"; + case ".gif": + return "image/gif"; + case ".webp": + return "image/webp"; + case ".svg": + return "image/svg+xml"; + case ".avif": + return "image/avif"; + default: + return "application/octet-stream"; + } +} + +function escapeMarkdownLabel(value: string): string { + return value.replaceAll("[", "\\[").replaceAll("]", "\\]"); +} diff --git a/src/core/publish-service.ts b/src/core/publish-service.ts index 8cdcf8e..4925a9a 100644 --- a/src/core/publish-service.ts +++ b/src/core/publish-service.ts @@ -23,6 +23,7 @@ import { ensureName, slugify } from "./slug.js"; export interface PublishPageInput { markdown: string; + renderMarkdown?: string; namespace: string; pageId?: string; requestedSlug?: string; @@ -89,6 +90,7 @@ export function createPublishService( await authenticate(safeNamespace, input.token); const parsed = parseMarkdownDocument(input.markdown); + const renderMarkdown = input.renderMarkdown ?? parsed.body; const requestedSlug = input.requestedSlug ?? parsed.frontmatter.slug ?? slugify(parsed.title); const safeSlug = ensureName(slugify(requestedSlug)); @@ -102,7 +104,7 @@ export function createPublishService( const now = new Date().toISOString(); const markdownBlobKey = `${pageId}.md`; const htmlBlobKey = `${pageId}.html`; - const rendered = await renderMarkdownToHtml(parsed.body); + const rendered = await renderMarkdownToHtml(renderMarkdown); const htmlDocument = buildHtmlDocument({ title: parsed.title, description: parsed.description, @@ -112,6 +114,7 @@ export function createPublishService( const contentHash = sha256( JSON.stringify({ markdown: input.markdown, + renderMarkdown, slug, title: parsed.title, description: parsed.description, diff --git a/src/server/app.ts b/src/server/app.ts index 66c0562..64ef6aa 100644 --- a/src/server/app.ts +++ b/src/server/app.ts @@ -101,12 +101,14 @@ Open source — [github.com/Restuta/pubmd](https://github.com/Restuta/pubmd)`; try { let markdown: string; + let renderMarkdown: string | undefined; let slug: string | undefined; let pageId: string | undefined; if (isJson) { const body = PublishPageRequestSchema.parse(await context.req.json()); markdown = body.markdown; + renderMarkdown = body.renderMarkdown; slug = body.slug; pageId = body.pageId; } else { @@ -119,6 +121,7 @@ Open source — [github.com/Restuta/pubmd](https://github.com/Restuta/pubmd)`; namespace: context.req.param("namespace"), token, markdown, + ...(renderMarkdown === undefined ? {} : { renderMarkdown }), origin: requestOrigin(context.req.url), ...(pageId === undefined ? {} : { pageId }), ...(slug === undefined ? {} : { requestedSlug: slug }), diff --git a/tests/integration/cli.test.ts b/tests/integration/cli.test.ts index 3fdedd3..c6c534d 100644 --- a/tests/integration/cli.test.ts +++ b/tests/integration/cli.test.ts @@ -109,6 +109,63 @@ Updated body. }; expect(Object.keys(mapping.files)).toHaveLength(0); }); + + it("renders local image embeds while preserving the original raw markdown", async () => { + const root = await mkdtemp(path.join(os.tmpdir(), "publish-it-cli-img-")); + const configDir = path.join(root, "config"); + const mappingPath = path.join(root, ".pub"); + const cwd = path.join(root, "workspace"); + const notePath = path.join(cwd, "note.md"); + const imagePath = path.join(cwd, "diagram.svg"); + + server = await startTestServer(root); + await mkdir(cwd, { recursive: true }); + await writeFile( + imagePath, + '', + "utf8", + ); + await writeFile( + notePath, + `--- +title: Embedded Image +--- + +Look: + +![[diagram.svg|320x200]] +`, + "utf8", + ); + + await runCli(["claim", "restuta", "--api-base", server.origin], { + cwd, + env: { + PUB_CONFIG_DIR: configDir, + PUB_MAPPING_PATH: mappingPath, + }, + }); + + const publishResult = await runCli( + ["publish", notePath, "--api-base", server.origin], + { + cwd, + env: { + PUB_CONFIG_DIR: configDir, + PUB_MAPPING_PATH: mappingPath, + }, + }, + ); + const pageUrl = publishResult.stdout.trim(); + + const htmlResponse = await fetch(pageUrl); + const html = await htmlResponse.text(); + expect(html).toContain("data:image/svg+xml;base64,"); + + const rawResponse = await fetch(`${pageUrl}?raw=1`); + const rawMarkdown = await rawResponse.text(); + expect(rawMarkdown).toContain("![[diagram.svg|320x200]]"); + }); }); async function runCli( diff --git a/tests/unit/markdown.test.ts b/tests/unit/markdown.test.ts index b3bad87..669bee9 100644 --- a/tests/unit/markdown.test.ts +++ b/tests/unit/markdown.test.ts @@ -154,6 +154,14 @@ describe("autolinkBareUrls", () => { ); }); + it("preserves data URL image sources in rendered HTML", async () => { + const rendered = await renderMarkdownToHtml( + "![Diagram](data:image/svg+xml;base64,PHN2Zy8+)", + ); + + expect(rendered.html).toContain('src="data:image/svg+xml;base64,PHN2Zy8+"'); + }); + it("strips wikilinks and autolinks URLs inside them", async () => { const rendered = await renderMarkdownToHtml( "like [[telegra.ph]] but for the terminal era", diff --git a/tests/unit/publish-markdown.test.ts b/tests/unit/publish-markdown.test.ts new file mode 100644 index 0000000..df2b6db --- /dev/null +++ b/tests/unit/publish-markdown.test.ts @@ -0,0 +1,53 @@ +import { mkdir, mkdtemp, writeFile } from "node:fs/promises"; +import os from "node:os"; +import path from "node:path"; + +import { describe, expect, it } from "vitest"; + +import { prepareMarkdownBodyForPublish } from "../../src/core/publish-markdown.js"; + +describe("prepareMarkdownBodyForPublish", () => { + it("converts Obsidian image embeds into data URL markdown images", async () => { + const root = await mkdtemp(path.join(os.tmpdir(), "pubmd-image-embed-")); + const notePath = path.join(root, "note.md"); + const imagePath = path.join(root, "diagram.svg"); + + await writeFile( + imagePath, + '', + "utf8", + ); + + const prepared = await prepareMarkdownBodyForPublish( + "Before\n\n![[diagram.svg|400x300]]\n\nAfter", + { + sourcePath: notePath, + }, + ); + + expect(prepared).toContain("data:image/svg+xml;base64,"); + expect(prepared).not.toContain("![[diagram.svg"); + }); + + it("converts relative markdown image paths into data URLs", async () => { + const root = await mkdtemp(path.join(os.tmpdir(), "pubmd-inline-image-")); + const notePath = path.join(root, "note.md"); + const imagePath = path.join(root, "assets", "photo.svg"); + + await mkdir(path.dirname(imagePath), { recursive: true }); + await writeFile( + imagePath, + '', + "utf8", + ); + + const prepared = await prepareMarkdownBodyForPublish( + "![Team photo](./assets/photo.svg)", + { + sourcePath: notePath, + }, + ); + + expect(prepared).toContain("![Team photo](data:image/svg+xml;base64,"); + }); +});