From 9f4b6a3e6e3812e0be0b3277d0bc090c5773afbd Mon Sep 17 00:00:00 2001 From: Romsahel Date: Wed, 4 Feb 2026 16:08:30 +0100 Subject: [PATCH 1/2] renderer: use RFC 2231 encoding for Content-Disposition and Content-Type parameters RFC 2047 encoded-words are forbidden in MIME parameter values. This change implements RFC 2231 encoding (charset'language'percent-encoded) for Content-Type and Content-Disposition parameters while preserving RFC 2047 for other headers. --- lib/mail/renderers/rfc_2822.ex | 61 +++++++++++++++++++++------ test/mail/renderers/rfc_2822_test.exs | 45 ++++++++++++++++++++ 2 files changed, 92 insertions(+), 14 deletions(-) diff --git a/lib/mail/renderers/rfc_2822.ex b/lib/mail/renderers/rfc_2822.ex index b1903063..bff96a56 100644 --- a/lib/mail/renderers/rfc_2822.ex +++ b/lib/mail/renderers/rfc_2822.ex @@ -118,6 +118,15 @@ defmodule Mail.Renderers.RFC2822 do render_header_value(key, value) end + @rfc2231_headers ["Content-Disposition", "Content-Type"] + defp render_header_value(key, value) when key in @rfc2231_headers and is_binary(value) do + value + end + + defp render_header_value(key, [value | subtypes]) when key in @rfc2231_headers do + Enum.join([value | render_subtypes(subtypes, :rfc_2231)], "; ") + end + defp render_header_value(_key, [value | subtypes]), do: Enum.join([encode_header_value(value, :quoted_printable) | render_subtypes(subtypes)], "; ") @@ -143,27 +152,46 @@ defmodule Mail.Renderers.RFC2822 do defp render_address(email), do: validate_address(email) - defp render_subtypes([]), do: [] + defp render_subtypes(subtypes, encoding \\ :quoted_printable) + + defp render_subtypes([], _encoding), do: [] - defp render_subtypes([{key, value} | subtypes]) when is_atom(key), - do: render_subtypes([{Atom.to_string(key), value} | subtypes]) + defp render_subtypes([{key, value} | subtypes], encoding) when is_atom(key), + do: render_subtypes([{Atom.to_string(key), value} | subtypes], encoding) - defp render_subtypes([{"boundary", value} | 
subtypes]) do - [~s(boundary="#{value}") | render_subtypes(subtypes)] + defp render_subtypes([{"boundary", value} | subtypes], encoding) do + [~s(boundary="#{value}") | render_subtypes(subtypes, encoding)] end - defp render_subtypes([{key, value} | subtypes]) do + # RFC 2231 parameter encoding for Content-Type and Content-Disposition + defp render_subtypes([{key, value} | subtypes], :rfc_2231) do key = String.replace(key, "_", "-") - value = encode_header_value(value, :quoted_printable) - value = - if value =~ ~r/[\s()<>@,;:\\<\/\[\]?=]/ do - "\"#{value}\"" - else - value - end + if contains_non_ascii?(value) do + value = encode_header_value(value, :rfc_2231) + ["#{key}*=UTF-8''#{value}" | render_subtypes(subtypes, :rfc_2231)] + else + value = maybe_wrap_in_quotes(value) + ["#{key}=#{value}" | render_subtypes(subtypes, :rfc_2231)] + end + end - ["#{key}=#{value}" | render_subtypes(subtypes)] + defp render_subtypes([{key, value} | subtypes], :quoted_printable) do + key = String.replace(key, "_", "-") + value = value |> encode_header_value(:quoted_printable) |> maybe_wrap_in_quotes() + ["#{key}=#{value}" | render_subtypes(subtypes, :quoted_printable)] + end + + defp contains_non_ascii?(value) do + String.match?(value, ~r/[\x80-\xFF]/) + end + + defp maybe_wrap_in_quotes(value) do + if value =~ ~r/[\s()<>@,;:\\<\/\[\]?=]/ do + "\"#{value}\"" + else + value + end end @doc """ @@ -198,6 +226,11 @@ defmodule Mail.Renderers.RFC2822 do end end + defp encode_header_value(header_value, :rfc_2231) do + # RFC 2231: parameter*=UTF-8''percent-encoded-value + URI.encode(header_value, &URI.char_unreserved?/1) + end + defp wrap_encoded_words(value) do :binary.split(value, "=\r\n", [:global]) |> Enum.map(fn chunk -> <<"=?UTF-8?Q?", chunk::binary, "?=">> end) diff --git a/test/mail/renderers/rfc_2822_test.exs b/test/mail/renderers/rfc_2822_test.exs index cd355957..0e6a24d3 100644 --- a/test/mail/renderers/rfc_2822_test.exs +++ b/test/mail/renderers/rfc_2822_test.exs @@ -367,6 +367,51 
@@ defmodule Mail.Renderers.RFC2822Test do end end + describe "RFC 2047 encoding restrictions" do + test "Subject header uses RFC 2047 encoding for non-ASCII characters" do + header = Mail.Renderers.RFC2822.render_header("subject", "café") + assert header == "Subject: =?UTF-8?Q?caf=C3=A9?=" + + header = Mail.Renderers.RFC2822.render_header("subject", "Test 日本語 Subject") + assert header =~ "Subject: =?UTF-8?Q?" + assert header =~ "?=" + end + + test "From header uses RFC 2047 encoding for non-ASCII names" do + header = Mail.Renderers.RFC2822.render_header("from", {"José García", "jose@example.com"}) + assert header == ~s(From: =?UTF-8?Q?"Jos=C3=A9 Garc=C3=ADa"?= <jose@example.com>) + + header = Mail.Renderers.RFC2822.render_header("from", {"山田太郎", "yamada@example.com"}) + assert header =~ "From: =?UTF-8?Q?" + assert header =~ "?= <yamada@example.com>" + end + + test "Content-Disposition non-ASCII filename parameter uses RFC 2231 encoding" do + # RFC 2047 explicitly forbids encoded-words in Content-Disposition parameters + header = + Mail.Renderers.RFC2822.render_header( + "Content-Disposition", + ["attachment", filename: "café.pdf"] + ) + + # Should NOT contain RFC 2047 encoded-word markers + # Should use RFC 2231 encoding for non-ASCII characters + assert header == "Content-Disposition: attachment; filename*=UTF-8''caf%C3%A9.pdf" + end + + test "Content-Disposition ASCII filename parameter does NOT use RFC 2047 encoding" do + # RFC 2047 explicitly forbids encoded-words in Content-Disposition parameters + header = + Mail.Renderers.RFC2822.render_header( + "Content-Disposition", + ["attachment", filename: "name.pdf"] + ) + + # Should NOT contain RFC 2047 encoded-word markers + assert header == "Content-Disposition: attachment; filename=name.pdf" + end + end + describe "multipart configuration" do test "multipart/alternative with text/plain and text/html" do message = From efb57fadf70e23d42574398c19f93cc49eb244e6 Mon Sep 17 00:00:00 2001 From: Romsahel Date: Wed, 4 Feb 2026 16:14:37 +0100 Subject: [PATCH 
2/2] fix: RFC 2231 parser handling of simple extended parameters Fix parser to correctly handle simple RFC 2231 extended parameters (e.g., filename*=UTF-8''value) by using `trim: true` in String.split. --- lib/mail/parsers/rfc_2822.ex | 4 ++-- test/mail/message_test.exs | 12 ++++++++---- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/lib/mail/parsers/rfc_2822.ex b/lib/mail/parsers/rfc_2822.ex index a59911be..bb999526 100644 --- a/lib/mail/parsers/rfc_2822.ex +++ b/lib/mail/parsers/rfc_2822.ex @@ -947,11 +947,11 @@ defmodule Mail.Parsers.RFC2822 do |> Enum.map(fn # Find parameters that are split into multiple parts {key, value} when is_binary(value) -> - case String.split(key, "*", parts: 2) do + case String.split(key, "*", parts: 2, trim: true) do [key, part] -> {{key, part}, value} - _ -> + [key] -> {key, value} end diff --git a/test/mail/message_test.exs b/test/mail/message_test.exs index 3cf9344b..a2ee2b52 100644 --- a/test/mail/message_test.exs +++ b/test/mail/message_test.exs @@ -262,13 +262,17 @@ defmodule Mail.MessageTest do |> Mail.put_attachment({file_name, "data"}, headers: [content_id: "attachment-id"]) |> Mail.render() - encoded_header_value = - "=?UTF-8?Q?" <> Mail.Encoders.QuotedPrintable.encode("READMEüä.md") <> "?=" + refute String.contains?(message, "=?UTF-8?Q?") - assert String.contains?(message, encoded_header_value) + assert String.contains?(message, "filename*=UTF-8''README%C3%BC%C3%A4.md") assert %Mail.Message{ - headers: %{"content-disposition" => ["attachment", {"filename", ^file_name}]} + headers: %{ + "content-disposition" => [ + "attachment", + {"filename", "UTF-8''README%C3%BC%C3%A4.md"} + ] + } } = Mail.Parsers.RFC2822.parse(message) end