From 3850e7e5b2050752e8a79dec9f81c656bfe133da Mon Sep 17 00:00:00 2001 From: Marcelo Trylesinski Date: Fri, 10 Apr 2026 13:54:31 +0200 Subject: [PATCH 1/2] Silently discard epilogue data after the closing boundary Django's MultiPartParser and Werkzeug's sansio multipart parser both silently accept (and ignore) the optional CRLF + epilogue that RFC 2046 section 5.1.1 allows after the closing boundary. python-multipart was logging "Skipping data after last boundary" instead, and the single- chunk guard added in #193 also missed the case where the trailing CRLF was split across two writes - a trailing `\r` at the end of one chunk would fall through to the warning branch because `i + 1 < length` was false, even though the `\n` arrived in the next chunk. Drop the warning and the split-chunk guard entirely. The epilogue is short-circuited the same way as before (no O(N) scan), just without the spurious log line. Closes #246. --- CHANGELOG.md | 4 ++ python_multipart/multipart.py | 9 ++--- .../http/single_field_with_trailer.http | 4 +- tests/test_multipart.py | 37 ++++++++++--------- 4 files changed, 29 insertions(+), 25 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6a00766..d1c5fad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Changelog +## UNRELEASED + +* Silently discard epilogue data after the closing boundary instead of logging a warning, matching Django and Werkzeug. Also fixes a spurious warning when the trailing CRLF is split across chunks [#246](https://github.com/Kludex/python-multipart/issues/246). + ## 0.0.24 (2026-04-05) * Validate `chunk_size` in `parse_form()` [#244](https://github.com/Kludex/python-multipart/pull/244). diff --git a/python_multipart/multipart.py b/python_multipart/multipart.py index ca78034..d64ba1c 100644 --- a/python_multipart/multipart.py +++ b/python_multipart/multipart.py @@ -1413,12 +1413,9 @@ def data_callback(name: CallbackName, end_i: int, remaining: bool = False) -> No state = MultipartState.END elif state == MultipartState.END: - # Don't do anything if chunk ends with CRLF. - if c == CR and i + 1 < length and data[i + 1] == LF: - i += 2 - continue - # Skip data after the last boundary. - self.logger.warning("Skipping data after last boundary") + # Silently discard any epilogue data (RFC 2046 section 5.1.1 + # allows a CRLF and optional epilogue after the closing + # boundary). Django and Werkzeug do the same. i = length break diff --git a/tests/test_data/http/single_field_with_trailer.http b/tests/test_data/http/single_field_with_trailer.http index a570340..ea0c148 100644 --- a/tests/test_data/http/single_field_with_trailer.http +++ b/tests/test_data/http/single_field_with_trailer.http @@ -3,5 +3,5 @@ Content-Disposition: form-data; name="field" This is a test. ------WebKitFormBoundaryTkr3kCBQlBe1nrhc-- -this trailer causes a warning -but should be ignored \ No newline at end of file +this trailer is epilogue data +and should be silently ignored \ No newline at end of file diff --git a/tests/test_multipart.py b/tests/test_multipart.py index a62d563..c0b01f0 100644 --- a/tests/test_multipart.py +++ b/tests/test_multipart.py @@ -1336,7 +1336,7 @@ def on_field(f: Field) -> None: self.assertEqual(fields[2].value, b"asdf") def test_multipart_parser_newlines_before_first_boundary(self) -> None: - """This test makes sure that the parser does not handle when there is junk data after the last boundary.""" + """Parser must not hang or blow up on a large preamble of blank lines before the first boundary.""" num = 5_000_000 data = ( "\r\n" * num + "--boundary\r\n" @@ -1355,7 +1355,7 @@ def on_file(f: File) -> None: f.write(data.encode("latin-1")) def test_multipart_parser_data_after_last_boundary(self) -> None: - """This test makes sure that the parser does not handle when there is junk data after the last boundary.""" + """Parser must short-circuit on arbitrary epilogue data after the closing boundary (no O(N) scan).""" num = 50_000_000 data = ( "--boundary\r\n" @@ -1377,25 +1377,28 @@ def on_file(f: File) -> None: def inject_fixtures(self, caplog: pytest.LogCaptureFixture) -> None: self._caplog = caplog - def test_multipart_parser_data_end_with_crlf_without_warnings(self) -> None: - """This test makes sure that the parser does not handle when the data ends with a CRLF.""" - data = ( + def test_multipart_parser_epilogue_emits_no_warnings(self) -> None: + """Epilogue data after the closing boundary must not produce a warning. + + Covers both the single-chunk case and the case where the trailing + CRLF is split across `write()` calls (e.g. when `\r` and `\n` land in + separate TCP chunks behind a proxy). + """ + head = ( "--boundary\r\n" 'Content-Disposition: form-data; name="file"; filename="filename.txt"\r\n' "Content-Type: text/plain\r\n\r\n" "hello\r\n" - "--boundary--\r\n" - ) - - files: list[File] = [] - - def on_file(f: File) -> None: - files.append(f) - - f = FormParser("multipart/form-data", on_field=Mock(), on_file=on_file, boundary="boundary") - with self._caplog.at_level(logging.WARNING): - f.write(data.encode("latin-1")) - assert len(self._caplog.records) == 0 + "--boundary--" + ).encode("latin-1") + + for chunks in ([head + b"\r\n"], [head + b"\r", b"\n"], [head, b"\r\n"], [head + b"\r\nignored epilogue"]): + f = FormParser("multipart/form-data", on_field=Mock(), on_file=Mock(), boundary="boundary") + with self._caplog.at_level(logging.WARNING): + self._caplog.clear() + for chunk in chunks: + f.write(chunk) + assert self._caplog.records == [] def test_max_size_multipart(self) -> None: # Load test data. From 13cef88a13a8305b92444fb38534f821ee40f2e4 Mon Sep 17 00:00:00 2001 From: Marcelo Trylesinski Date: Fri, 10 Apr 2026 14:36:47 +0200 Subject: [PATCH 2/2] Address epilogue review comments --- CHANGELOG.md | 4 --- python_multipart/multipart.py | 5 ++- tests/test_multipart.py | 57 ++++++++++++++++++++--------------- 3 files changed, 34 insertions(+), 32 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d1c5fad..6a00766 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,9 +1,5 @@ # Changelog -## UNRELEASED - -* Silently discard epilogue data after the closing boundary instead of logging a warning, matching Django and Werkzeug. Also fixes a spurious warning when the trailing CRLF is split across chunks [#246](https://github.com/Kludex/python-multipart/issues/246). - ## 0.0.24 (2026-04-05) * Validate `chunk_size` in `parse_form()` [#244](https://github.com/Kludex/python-multipart/pull/244). diff --git a/python_multipart/multipart.py b/python_multipart/multipart.py index d64ba1c..82d4c82 100644 --- a/python_multipart/multipart.py +++ b/python_multipart/multipart.py @@ -1413,9 +1413,8 @@ def data_callback(name: CallbackName, end_i: int, remaining: bool = False) -> No state = MultipartState.END elif state == MultipartState.END: - # Silently discard any epilogue data (RFC 2046 section 5.1.1 - # allows a CRLF and optional epilogue after the closing - # boundary). Django and Werkzeug do the same. + # Silently discard any epilogue data (RFC 2046 section 5.1.1 allows a CRLF and optional + # epilogue after the closing boundary). Django and Werkzeug do the same. i = length break diff --git a/tests/test_multipart.py b/tests/test_multipart.py index c0b01f0..bf09c00 100644 --- a/tests/test_multipart.py +++ b/tests/test_multipart.py @@ -1,6 +1,5 @@ from __future__ import annotations -import logging import os import random import sys @@ -734,6 +733,14 @@ def test_not_aligned(self) -> None: "single_field_single_file", ] +EPILOGUE_TEST_HEAD = ( + "--boundary\r\n" + 'Content-Disposition: form-data; name="file"; filename="filename.txt"\r\n' + "Content-Type: text/plain\r\n\r\n" + "hello\r\n" + "--boundary--" +).encode("latin-1") + def split_all(val: bytes) -> Iterator[tuple[bytes, bytes]]: """ @@ -1373,32 +1380,32 @@ def on_file(f: File) -> None: f = FormParser("multipart/form-data", on_field=Mock(), on_file=on_file, boundary="boundary") f.write(data.encode("latin-1")) - @pytest.fixture(autouse=True) - def inject_fixtures(self, caplog: pytest.LogCaptureFixture) -> None: - self._caplog = caplog - - def test_multipart_parser_epilogue_emits_no_warnings(self) -> None: - """Epilogue data after the closing boundary must not produce a warning. + @parametrize( + "chunks", + [ + [EPILOGUE_TEST_HEAD + b"\r\n"], + [EPILOGUE_TEST_HEAD + b"\r", b"\n"], + [EPILOGUE_TEST_HEAD, b"\r\n"], + [EPILOGUE_TEST_HEAD + b"\r\n--boundary\r\nthis is not a valid header\r\n\r\nnot a real part"], + ], + ) + def test_multipart_parser_ignores_epilogue(self, chunks: list[bytes]) -> None: + """Epilogue data after the closing boundary must be ignored. - Covers both the single-chunk case and the case where the trailing - CRLF is split across `write()` calls (e.g. when `\r` and `\n` land in - separate TCP chunks behind a proxy). + Covers both the single-chunk case and the case where trailing CRLF is split across `write()` calls. + The final case asserts that epilogue bytes are not parsed or validated. """ - head = ( - "--boundary\r\n" - 'Content-Disposition: form-data; name="file"; filename="filename.txt"\r\n' - "Content-Type: text/plain\r\n\r\n" - "hello\r\n" - "--boundary--" - ).encode("latin-1") - - for chunks in ([head + b"\r\n"], [head + b"\r", b"\n"], [head, b"\r\n"], [head + b"\r\nignored epilogue"]): - f = FormParser("multipart/form-data", on_field=Mock(), on_file=Mock(), boundary="boundary") - with self._caplog.at_level(logging.WARNING): - self._caplog.clear() - for chunk in chunks: - f.write(chunk) - assert self._caplog.records == [] + files: list[File] = [] + + def on_file(f: File) -> None: + files.append(f) + + f = FormParser("multipart/form-data", on_field=Mock(), on_file=on_file, boundary="boundary") + for chunk in chunks: + f.write(chunk) + + assert len(files) == 1 + self.assert_file_data(files[0], b"hello") def test_max_size_multipart(self) -> None: # Load test data.