diff --git a/python_multipart/multipart.py b/python_multipart/multipart.py index 5961b8e..4de60e5 100644 --- a/python_multipart/multipart.py +++ b/python_multipart/multipart.py @@ -1417,12 +1417,8 @@ def data_callback(name: CallbackName, end_i: int, remaining: bool = False) -> No state = MultipartState.END elif state == MultipartState.END: - # Don't do anything if chunk ends with CRLF. - if c == CR and i + 1 < length and data[i + 1] == LF: - i += 2 - continue - # Skip data after the last boundary. - self.logger.warning("Skipping data after last boundary") + # Silently discard any epilogue data (RFC 2046 section 5.1.1 allows a CRLF and optional + # epilogue after the closing boundary). Django and Werkzeug do the same. i = length break diff --git a/tests/test_data/http/single_field_with_trailer.http b/tests/test_data/http/single_field_with_trailer.http index a570340..ea0c148 100644 --- a/tests/test_data/http/single_field_with_trailer.http +++ b/tests/test_data/http/single_field_with_trailer.http @@ -3,5 +3,5 @@ Content-Disposition: form-data; name="field" This is a test. ------WebKitFormBoundaryTkr3kCBQlBe1nrhc-- -this trailer causes a warning -but should be ignored \ No newline at end of file +this trailer is epilogue data +and should be silently ignored \ No newline at end of file diff --git a/tests/test_multipart.py b/tests/test_multipart.py index 2941f0f..e2cb571 100644 --- a/tests/test_multipart.py +++ b/tests/test_multipart.py @@ -1,6 +1,5 @@ from __future__ import annotations -import logging import os import random import sys @@ -734,6 +733,14 @@ def test_not_aligned(self) -> None: "single_field_single_file", ] +EPILOGUE_TEST_HEAD = ( + "--boundary\r\n" + 'Content-Disposition: form-data; name="file"; filename="filename.txt"\r\n' + "Content-Type: text/plain\r\n\r\n" + "hello\r\n" + "--boundary--" +).encode("latin-1") + def split_all(val: bytes) -> Iterator[tuple[bytes, bytes]]: """ @@ -1386,7 +1393,7 @@ def on_file(f: File) -> None: self.assert_file_data(files[0], b"hello") def test_multipart_parser_data_after_last_boundary(self) -> None: - """This test makes sure that the parser does not handle when there is junk data after the last boundary.""" + """Parser must short-circuit on arbitrary epilogue data after the closing boundary (no O(N) scan).""" num = 50_000_000 data = ( "--boundary\r\n" @@ -1404,29 +1411,32 @@ def on_file(f: File) -> None: f = FormParser("multipart/form-data", on_field=Mock(), on_file=on_file, boundary="boundary") f.write(data.encode("latin-1")) - @pytest.fixture(autouse=True) - def inject_fixtures(self, caplog: pytest.LogCaptureFixture) -> None: - self._caplog = caplog - - def test_multipart_parser_data_end_with_crlf_without_warnings(self) -> None: - """This test makes sure that the parser does not handle when the data ends with a CRLF.""" - data = ( - "--boundary\r\n" - 'Content-Disposition: form-data; name="file"; filename="filename.txt"\r\n' - "Content-Type: text/plain\r\n\r\n" - "hello\r\n" - "--boundary--\r\n" - ) + @parametrize( + "chunks", + [ + [EPILOGUE_TEST_HEAD + b"\r\n"], + [EPILOGUE_TEST_HEAD + b"\r", b"\n"], + [EPILOGUE_TEST_HEAD, b"\r\n"], + [EPILOGUE_TEST_HEAD + b"\r\n--boundary\r\nthis is not a valid header\r\n\r\nnot a real part"], + ], + ) + def test_multipart_parser_ignores_epilogue(self, chunks: list[bytes]) -> None: + """Epilogue data after the closing boundary must be ignored. + Covers both the single-chunk case and the case where trailing CRLF is split across `write()` calls. + The final case asserts that epilogue bytes are not parsed or validated. + """ files: list[File] = [] def on_file(f: File) -> None: files.append(f) f = FormParser("multipart/form-data", on_field=Mock(), on_file=on_file, boundary="boundary") - with self._caplog.at_level(logging.WARNING): - f.write(data.encode("latin-1")) - assert len(self._caplog.records) == 0 + for chunk in chunks: + f.write(chunk) + + assert len(files) == 1 + self.assert_file_data(files[0], b"hello") def test_max_size_multipart(self) -> None: # Load test data.