Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 2 additions & 6 deletions python_multipart/multipart.py
Original file line number Diff line number Diff line change
Expand Up @@ -1417,12 +1417,8 @@ def data_callback(name: CallbackName, end_i: int, remaining: bool = False) -> No
state = MultipartState.END

elif state == MultipartState.END:
# Don't do anything if chunk ends with CRLF.
if c == CR and i + 1 < length and data[i + 1] == LF:
i += 2
continue
# Skip data after the last boundary.
self.logger.warning("Skipping data after last boundary")
# Silently discard any epilogue data (RFC 2046 section 5.1.1 allows a CRLF and optional
# epilogue after the closing boundary). Django and Werkzeug do the same.
i = length
break

Expand Down
4 changes: 2 additions & 2 deletions tests/test_data/http/single_field_with_trailer.http
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@ Content-Disposition: form-data; name="field"

This is a test.
------WebKitFormBoundaryTkr3kCBQlBe1nrhc--
this trailer causes a warning
but should be ignored
this trailer is epilogue data
and should be silently ignored
46 changes: 28 additions & 18 deletions tests/test_multipart.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from __future__ import annotations

import logging
import os
import random
import sys
Expand Down Expand Up @@ -734,6 +733,14 @@ def test_not_aligned(self) -> None:
"single_field_single_file",
]

EPILOGUE_TEST_HEAD = (
"--boundary\r\n"
'Content-Disposition: form-data; name="file"; filename="filename.txt"\r\n'
"Content-Type: text/plain\r\n\r\n"
"hello\r\n"
"--boundary--"
).encode("latin-1")


def split_all(val: bytes) -> Iterator[tuple[bytes, bytes]]:
"""
Expand Down Expand Up @@ -1386,7 +1393,7 @@ def on_file(f: File) -> None:
self.assert_file_data(files[0], b"hello")

def test_multipart_parser_data_after_last_boundary(self) -> None:
"""This test makes sure that the parser does not handle when there is junk data after the last boundary."""
"""Parser must short-circuit on arbitrary epilogue data after the closing boundary (no O(N) scan)."""
num = 50_000_000
data = (
"--boundary\r\n"
Expand All @@ -1404,29 +1411,32 @@ def on_file(f: File) -> None:
f = FormParser("multipart/form-data", on_field=Mock(), on_file=on_file, boundary="boundary")
f.write(data.encode("latin-1"))

@pytest.fixture(autouse=True)
def inject_fixtures(self, caplog: pytest.LogCaptureFixture) -> None:
self._caplog = caplog

def test_multipart_parser_data_end_with_crlf_without_warnings(self) -> None:
"""This test makes sure that the parser does not handle when the data ends with a CRLF."""
data = (
"--boundary\r\n"
'Content-Disposition: form-data; name="file"; filename="filename.txt"\r\n'
"Content-Type: text/plain\r\n\r\n"
"hello\r\n"
"--boundary--\r\n"
)
@parametrize(
"chunks",
[
[EPILOGUE_TEST_HEAD + b"\r\n"],
[EPILOGUE_TEST_HEAD + b"\r", b"\n"],
[EPILOGUE_TEST_HEAD, b"\r\n"],
[EPILOGUE_TEST_HEAD + b"\r\n--boundary\r\nthis is not a valid header\r\n\r\nnot a real part"],
],
)
def test_multipart_parser_ignores_epilogue(self, chunks: list[bytes]) -> None:
"""Epilogue data after the closing boundary must be ignored.

Covers both the single-chunk case and the case where trailing CRLF is split across `write()` calls.
The final case asserts that epilogue bytes are not parsed or validated.
"""
files: list[File] = []

def on_file(f: File) -> None:
files.append(f)

f = FormParser("multipart/form-data", on_field=Mock(), on_file=on_file, boundary="boundary")
with self._caplog.at_level(logging.WARNING):
f.write(data.encode("latin-1"))
assert len(self._caplog.records) == 0
for chunk in chunks:
f.write(chunk)

assert len(files) == 1
self.assert_file_data(files[0], b"hello")

def test_max_size_multipart(self) -> None:
# Load test data.
Expand Down
Loading