diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 00000000..749d6190 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,2 @@ +# Wire-accurate bytes (must keep CRLF); Git must not normalize to LF on Unix checkouts. +imap-codec/tests/fixtures_mailru_fetch_2829.bin binary diff --git a/imap-codec/Cargo.toml b/imap-codec/Cargo.toml index 3cc2b6e5..c46262d1 100644 --- a/imap-codec/Cargo.toml +++ b/imap-codec/Cargo.toml @@ -35,6 +35,7 @@ quirk = [ "quirk_spaces_between_addresses", "quirk_empty_continue_req", "quirk_body_fld_enc_nil_to_empty", + "quirk_body_fld_param_nil_value", "quirk_always_normalize_sequence_sets", ] # Make `\r` in `\r\n` optional. @@ -61,6 +62,8 @@ quirk_trailing_space_search = [] quirk_empty_continue_req = [] # Encode NIL `body-fld-enc` as empty string. quirk_body_fld_enc_nil_to_empty = [] +# Accept NIL as the second element in `body-fld-param` pairs (e.g. Mail.ru `("boundary" NIL)`). +quirk_body_fld_param_nil_value = [] # Always normalize sequence sets during encoding quirk_always_normalize_sequence_sets = [] diff --git a/imap-codec/src/body.rs b/imap-codec/src/body.rs index b78f51cc..f7a1822d 100644 --- a/imap-codec/src/body.rs +++ b/imap-codec/src/body.rs @@ -210,6 +210,28 @@ pub(crate) fn body_fields(input: &[u8]) -> IMAPResult<&[u8], BasicFields> { )) } +/// Second half of a `body-fld-param` key/value pair. +/// +/// RFC 3501 requires `string`; some servers (e.g. Mail.ru) send `NIL` for the value +/// in `body-fld-param` lists like `("boundary" NIL)`. 
+#[inline] +fn body_fld_param_value(input: &[u8]) -> IMAPResult<&[u8], IString> { + #[cfg(not(feature = "quirk_body_fld_param_nil_value"))] + { + string(input) + } + + #[cfg(feature = "quirk_body_fld_param_nil_value")] + { + alt(( + string, + map(nil, |_| { + IString::try_from("").expect("empty string is valid IString") + }), + ))(input) + } +} + /// ```abnf /// body-fld-param = "(" /// string SP string @@ -223,7 +245,10 @@ pub(crate) fn body_fld_param(input: &[u8]) -> IMAPResult<&[u8], Vec<(IString, IS // Quirk: See https://github.com/emersion/go-imap/issues/557 separated_list0( sp, - map(tuple((string, sp, string)), |(key, _, value)| (key, value)), + map( + tuple((string, sp, body_fld_param_value)), + |(key, _, value)| (key, value), + ), ), tag(b")"), ), @@ -644,6 +669,21 @@ mod tests { let _ = body(8)(str::repeat("(", 1_000_000).as_bytes()); } + /// Mail.ru `BODYSTRUCTURE`: `multipart/mixed` whose first part is nested `multipart/alternative`. + #[test] + fn test_parse_mailru_nested_multipart_body() { + // Three `(` after `BODYSTRUCTURE `: mixed wrapper, alternative wrapper, first text part. + let s = concat!( + "(", + "((\"text\" \"plain\" (\"charset\" \"utf-8\") NIL NIL \"base64\" 1628 0 NIL NIL NIL NIL)", + "(\"text\" \"html\" (\"charset\" \"utf-8\") NIL NIL \"quoted-printable\" 21021 0 NIL NIL NIL NIL) ", + "\"alternative\" (\"boundary\" NIL)) ", + "\"mixed\" (\"boundary\" NIL)", + ")" + ); + body(8)(s.as_bytes()).expect("Mail.ru nested multipart"); + } + #[test] fn test_parse_body_ext_mpart() { for test in [ diff --git a/imap-codec/src/core.rs b/imap-codec/src/core.rs index fa33a11f..8672ce21 100644 --- a/imap-codec/src/core.rs +++ b/imap-codec/src/core.rs @@ -87,25 +87,33 @@ pub(crate) fn string(input: &[u8]) -> IMAPResult<&[u8], IString> { /// This function only allocates a new String, when needed, i.e. when /// quoted chars need to be replaced. 
pub(crate) fn quoted(input: &[u8]) -> IMAPResult<&[u8], Quoted> { - let mut parser = tuple(( - dquote, + // RFC 3501: quoted = DQUOTE *QUOTED-CHAR DQUOTE — zero characters between quotes is valid. + // Mail.ru (and others) emit `""` in envelope addresses. + let mut parser = alt(( + map(tuple((dquote, dquote)), |_| unescape_quoted("")), map( - escaped( - take_while1(is_any_text_char_except_quoted_specials), - '\\', - one_of("\\\""), - ), - // # Safety - // - // `unwrap` is safe because val contains ASCII-only characters. - |val| from_utf8(val).unwrap(), + tuple(( + dquote, + map( + escaped( + take_while1(is_any_text_char_except_quoted_specials), + '\\', + one_of("\\\""), + ), + // # Safety + // + // `unwrap` is safe because val contains ASCII-only characters. + |val| unescape_quoted(from_utf8(val).unwrap()), + ), + dquote, + )), + |(_, cow, _)| cow, ), - dquote, )); - let (remaining, (_, quoted, _)) = parser(input)?; + let (remaining, cow) = parser(input)?; - Ok((remaining, Quoted::unvalidated(unescape_quoted(quoted)))) + Ok((remaining, Quoted::unvalidated(cow))) } /// `QUOTED-CHAR = <any TEXT-CHAR except quoted-specials> / "\" quoted-specials` @@ -360,6 +368,11 @@ mod tests { // ... or this (Hello "World")? 
assert_eq!(val, Quoted::try_from("Hello \"World\"").unwrap()); + // RFC 3501: quoted = DQUOTE *QUOTED-CHAR DQUOTE (zero chars allowed; Mail.ru uses "") + let (rem, val) = quoted(br#""""#).unwrap(); + assert_eq!(rem, b""); + assert_eq!(val, Quoted::try_from("").unwrap()); + // Test Incomplete assert!(matches!(quoted(br#""#), Err(nom::Err::Incomplete(_)))); assert!(matches!(quoted(br#""\"#), Err(nom::Err::Incomplete(_)))); diff --git a/imap-codec/src/fetch.rs b/imap-codec/src/fetch.rs index 93dc7ec2..b35fbd0f 100644 --- a/imap-codec/src/fetch.rs +++ b/imap-codec/src/fetch.rs @@ -359,7 +359,11 @@ mod tests { }; use super::*; - use crate::testing::known_answer_test_encode; + use crate::{ + body::body, + envelope::envelope, + testing::{known_answer_test_encode, trim_line_end}, + }; #[test] fn test_encode_message_data_item_name() { @@ -542,4 +546,45 @@ mod tests { known_answer_test_encode(test) } } + + #[test] + fn mailru_fixture_bodystructure_only_parses() { + static LINE: &[u8] = include_bytes!("../tests/fixtures_mailru_fetch_2829.bin"); + let line = trim_line_end(LINE); + let key = b"BODYSTRUCTURE "; + let i = line + .windows(key.len()) + .position(|w| w == key) + .expect("BODYSTRUCTURE"); + let rest = &line[i + key.len()..]; + let r = body(8)(rest); + assert!(r.is_ok(), "body: {r:?}"); + let (rem, _) = r.unwrap(); + assert_eq!( + rem, + b")", + "after body: {:?}", + core::str::from_utf8(rem).ok() + ); + } + + #[test] + fn mailru_fixture_envelope_only_parses() { + static LINE: &[u8] = include_bytes!("../tests/fixtures_mailru_fetch_2829.bin"); + let line = trim_line_end(LINE); + let key = b"ENVELOPE "; + let i = line + .windows(key.len()) + .position(|w| w == key) + .expect("ENVELOPE"); + let rest = &line[i + key.len()..]; + let r = envelope(rest); + assert!(r.is_ok(), "envelope: {r:?}"); + let (rem, _) = r.unwrap(); + assert!( + rem.starts_with(b" BODYSTRUCTURE"), + "after envelope: {:?}", + core::str::from_utf8(rem).unwrap_or("non-utf8") + ); + } } diff --git 
a/imap-codec/src/response.rs b/imap-codec/src/response.rs index 2d88b1a7..cdffc91e 100644 --- a/imap-codec/src/response.rs +++ b/imap-codec/src/response.rs @@ -810,4 +810,18 @@ mod tests { #[cfg(feature = "quirk_trailing_space_capability")] assert!(response_data(b"* CAPABILITY IMAP4REV1 \r\n").is_ok()); } + + /// Real Mail.ru `FETCH` line (see `tests/mailru_fetch_repro.rs`). + #[test] + fn test_parse_mailru_fetch_fixture() { + static RAW: &[u8] = include_bytes!("../tests/fixtures_mailru_fetch_2829.bin"); + use crate::testing::normalize_imap_line_crlf; + let line = normalize_imap_line_crlf(RAW); + let parsed = response(line.as_ref()); + if let Err(ref e) = parsed { + eprintln!("{e:?}"); + } + let (rem, _) = parsed.unwrap(); + assert!(rem.is_empty()); + } } diff --git a/imap-codec/src/testing.rs b/imap-codec/src/testing.rs index c8f883b0..e7f89218 100644 --- a/imap-codec/src/testing.rs +++ b/imap-codec/src/testing.rs @@ -80,6 +80,30 @@ macro_rules! impl_kat_inverse { }; } +/// Strip `\r\n` or `\n` from the end of an included wire capture. +/// +/// Git may normalize line endings on checkout unless the file is marked `binary` in +/// `.gitattributes`. Substring tests that slice before `BODYSTRUCTURE` / `ENVELOPE` must not use +/// a fixed `len - 2` chop, which breaks when the terminator is a single `\n`. +pub(crate) fn trim_line_end(bytes: &[u8]) -> &[u8] { + bytes + .strip_suffix(b"\r\n") + .or_else(|| bytes.strip_suffix(b"\n")) + .unwrap_or(bytes) +} + +/// Ensure a wire line ends with `\r\n` for parsers that use strict `CRLF` (default features). +/// +/// Git often stores/transcodes captures as LF-only; `include_bytes!` then lacks `\r`. +pub(crate) fn normalize_imap_line_crlf(bytes: &[u8]) -> std::borrow::Cow<'_, [u8]> { + if bytes.ends_with(b"\r\n") { + return std::borrow::Cow::Borrowed(bytes); + } + let mut v = trim_line_end(bytes).to_vec(); + v.extend_from_slice(b"\r\n"); + std::borrow::Cow::Owned(v) +} + impl_kat_inverse! 
{kat_inverse_greeting, GreetingCodec, Greeting} impl_kat_inverse! {kat_inverse_command, CommandCodec, Command} impl_kat_inverse! {kat_inverse_response, ResponseCodec, Response} diff --git a/imap-codec/tests/fixtures_mailru_fetch_2829.bin b/imap-codec/tests/fixtures_mailru_fetch_2829.bin new file mode 100644 index 00000000..39351fa6 --- /dev/null +++ b/imap-codec/tests/fixtures_mailru_fetch_2829.bin @@ -0,0 +1 @@ +* 2829 FETCH (UID 186458 FLAGS () ENVELOPE ("Wed, 25 Mar 2026 14:00:21 +0000" "=?utf-8?B?0JjQvdCy0LXQvdGC0LDRgNC40LfQsNGG0LjRjyDRhtC10L3RgtGA0LDQu9GM0L3QvtCz0L4g0YHQutC70LDQtNCwIDI4LjAz?=" (("=?utf-8?B?0K7RgNC60LDRgQ==?=" NIL "info" "yurkas.by")) NIL ((NIL NIL "info" "yurkas.by")) (("" NIL "supron-drev" "mail.ru")) NIL NIL NIL "") BODYSTRUCTURE ((("text" "plain" ("charset" "utf-8") NIL NIL "base64" 1628 0 NIL NIL NIL NIL)("text" "html" ("charset" "utf-8") NIL NIL "quoted-printable" 21021 0 NIL NIL NIL NIL) "alternative" ("boundary" NIL)) "mixed" ("boundary" NIL))) diff --git a/imap-codec/tests/mailru_fetch_repro.rs b/imap-codec/tests/mailru_fetch_repro.rs new file mode 100644 index 00000000..c115502f --- /dev/null +++ b/imap-codec/tests/mailru_fetch_repro.rs @@ -0,0 +1,43 @@ +//! Regression: Mail.ru FETCH lines must parse (Himalaya envelope list). +use imap_codec::{ResponseCodec, decode::Decoder}; + +/// `include_bytes!` may yield LF-only after Git checkout; IMAP uses CRLF. +fn normalize_mailru_fixture_crlf(raw: &[u8]) -> Vec<u8> { + let payload = raw + .strip_suffix(b"\r\n") + .or_else(|| raw.strip_suffix(b"\n")) + .unwrap_or(raw); + let mut v = payload.to_vec(); + v.extend_from_slice(b"\r\n"); + v +} + +#[test] +fn mailru_minimal_fetch_empty_address_name_parses() { + // Minimal repro: empty quoted display name in To (Mail.ru). 
+ let line = concat!( + "* 1 FETCH (UID 1 FLAGS () ", + "ENVELOPE (NIL NIL NIL NIL NIL ", + "((\"\" NIL \"user\" \"mail.ru\")) ", + "NIL NIL NIL NIL))\r\n" + ); + let (rem, _) = ResponseCodec::default().decode(line.as_bytes()).unwrap(); + assert!(rem.is_empty()); +} + +#[test] +fn mailru_fetch_with_mixed_alternative_bodystructure_parses() { + // Captured from a real Mail.ru IMAP `FETCH (UID FLAGS ENVELOPE BODYSTRUCTURE)` line. + static RAW: &[u8] = include_bytes!("fixtures_mailru_fetch_2829.bin"); + let line = normalize_mailru_fixture_crlf(RAW); + let decoded = ResponseCodec::default().decode(line.as_slice()); + if let Err(e) = &decoded { + eprintln!("decode error: {e:?}"); + } + let (rem, _resp) = decoded.expect("Mail.ru FETCH should parse"); + assert!( + rem.is_empty(), + "unexpected trailing bytes: {:?}", + String::from_utf8_lossy(rem) + ); +} diff --git a/justfile b/justfile index f2a507e4..df578cc9 100644 --- a/justfile +++ b/justfile @@ -72,7 +72,8 @@ cargo_hack mode: install_cargo_hack quirk_trailing_space_status,\ quirk_spaces_between_addresses,\ quirk_empty_continue_req,\ - quirk_body_fld_enc_nil_to_empty\ + quirk_body_fld_enc_nil_to_empty,\ + quirk_body_fld_param_nil_value\ {{ mode }} cargo hack check -p imap-types \ --no-dev-deps \