diff --git a/compiler/rustc_ast/src/format.rs b/compiler/rustc_ast/src/format.rs index cadebb2254d95..43621f75df556 100644 --- a/compiler/rustc_ast/src/format.rs +++ b/compiler/rustc_ast/src/format.rs @@ -77,14 +77,14 @@ pub struct FormatArguments { arguments: Vec, num_unnamed_args: usize, num_explicit_args: usize, - names: FxHashMap, + explicit_names: FxHashMap, } impl FormatArguments { pub fn new() -> Self { Self { arguments: Vec::new(), - names: FxHashMap::default(), + explicit_names: FxHashMap::default(), num_unnamed_args: 0, num_explicit_args: 0, } @@ -92,12 +92,20 @@ impl FormatArguments { pub fn add(&mut self, arg: FormatArgument) -> usize { let index = self.arguments.len(); - if let Some(name) = arg.kind.ident() { - self.names.insert(name.name, index); - } else if self.names.is_empty() { - // Only count the unnamed args before the first named arg. - // (Any later ones are errors.) - self.num_unnamed_args += 1; + match arg.kind { + FormatArgumentKind::Normal => { + // Only count the unnamed args before the first named arg. + // (Any later ones are errors.) + if self.explicit_names.is_empty() { + self.num_unnamed_args += 1; + } + } + FormatArgumentKind::Named(ident) => { + self.explicit_names.insert(ident.name, index); + } + FormatArgumentKind::Captured(_) => { + // Don't record the name yet, to keep duplicate captures until AST->HIR lowering. + } } if !matches!(arg.kind, FormatArgumentKind::Captured(..)) { // This is an explicit argument. @@ -113,8 +121,8 @@ impl FormatArguments { index } - pub fn by_name(&self, name: Symbol) -> Option<(usize, &FormatArgument)> { - let i = *self.names.get(&name)?; + pub fn by_explicit_name(&self, name: Symbol) -> Option<(usize, &FormatArgument)> { + let i = *self.explicit_names.get(&name)?; Some((i, &self.arguments[i])) } diff --git a/compiler/rustc_ast_lowering/src/format.rs b/compiler/rustc_ast_lowering/src/format.rs index 1f1f86f7edfd7..a463e95fa9d8a 100644 --- a/compiler/rustc_ast_lowering/src/format.rs +++ b/compiler/rustc_ast_lowering/src/format.rs @@ -1,14 +1,105 @@ use std::borrow::Cow; use rustc_ast::*; -use rustc_data_structures::fx::FxIndexMap; -use rustc_hir as hir; +use rustc_data_structures::fx::{FxHashMap, FxIndexMap}; +use rustc_hir::def::{DefKind, Res}; +use rustc_hir::def_id::LOCAL_CRATE; +use rustc_hir::{self as hir}; use rustc_session::config::FmtDebug; +use rustc_session::lint; use rustc_span::{ByteSymbol, DesugaringKind, Ident, Span, Symbol, sym}; use super::LoweringContext; use crate::ResolverAstLoweringExt; +/// Collect statistics about a `FormatArgs` for crater analysis. +fn collect_format_args_stats(fmt: &FormatArgs) -> FormatArgsStats { + let mut pieces = 0usize; + let mut placeholders = 0usize; + let mut width_args = 0usize; + let mut precision_args = 0usize; + + for piece in &fmt.template { + match piece { + FormatArgsPiece::Literal(_) => pieces += 1, + FormatArgsPiece::Placeholder(ph) => { + placeholders += 1; + if let Some(FormatCount::Argument(_)) = &ph.format_options.width { + width_args += 1; + } + if let Some(FormatCount::Argument(_)) = &ph.format_options.precision { + precision_args += 1; + } + } + } + } + + let mut positional = 0usize; + let mut named = 0usize; + let mut captured = 0usize; + let mut captured_names: FxHashMap = FxHashMap::default(); + + for arg in fmt.arguments.all_args() { + match &arg.kind { + FormatArgumentKind::Normal => positional += 1, + FormatArgumentKind::Named(_) => named += 1, + FormatArgumentKind::Captured(ident) => { + captured += 1; + *captured_names.entry(ident.name).or_default() += 1; + } + } + } + + let unique_captures = captured_names.len(); + let duplicate_captures = captured - unique_captures; + + // Count how many captures are duplicated 2x, 3x, 4x+. + let mut dup_2x = 0usize; + let mut dup_3x = 0usize; + let mut dup_4plus = 0usize; + #[allow(rustc::potential_query_instability)] + for (_name, count) in &captured_names { + match *count { + 0 | 1 => {} + 2 => dup_2x += 1, + 3 => dup_3x += 1, + _ => dup_4plus += 1, + } + } + + FormatArgsStats { + pieces, + placeholders, + width_args, + precision_args, + total_args: positional + named + captured, + positional, + named, + captured, + unique_captures, + duplicate_captures, + dup_2x, + dup_3x, + dup_4plus, + } +} + +struct FormatArgsStats { + pieces: usize, + placeholders: usize, + width_args: usize, + precision_args: usize, + total_args: usize, + positional: usize, + named: usize, + captured: usize, + unique_captures: usize, + duplicate_captures: usize, + dup_2x: usize, + dup_3x: usize, + dup_4plus: usize, +} + impl<'hir, R: ResolverAstLoweringExt<'hir>> LoweringContext<'_, 'hir, R> { pub(crate) fn lower_format_args(&mut self, sp: Span, fmt: &FormatArgs) -> hir::ExprKind<'hir> { // Never call the const constructor of `fmt::Arguments` if the @@ -26,9 +117,168 @@ impl<'hir, R: ResolverAstLoweringExt<'hir>> LoweringContext<'_, 'hir, R> { fmt = flatten_format_args(fmt); fmt = self.inline_literals(fmt); } + + // --- Crater instrumentation --- + // Only emit for the root crate. Cargo passes + // `--cap-lints allow` when compiling dependencies, + // so we use that signal to skip them -- just as + // cargo suppresses dependency warnings. + let before = (self.tcx.sess.opts.lint_cap + != Some(lint::Allow)) + .then(|| collect_format_args_stats(&fmt)); + + fmt = self.dedup_captured_places(fmt); + + if let Some(before) = before { + let args_after_dedup = + fmt.arguments.all_args().len(); + let deduped_by_opt = before + .total_args + .saturating_sub(args_after_dedup); + let not_deduped = + before + .duplicate_captures + .saturating_sub(deduped_by_opt); + + // Classify remaining duplicated captures by + // name resolution. + let (const_dups, constparam_dups, other_dups) = + self.classify_dup_captures(&fmt); + + let krate = + self.tcx.crate_name(LOCAL_CRATE); + + // The "old world" (current stable) arg count: + // all captures with the same name were + // deduplicated, so we count unique captures + // only. + let args_old_world = before.positional + + before.named + + before.unique_captures; + // Size estimates (bytes, assuming 64-bit: + // rt::Argument = 16 bytes). + let size_no_dedup = before.total_args * 16; + let size_old_world = args_old_world * 16; + let size_with_opt = args_after_dedup * 16; + + // Source location for cross-target dedup. + let loc = { + let sm = self.tcx.sess.source_map(); + let pos = + sm.lookup_char_pos(fmt.span.lo()); + format!( + "{}:{}:{}", + pos.file + .name + .prefer_local_unconditionally(), + pos.line, + pos.col.0, + ) + }; + // Escape for JSON embedding. File paths on + // Linux rarely contain `\` or `"`, but a + // malformed loc would silently break JSON + // parsing for that record. + let loc = + loc.replace('\\', "\\\\") + .replace('"', "\\\""); + + eprintln!( + "[FMTARGS] {{\ + \"crate\":\"{krate}\",\ + \"loc\":\"{loc}\",\ + \"p\":{},\"ph\":{},\"wa\":{},\"pa\":{},\ + \"a\":{},\"pos\":{},\"named\":{},\ + \"cap\":{},\ + \"ucap\":{},\"dup\":{},\ + \"const\":{},\"constparam\":{},\ + \"other_dup\":{},\ + \"d2\":{},\"d3\":{},\"d4p\":{},\ + \"deduped\":{},\"remaining\":{},\ + \"a_old\":{},\"a_opt\":{},\ + \"sz_no_dedup\":{},\"sz_old\":{},\ + \"sz_opt\":{}\ + }}", + before.pieces, + before.placeholders, + before.width_args, + before.precision_args, + before.total_args, + before.positional, + before.named, + before.captured, + before.unique_captures, + before.duplicate_captures, + const_dups, + constparam_dups, + other_dups, + before.dup_2x, + before.dup_3x, + before.dup_4plus, + deduped_by_opt, + not_deduped, + args_old_world, + args_after_dedup, + size_no_dedup, + size_old_world, + size_with_opt, + ); + } + expand_format_args(self, sp, &fmt, allow_const) } + /// Classify duplicated captured arguments by their name + /// resolution. Returns (const_count, constparam_count, + /// other_count) counting the number of *extra* captures + /// (beyond the first) in each category. + fn classify_dup_captures(&self, fmt: &FormatArgs) -> (usize, usize, usize) { + let mut seen: FxHashMap = FxHashMap::default(); + let mut const_dups = 0usize; + let mut constparam_dups = 0usize; + let mut other_dups = 0usize; + + for arg in fmt.arguments.all_args() { + if let FormatArgumentKind::Captured(ident) = &arg.kind { + let count = seen.entry(ident.name).or_default(); + *count += 1; + if *count == 2 { + // First duplicate -- classify by resolution. + // (Subsequent duplicates of the same name + // are already counted by the dup_2x/3x/4p + // fields.) + if let Some(partial_res) = + self.resolver.get_partial_res(arg.expr.id) + && let Some(res) = partial_res.full_res() + { + match res { + Res::Local(_) + | Res::Def( + DefKind::Static { .. }, + _, + ) => { + // Places -- handled by + // dedup_captured_places. + } + Res::Def( + DefKind::Const { .. } + | DefKind::AssocConst { .. }, + _, + ) => const_dups += 1, + Res::Def( + DefKind::ConstParam, _ + ) => constparam_dups += 1, + _ => other_dups += 1, + } + } else { + other_dups += 1; + } + } + } + } + (const_dups, constparam_dups, other_dups) + } + /// Try to convert a literal into an interned string fn try_inline_lit(&self, lit: token::Lit) -> Option { match LitKind::from_token_lit(lit) { @@ -138,6 +388,80 @@ impl<'hir, R: ResolverAstLoweringExt<'hir>> LoweringContext<'_, 'hir, R> { fmt } + + /// De-duplicate implicit captures of identifiers that refer to places. + /// + /// Turns + /// + /// `format_args!("Hello, {hello}, {hello}!")` + /// + /// into + /// + /// `format_args!("Hello, {hello}, {hello}!", hello=hello)`. + fn dedup_captured_places<'fmt>(&self, mut fmt: Cow<'fmt, FormatArgs>) -> Cow<'fmt, FormatArgs> { + use std::collections::hash_map::Entry; + + let mut deduped_arg_indices: FxHashMap = FxHashMap::default(); + let mut remove = vec![false; fmt.arguments.all_args().len()]; + let mut deduped_anything = false; + + // Re-use arguments for placeholders capturing the same local/static identifier. + for i in 0..fmt.template.len() { + if let FormatArgsPiece::Placeholder(placeholder) = &fmt.template[i] + && let Ok(arg_index) = placeholder.argument.index + && let arg = &fmt.arguments.all_args()[arg_index] + && let FormatArgumentKind::Captured(ident) = arg.kind + { + match deduped_arg_indices.entry(ident.name) { + Entry::Occupied(occupied_entry) => { + // We've seen this identifier before, and it's dedupable. Point the + // placeholder at the recorded arg index, cloning `fmt` if necessary. + let piece = &mut fmt.to_mut().template[i]; + let FormatArgsPiece::Placeholder(placeholder) = piece else { + unreachable!(); + }; + placeholder.argument.index = Ok(*occupied_entry.get()); + remove[arg_index] = true; + deduped_anything = true; + } + Entry::Vacant(vacant_entry) => { + // This is the first time we've seen a captured identifier. If it's a local + // or static, note the argument index so other occurrences can be deduped. + if let Some(partial_res) = self.resolver.get_partial_res(arg.expr.id) + && let Some(res) = partial_res.full_res() + && matches!(res, Res::Local(_) | Res::Def(DefKind::Static { .. }, _)) + { + vacant_entry.insert(arg_index); + } + } + } + } + } + + // Remove the arguments that were de-duplicated. + if deduped_anything { + let fmt = fmt.to_mut(); + + // Drop all the arguments that are marked for removal. + let mut remove_it = remove.iter(); + fmt.arguments.all_args_mut().retain(|_| remove_it.next() != Some(&true)); + + // Calculate the mapping of old to new indexes for the remaining arguments. + let index_map: Vec = remove + .into_iter() + .scan(0, |i, remove| { + let mapped = *i; + *i += !remove as usize; + Some(mapped) + }) + .collect(); + + // Correct the indexes that refer to arguments that have shifted position. + for_all_argument_indexes(&mut fmt.template, |index| *index = index_map[*index]); + } + + fmt + } } /// Flattens nested `format_args!()` into one. diff --git a/compiler/rustc_builtin_macros/src/format.rs b/compiler/rustc_builtin_macros/src/format.rs index f47dae5eba00a..2f157876b755a 100644 --- a/compiler/rustc_builtin_macros/src/format.rs +++ b/compiler/rustc_builtin_macros/src/format.rs @@ -121,7 +121,7 @@ fn parse_args<'a>(ecx: &ExtCtxt<'a>, sp: Span, tts: TokenStream) -> PResult<'a, p.bump(); p.expect(exp!(Eq))?; let expr = p.parse_expr()?; - if let Some((_, prev)) = args.by_name(ident.name) { + if let Some((_, prev)) = args.by_explicit_name(ident.name) { ecx.dcx().emit_err(errors::FormatDuplicateArg { span: ident.span, prev: prev.kind.ident().unwrap().span, @@ -396,12 +396,11 @@ fn make_format_args( } Name(name, span) => { let name = Symbol::intern(name); - if let Some((index, _)) = args.by_name(name) { + if let Some((index, _)) = args.by_explicit_name(name) { // Name found in `args`, so we resolve it to its index. - if index < args.explicit_args().len() { - // Mark it as used, if it was an explicit argument. - used[index] = true; - } + assert!(index < args.explicit_args().len()); + // Mark it as used, as this is an explicit argument. + used[index] = true; Ok(index) } else { // Name not found in `args`, so we add it as an implicitly captured argument. diff --git a/tests/pretty/hir-format-args-duplicated-captures.pp b/tests/pretty/hir-format-args-duplicated-captures.pp new file mode 100644 index 0000000000000..a744afe2f87eb --- /dev/null +++ b/tests/pretty/hir-format-args-duplicated-captures.pp @@ -0,0 +1,81 @@ +//! Test for : identifiers referring to places +//! should have their captures de-duplicated by `format_args!`, but identifiers referring to values +//! should not be de-duplicated. +extern crate std; +#[prelude_import] +use ::std::prelude::rust_2015::*; +//@ pretty-compare-only +//@ pretty-mode:hir +//@ pp-exact:hir-format-args-duplicated-captures.pp + +const X: i32 = 42; + +struct Struct; + +static STATIC: i32 = 0; + +fn main() { + // Consts and constructors refer to values. Whether we construct them multiple times or just + // once can be observed in some cases, so we don't de-duplicate them. + let _ = + { + super let args = (&X, &X); + super let args = + [format_argument::new_display(args.0), + format_argument::new_display(args.1)]; + unsafe { format_arguments::new(b"\xc0\x01 \xc0\x00", &args) } + }; + let _ = + { + super let args = (&Struct, &Struct); + super let args = + [format_argument::new_display(args.0), + format_argument::new_display(args.1)]; + unsafe { format_arguments::new(b"\xc0\x01 \xc0\x00", &args) } + }; + + // Variables and statics refer to places. We can de-duplicate without an observable difference. + let x = 3; + let _ = + { + super let args = (&STATIC,); + super let args = [format_argument::new_display(args.0)]; + unsafe { + format_arguments::new(b"\xc0\x01 \xc8\x00\x00\x00", &args) + } + }; + let _ = + { + super let args = (&x,); + super let args = [format_argument::new_display(args.0)]; + unsafe { + format_arguments::new(b"\xc0\x01 \xc8\x00\x00\x00", &args) + } + }; + + // We don't de-duplicate widths or precisions since de-duplication can be observed. + let _ = + { + super let args = (&x, &x, &x); + super let args = + [format_argument::new_display(args.0), + format_argument::from_usize(args.1), + format_argument::from_usize(args.2)]; + unsafe { + format_arguments::new(b"\xd3 \x00\x00h\x01\x00\x01 \xdb \x00\x00h\x02\x00\x00\x00\x00", + &args) + } + }; + let _ = + { + super let args = (&0.0, &x, &x); + super let args = + [format_argument::new_display(args.0), + format_argument::from_usize(args.1), + format_argument::from_usize(args.2)]; + unsafe { + format_arguments::new(b"\xe5 \x00\x00p\x01\x00\x01 \xed \x00\x00p\x02\x00\x00\x00\x00", + &args) + } + }; +} diff --git a/tests/pretty/hir-format-args-duplicated-captures.rs b/tests/pretty/hir-format-args-duplicated-captures.rs new file mode 100644 index 0000000000000..926120cd1af23 --- /dev/null +++ b/tests/pretty/hir-format-args-duplicated-captures.rs @@ -0,0 +1,28 @@ +//! Test for : identifiers referring to places +//! should have their captures de-duplicated by `format_args!`, but identifiers referring to values +//! should not be de-duplicated. +//@ pretty-compare-only +//@ pretty-mode:hir +//@ pp-exact:hir-format-args-duplicated-captures.pp + +const X: i32 = 42; + +struct Struct; + +static STATIC: i32 = 0; + +fn main() { + // Consts and constructors refer to values. Whether we construct them multiple times or just + // once can be observed in some cases, so we don't de-duplicate them. + let _ = format_args!("{X} {X}"); + let _ = format_args!("{Struct} {Struct}"); + + // Variables and statics refer to places. We can de-duplicate without an observable difference. + let x = 3; + let _ = format_args!("{STATIC} {STATIC}"); + let _ = format_args!("{x} {x}"); + + // We don't de-duplicate widths or precisions since de-duplication can be observed. + let _ = format_args!("{x:x$} {x:x$}"); + let _ = format_args!("{0:.x$} {0:.x$}", 0.0); +} diff --git a/tests/ui/deref/format-args-capture-deref-coercion.rs b/tests/ui/deref/format-args-capture-deref-coercion.rs new file mode 100644 index 0000000000000..c859bc3e1b0f5 --- /dev/null +++ b/tests/ui/deref/format-args-capture-deref-coercion.rs @@ -0,0 +1,37 @@ +//! Test for : width and precision arguments +//! implicitly captured by `format_args!` should behave as if they were written individually. +//@ run-pass +use std::ops::Deref; +use std::sync::atomic::{AtomicUsize, Ordering}; + +static DEREF_COUNTER: AtomicUsize = AtomicUsize::new(0); + +struct LogDeref; +impl Deref for LogDeref { + type Target = usize; + fn deref(&self) -> &usize { + if DEREF_COUNTER.fetch_add(1, Ordering::Relaxed).is_multiple_of(2) { + &2 + } else { + &3 + } + } +} + +fn main() { + assert_eq!(DEREF_COUNTER.load(Ordering::Relaxed), 0); + + let x = 0.0; + + let _ = format_args!("{x:LogDeref$} {x:LogDeref$}"); + // Increased by 2, as `&LogDeref` is coerced to a `&usize` twice. + assert_eq!(DEREF_COUNTER.load(Ordering::Relaxed), 2); + + let _ = format_args!("{x:.LogDeref$} {x:.LogDeref$}"); + // Increased by 2, as `&LogDeref` is coerced to a `&usize` twice. + assert_eq!(DEREF_COUNTER.load(Ordering::Relaxed), 4); + + let _ = format_args!("{x:LogDeref$} {x:.LogDeref$}"); + // Increased by 2, as `&LogDeref` is coerced to a `&usize` twice. + assert_eq!(DEREF_COUNTER.load(Ordering::Relaxed), 6); +} diff --git a/tests/ui/drop/format-args-duplicated-captured-const-drops.rs b/tests/ui/drop/format-args-duplicated-captured-const-drops.rs new file mode 100644 index 0000000000000..3748209ec4f0f --- /dev/null +++ b/tests/ui/drop/format-args-duplicated-captured-const-drops.rs @@ -0,0 +1,79 @@ +//! A regression test for . +//! `format_args!` should not deduplicate captured identifiers referring to values (e.g. consts and +//! constructors). They must be evaluated once per occurrence, whereas explicitly provided arguments +//! are evaluated only once, regardless of how many times they appear in the format string. +//@ run-pass + +use std::sync::atomic::{AtomicUsize, Ordering}; + +static DROP_COUNTER: AtomicUsize = AtomicUsize::new(0); + +#[derive(Debug)] +struct Foo; + +impl Drop for Foo { + fn drop(&mut self) { + DROP_COUNTER.fetch_add(1, Ordering::Relaxed); + } +} + +const FOO: Foo = Foo; + +fn main() { + assert_eq!(DROP_COUNTER.load(Ordering::Relaxed), 0); + + { + let _x = format_args!("{Foo:?}"); + } + assert_eq!(DROP_COUNTER.load(Ordering::Relaxed), 1); + { + let _x = format_args!("{FOO:?}"); + } + assert_eq!(DROP_COUNTER.load(Ordering::Relaxed), 2); + + { + let _x = format_args!("{Foo:?}{Foo:?}"); + } + // Increased by 2, as `Foo` is constructed for each captured `Foo`. + assert_eq!(DROP_COUNTER.load(Ordering::Relaxed), 4); + { + let _x = format_args!("{FOO:?}{FOO:?}"); + } + // Increased by 2, as `Foo` is constructed for each captured `FOO`. + assert_eq!(DROP_COUNTER.load(Ordering::Relaxed), 6); + + { + let _x = format_args!("{:?}{0:?}", Foo); + } + // Increased by 1, as `Foo` is constructed just once as an explicit argument. + assert_eq!(DROP_COUNTER.load(Ordering::Relaxed), 7); + { + let _x = format_args!("{:?}{0:?}", FOO); + } + // Increased by 1, as `Foo` is constructed just once as an explicit argument. + assert_eq!(DROP_COUNTER.load(Ordering::Relaxed), 8); + + { + let _x = format_args!("{foo:?}{foo:?}{bar:?}{Foo:?}{Foo:?}", foo = Foo, bar = Foo); + } + // Increased by 4, as `Foo` is constructed twice for captured `Foo`, and once for each + // `foo` and `bar`. + assert_eq!(DROP_COUNTER.load(Ordering::Relaxed), 12); + { + let _x = format_args!("{foo:?}{foo:?}{bar:?}{FOO:?}{FOO:?}", foo = FOO, bar = FOO); + } + // Increased by 4, as `Foo` is constructed twice for captured `FOO`, and once for each + // `foo` and `bar`. + assert_eq!(DROP_COUNTER.load(Ordering::Relaxed), 16); + + { + let _x = format_args!("{Foo:?}{Foo:?}", Foo = Foo); + } + // Increased by 1, as `Foo` is shadowed by an explicit argument. + assert_eq!(DROP_COUNTER.load(Ordering::Relaxed), 17); + { + let _x = format_args!("{FOO:?}{FOO:?}", FOO = FOO); + } + // Increased by 1, as `FOO` is shadowed by an explicit argument. + assert_eq!(DROP_COUNTER.load(Ordering::Relaxed), 18); +}