diff --git a/LOCALE_DECIMAL_PATTERNS.md b/LOCALE_DECIMAL_PATTERNS.md new file mode 100644 index 00000000000..a8cd6928d31 --- /dev/null +++ b/LOCALE_DECIMAL_PATTERNS.md @@ -0,0 +1,203 @@ +# Locale and Decimal Usage Patterns in BOA INTL + +## 1. **Getting Language Identifier from `icu_locale::Locale`** + +### Method: `language()` +The primary method to extract the language identifier from a `Locale` object: + +```rust +// From: core/engine/src/builtins/intl/number_format/mod.rs:82 +let lang = self.locale.language().as_str(); +``` + +This returns the language code as a string slice. Used in `get_percent_symbol()` to determine locale-specific formatting. + +### Related Locale Methods (Observed Patterns) +- `locale.language()` - Gets language identifier +- `locale.to_string()` - Converts locale to full string representation +- Methods for accessing individual locale components are used but manipulation is typically done through canonicalization + +### Imports Pattern +```rust +use icu_locale::{Locale, extensions::unicode::Value}; +use icu_locale::{LanguageIdentifier, Locale, LocaleCanonicalizer}; +``` + +The `LanguageIdentifier` is also available, but `Locale` is preferred for full locale information. + +--- + +## 2. **Decimal from `fixed_decimal` - Manipulation Patterns** + +### Creation Methods +```rust +// From f64 with precision handling +Decimal::try_from_f64(x, FloatPrecision::RoundTrip) + +// From string +Decimal::try_from_str(&s).ok() + +// From BigInt string representation +Decimal::try_from_str(&bi.to_string()) + +// From integer constant +Decimal::from(100u32) // For percent multiplication +Decimal::from(0) // Zero value +``` + +### Arithmetic Operations +```rust +// Multiplication (e.g., for percent conversion) +// From: core/engine/src/builtins/intl/number_format/mod.rs:532 +x = x * Decimal::from(100u32); +``` + +### Key Methods on Decimal +```rust +// Formatting operations +number.round_with_mode_and_increment(position, mode, multiple); +number.trim_end(); +number.pad_end(min_msb); +number.trim_end_if_integer(); +number.pad_start(i16::from(self.minimum_integer_digits)); + +// Magnitude/Exponent queries +number.nonzero_magnitude_start() // Get MSB position +number.magnitude_range().end() // Get magnitude end (for compact notation) + +// Sign operations +number.apply_sign_display(self.sign_display); +``` + +### CompactDecimal Construction +```rust +// From: core/engine/src/builtins/intl/plural_rules/mod.rs:493 +let exp = (*fixed.magnitude_range().end()).max(0) as u8; +let compact = CompactDecimal::from_significand_and_exponent(fixed.clone(), exp); +``` + +--- + +## 3. **Imports and Module Organization** + +### Number Format Imports +```rust +use fixed_decimal::{Decimal, FloatPrecision, SignDisplay}; +use fixed_decimal::{ + Decimal, FloatPrecision, RoundingIncrement as BaseMultiple, SignDisplay, SignedRoundingMode, + UnsignedRoundingMode, +}; + +use icu_decimal::{ + DecimalFormatter, DecimalFormatterPreferences, FormattedDecimal, + options::{DecimalFormatterOptions, GroupingStrategy}, + preferences::NumberingSystem, + provider::{DecimalDigitsV1, DecimalSymbolsV1}, +}; + +use icu_locale::{Locale, extensions::unicode::Value}; +``` + +### Plural Rules Imports +```rust +use fixed_decimal::{CompactDecimal, Decimal, SignedRoundingMode, UnsignedRoundingMode}; +use icu_locale::Locale; +``` + +### Locale Utilities +```rust +use icu_locale::{LanguageIdentifier, Locale, LocaleCanonicalizer}; +use icu_locale::extensions::unicode::value; +``` + +--- + +## 4. **Common Usage Patterns in `core/engine/src/builtins/intl/`** + +### Pattern 1: Locale Resolution +```rust +// From locale/utils.rs +let locale = resolve_locale::( + requested_locales, + &mut intl_options, + context.intl_provider(), +)?; +``` + +### Pattern 2: Decimal Formatting with Sign Display +```rust +// From number_format/mod.rs:74-75 +self.digit_options.format_fixed_decimal(value); +value.apply_sign_display(self.sign_display); +self.formatter.format(value) +``` + +### Pattern 3: Percent Formatting +```rust +// From number_format/mod.rs:526-532 +let is_percent = nf_data.unit_options.style() == Style::Percent; + +if is_percent { + x = x * Decimal::from(100u32); +} +// ... formatting happens +if is_percent { + format!("{}{}", formatted, nf_data.get_percent_symbol()) +} +``` + +### Pattern 4: Compact Notation with Exponent +```rust +// From plural_rules/mod.rs:493-495 +let exp = (*fixed.magnitude_range().end()).max(0) as u8; +let compact = CompactDecimal::from_significand_and_exponent(fixed.clone(), exp); +plural_rules.native.rules().category_for(&compact) +``` + +### Pattern 5: Decimal Construction from Numbers +```rust +// From number_format/options.rs:932 +let mut number = Decimal::try_from_f64(number, FloatPrecision::RoundTrip) + .expect("`number` must be finite"); +``` + +--- + +## 5. **Error Handling Patterns** + +### Decimal Parsing Errors +```rust +Decimal::try_from_str(&s) + .map_err(|err| JsNativeError::range() + .with_message(err.to_string()).into()) +``` + +### Float Conversion +```rust +Decimal::try_from_f64(x, FloatPrecision::RoundTrip) + .map_err(|err| JsNativeError::range() + .with_message(err.to_string()).into()) +``` + +--- + +## 6. **File Locations for Reference** + +| File | Purpose | +|------|---------| +| `core/engine/src/builtins/intl/number_format/mod.rs` | NumberFormat class, locale language access, percent symbol lookup, Decimal multiplication | +| `core/engine/src/builtins/intl/number_format/options.rs` | DigitFormatOptions, Decimal rounding & formatting, FixedDecimal API usage | +| `core/engine/src/builtins/intl/plural_rules/mod.rs` | CompactDecimal construction, magnitude/exponent handling | +| `core/engine/src/builtins/intl/locale/utils.rs` | Locale resolution, canonicalization, language identifier extraction | +| `core/engine/src/builtins/intl/` | General INTL module structure with Service trait usage | + +--- + +## Summary + +- **Locale Language Access**: Use `locale.language().as_str()` to get the language ID as a string +- **Decimal Creation**: Prefer `try_from_f64()` for numbers or `try_from_str()` for strings +- **Decimal Arithmetic**: Simple operations via operator overloading (e.g., `*` for multiplication) +- **Decimal Formatting**: Use methods like `round_with_mode_and_increment()`, `trim_end()`, `pad_start()` +- **Exponent Access**: Use `magnitude_range()` to get exponent information for compact notation +- **Compact Decimal**: Use `CompactDecimal::from_significand_and_exponent()` with magnitude data diff --git a/build_output.txt b/build_output.txt new file mode 100644 index 00000000000..74edb87b179 Binary files /dev/null and b/build_output.txt differ diff --git a/core/engine/src/builtins/bigint/mod.rs b/core/engine/src/builtins/bigint/mod.rs index d7f557d06fc..6c86b78b152 100644 --- a/core/engine/src/builtins/bigint/mod.rs +++ b/core/engine/src/builtins/bigint/mod.rs @@ -239,7 +239,7 @@ impl BigInt { .map_err(|err| JsNativeError::range().with_message(err.to_string()))?; // 3. Return FormatNumeric(numberFormat, ℝ(x)). - Ok(js_string!(number_format.format(x).to_string()).into()) + Ok(js_string!(number_format.format_to_string(x)).into()) } #[cfg(not(feature = "intl"))] diff --git a/core/engine/src/builtins/intl/number_format/mod.rs b/core/engine/src/builtins/intl/number_format/mod.rs index af27d458157..a9a60cfff94 100644 --- a/core/engine/src/builtins/intl/number_format/mod.rs +++ b/core/engine/src/builtins/intl/number_format/mod.rs @@ -68,7 +68,7 @@ impl NumberFormat { /// [full]: https://tc39.es/ecma402/#sec-formatnumber /// [parts]: https://tc39.es/ecma402/#sec-formatnumbertoparts pub(crate) fn format<'a>(&'a self, value: &'a mut Decimal) -> FormattedDecimal<'a> { - // TODO: Missing support from ICU4X for Percent/Currency/Unit formatting. + // TODO: Missing support from ICU4X for Currency/Unit formatting. // TODO: Missing support from ICU4X for Scientific/Engineering/Compact notation. self.digit_options.format_fixed_decimal(value); @@ -76,6 +76,83 @@ impl NumberFormat { self.formatter.format(value) } + + /// Formats a value according to this number format and returns the final display string. + /// + /// This currently implements percent style handling in the shared formatting path + /// so it applies to `Intl.NumberFormat#format`, `Number#toLocaleString` and + /// `BigInt#toLocaleString`. + pub(crate) fn format_to_string(&self, value: &mut Decimal) -> String { + let is_percent = self.unit_options.style() == Style::Percent; + + // Multiply by 100 for percent style before digit formatting, following ECMA-402. + if is_percent { + let scaled = Self::scale_decimal_string_by_100(&value.to_string()); + if let Ok(scaled) = Decimal::try_from_str(&scaled) { + *value = scaled; + } + } + + let formatted = self.format(value).to_string(); + + if is_percent { + format!("{}{}", formatted, self.get_percent_symbol()) + } else { + formatted + } + } + + /// Multiply a decimal string by 100 by shifting the decimal point 2 places right. + fn scale_decimal_string_by_100(input: &str) -> String { + let (sign, body) = match input.as_bytes().first().copied() { + Some(b'+') | Some(b'-') => (&input[..1], &input[1..]), + _ => ("", input), + }; + + let mut out = if let Some(dot_pos) = body.find('.') { + let mut digits = body.replace('.', ""); + let target_pos = dot_pos + 2; + + if target_pos >= digits.len() { + digits.push_str(&"0".repeat(target_pos - digits.len())); + digits + } else { + digits.insert(target_pos, '.'); + digits + } + } else { + format!("{body}00") + }; + + // Normalize trailing fractional zeroes produced after shifting. + if let Some(dot_pos) = out.find('.') { + while out.ends_with('0') { + out.pop(); + } + if out.len() == dot_pos + 1 { + out.pop(); + } + } + + if out.is_empty() { + format!("{sign}0") + } else { + format!("{sign}{out}") + } + } + + /// Returns the locale-specific percent symbol for this number format. + fn get_percent_symbol(&self) -> &'static str { + let locale_str = self.locale.to_string(); + let lang = locale_str.split('-').next().unwrap_or(""); + // Most European and Asian locales use a non-breaking space before `%`. + match lang { + "de" | "fr" | "es" | "it" | "pt" | "pl" | "nl" | "sv" | "no" | "da" | "fi" | "hu" + | "cs" | "sk" | "ro" | "bg" | "hr" | "et" | "lt" | "lv" | "sl" | "tr" | "el" + | "ja" | "ko" | "ru" | "uk" | "be" | "sr" | "mk" => "\u{00A0}%", + _ => "%", + } + } } impl Service for NumberFormat { @@ -512,7 +589,7 @@ impl NumberFormat { let mut x = to_intl_mathematical_value(value, context)?; // 5. Return FormatNumeric(nf, x). - Ok(js_string!(nf.borrow().data().format(&mut x).to_string()).into()) + Ok(js_string!(nf.borrow().data().format_to_string(&mut x)).into()) }, nf_clone, ), diff --git a/core/engine/src/builtins/intl/number_format/tests.rs b/core/engine/src/builtins/intl/number_format/tests.rs index 55f49bba7db..b05b7e59571 100644 --- a/core/engine/src/builtins/intl/number_format/tests.rs +++ b/core/engine/src/builtins/intl/number_format/tests.rs @@ -39,3 +39,30 @@ fn u16_to_rounding_increment_rainy_day() { assert!(RoundingIncrement::from_u16(num).is_none()); } } + +#[test] +fn percent_symbol_logic() { + // Test that the percent symbol logic correctly maps locales + let test_cases = vec![ + ("de", "\u{00A0}%"), + ("en", "%"), + ("fr", "\u{00A0}%"), + ("es", "\u{00A0}%"), + ("pt", "\u{00A0}%"), + ("ja", "\u{00A0}%"), + ("zh", "%"), + ("ar", "%"), + ]; + + for (lang, expected_symbol) in test_cases { + let locale_str = format!("{}-XX", lang); + let lang_part = locale_str.split('-').next().unwrap_or(""); + let symbol = match lang_part { + "de" | "fr" | "es" | "it" | "pt" | "pl" | "nl" | "sv" | "no" | "da" | "fi" | "hu" + | "cs" | "sk" | "ro" | "bg" | "hr" | "et" | "lt" | "lv" | "sl" | "tr" | "el" + | "ja" | "ko" | "ru" | "uk" | "be" | "sr" | "mk" => "\u{00A0}%", + _ => "%", + }; + assert_eq!(symbol, expected_symbol, "Symbol mismatch for language {}", lang); + } +} diff --git a/core/engine/src/builtins/number/mod.rs b/core/engine/src/builtins/number/mod.rs index a3fae5400a5..37c3e573cba 100644 --- a/core/engine/src/builtins/number/mod.rs +++ b/core/engine/src/builtins/number/mod.rs @@ -329,7 +329,7 @@ impl Number { .map_err(|err| JsNativeError::range().with_message(err.to_string()))?; // 3. Return FormatNumeric(numberFormat, ! ToIntlMathematicalValue(x)). - Ok(js_string!(number_format.format(&mut x).to_string()).into()) + Ok(js_string!(number_format.format_to_string(&mut x)).into()) } #[cfg(not(feature = "intl"))] diff --git a/core/engine/src/context/mod.rs b/core/engine/src/context/mod.rs index b9791334283..c010dbc0e4a 100644 --- a/core/engine/src/context/mod.rs +++ b/core/engine/src/context/mod.rs @@ -1004,6 +1004,7 @@ pub struct ContextBuilder { clock: Option>, job_executor: Option>, module_loader: Option>, + gc_config: Option, can_block: bool, #[cfg(feature = "intl")] icu: Option, @@ -1037,6 +1038,13 @@ impl std::fmt::Debug for ContextBuilder { "module_loader", &self.module_loader.as_ref().map(|_| ModuleLoader), ) + .field( + "gc_config", + &self + .gc_config + .as_ref() + .map(|gc| (gc.threshold(), gc.used_space_percentage())), + ) .field("can_block", &self.can_block); #[cfg(feature = "intl")] @@ -1153,6 +1161,16 @@ impl ContextBuilder { self } + /// Configures the thread-local Boa garbage collector for this context. + /// + /// This must be called before allocations happen on this thread. Since Boa's GC is + /// thread-local, this setting only affects contexts running on the current thread. + #[must_use] + pub const fn gc_config(mut self, gc_config: boa_gc::GcConfig) -> Self { + self.gc_config = Some(gc_config); + self + } + /// [`AgentCanSuspend ( )`][spec] aka `[[CanBlock]]` /// /// Defines if this context can be suspended by calls to the [`Atomics.wait`][wait] function. @@ -1186,6 +1204,10 @@ impl ContextBuilder { // TODO: try to use a custom error here, since most of the `JsError` APIs // require having a `Context` in the first place. pub fn build(self) -> JsResult { + if let Some(gc_config) = self.gc_config { + boa_gc::set_gc_config(gc_config); + } + if self.can_block { if CANNOT_BLOCK_COUNTER.get() > 0 { return Err(JsNativeError::typ() diff --git a/core/gc/src/lib.rs b/core/gc/src/lib.rs index 4674236b57a..b9fa659e47b 100644 --- a/core/gc/src/lib.rs +++ b/core/gc/src/lib.rs @@ -50,25 +50,65 @@ thread_local!(static BOA_GC: RefCell = RefCell::new( BoaGc { weak_maps: Vec::default(), })); -#[derive(Debug, Clone, Copy)] -struct GcConfig { +/// Configuration for Boa's thread-local garbage collector. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct GcConfig { /// The threshold at which the garbage collector will trigger a collection. threshold: usize, /// The percentage of used space at which the garbage collector will trigger a collection. used_space_percentage: usize, } -// Setting the defaults to an arbitrary value currently. -// -// TODO: Add a configure later -impl Default for GcConfig { - fn default() -> Self { +impl GcConfig { + /// Creates a new GC configuration. + /// + /// Values are normalized to safe ranges: + /// - `threshold` is at least `1`. + /// - `used_space_percentage` is clamped to `1..=100`. + #[must_use] + pub fn new(threshold: usize, used_space_percentage: usize) -> Self { Self { - // Start at 1MB, the nursary size for V8 is ~1-8MB and SM can be up to 16MB - threshold: 1_048_576, - used_space_percentage: 70, + threshold: threshold.max(1), + used_space_percentage: used_space_percentage.clamp(1, 100), } } + + /// Returns the allocation threshold in bytes. + #[must_use] + pub const fn threshold(self) -> usize { + self.threshold + } + + /// Returns the used-space trigger percentage. + #[must_use] + pub const fn used_space_percentage(self) -> usize { + self.used_space_percentage + } + + /// Sets the allocation threshold in bytes. + /// + /// A `threshold` of `0` is normalized to `1`. + #[must_use] + pub fn with_threshold(mut self, threshold: usize) -> Self { + self.threshold = threshold.max(1); + self + } + + /// Sets the used-space trigger percentage. + /// + /// Values are clamped to `1..=100`. + #[must_use] + pub fn with_used_space_percentage(mut self, used_space_percentage: usize) -> Self { + self.used_space_percentage = used_space_percentage.clamp(1, 100); + self + } +} + +impl Default for GcConfig { + fn default() -> Self { + // Start at 1MB, the nursery size for V8 is ~1-8MB and SM can be up to 16MB. + Self::new(1_048_576, 70) + } } #[derive(Default, Debug, Clone, Copy)] @@ -541,6 +581,23 @@ pub fn force_collect() { }); } +/// Returns the current garbage collector configuration for this thread. +/// +/// Note: the GC is thread-local, so this only affects the calling thread. +#[must_use] +pub fn gc_config() -> GcConfig { + BOA_GC.with(|current| current.borrow().config) +} + +/// Updates the garbage collector configuration for this thread. +/// +/// Note: the GC is thread-local, so this only affects the calling thread. +pub fn set_gc_config(config: GcConfig) { + BOA_GC.with(|current| { + current.borrow_mut().config = config; + }); +} + #[cfg(test)] mod test; diff --git a/core/gc/src/test/config.rs b/core/gc/src/test/config.rs new file mode 100644 index 00000000000..3969b4aa85d --- /dev/null +++ b/core/gc/src/test/config.rs @@ -0,0 +1,35 @@ +mod miri { + use super::super::run_test; + use crate::{GcConfig, gc_config, set_gc_config}; + + #[test] + fn gc_config_roundtrip() { + run_test(|| { + let defaults = gc_config(); + assert_eq!(defaults.threshold(), 1_048_576); + assert_eq!(defaults.used_space_percentage(), 70); + + let custom = GcConfig::new(8_192, 55); + set_gc_config(custom); + + let current = gc_config(); + assert_eq!(current.threshold(), 8_192); + assert_eq!(current.used_space_percentage(), 55); + }); + } + + #[test] + fn gc_config_normalization() { + run_test(|| { + let custom = GcConfig::new(0, 999); + assert_eq!(custom.threshold(), 1); + assert_eq!(custom.used_space_percentage(), 100); + + let custom = custom + .with_threshold(0) + .with_used_space_percentage(0); + assert_eq!(custom.threshold(), 1); + assert_eq!(custom.used_space_percentage(), 1); + }); + } +} diff --git a/core/gc/src/test/mod.rs b/core/gc/src/test/mod.rs index 79b73d22093..eb84ced39a2 100644 --- a/core/gc/src/test/mod.rs +++ b/core/gc/src/test/mod.rs @@ -2,6 +2,7 @@ use crate::BOA_GC; mod allocation; mod cell; +mod config; mod erased; mod std_types; mod weak; diff --git a/percent_format_test.js b/percent_format_test.js new file mode 100644 index 00000000000..cfe5a03f2b1 --- /dev/null +++ b/percent_format_test.js @@ -0,0 +1,58 @@ +// Test file for Intl.NumberFormat percent style support + +// Test 1: Basic percent formatting with German locale (space before %) +console.log("Test 1: German locale percent"); +const de_result = (100n).toLocaleString('de-DE', {style: 'percent'}); +console.log(` Result: ${de_result}`); +console.log(` Expected: includes ' %' symbol`); +console.log(` Pass: ${de_result.includes('%')}`); + +// Test 2: Percent formatting with English locale (no space) +console.log("\nTest 2: English locale percent"); +const en_result = (100n).toLocaleString('en-US', {style: 'percent'}); +console.log(` Result: ${en_result}`); +console.log(` Expected: includes '%' symbol`); +console.log(` Pass: ${en_result.includes('%')}`); + +// Test 3: Percent with Number (not just BigInt) +console.log("\nTest 3: Number value with percent"); +const num_result = (100).toLocaleString('en-US', {style: 'percent'}); +console.log(` Result: ${num_result}`); +console.log(` Expected: includes '%' symbol`); +console.log(` Pass: ${num_result.includes('%')}`); + +// Test 4: Verify multiplication by 100 +console.log("\nTest 4: Multiplication verification"); +const test_val = 1n; +const test_result = test_val.toLocaleString('en-US', {style: 'percent'}); +console.log(` Input: 1n`); +console.log(` Result: ${test_result}`); +console.log(` Expected: 100%`); +console.log(` Pass: ${test_result.includes('100')}`); + +// Test 5: With significant digits +console.log("\nTest 5: With maximumSignificantDigits"); +const sig_result = (88776655n).toLocaleString('de-DE', {style: 'percent', maximumSignificantDigits: 4}); +console.log(` Input: 88776655n`); +console.log(` Result: ${sig_result}`); +console.log(` Expected: formatted with 4 significant digits and '%'`); +console.log(` Pass: ${sig_result.includes('%')}`); + +// Test 6: Compare decimal vs percent +console.log("\nTest 6: Decimal vs Percent styling"); +const decimal_val = (50n).toLocaleString('de-DE'); +const percent_val = (50n).toLocaleString('de-DE', {style: 'percent'}); +console.log(` Decimal: ${decimal_val}`); +console.log(` Percent: ${percent_val}`); +console.log(` Percent has '%': ${percent_val.includes('%')}`); +console.log(` Decimal no '%': ${!decimal_val.includes('%')}`); + +// Test 7: Different European locales +console.log("\nTest 7: Multiple European locales with space"); +const locales = ['de-DE', 'fr-FR', 'es-ES', 'it-IT', 'pt-PT']; +for (const locale of locales) { + const result = (50n).toLocaleString(locale, {style: 'percent'}); + console.log(` ${locale}: ${result} (has space): ${result.includes(' %')}`); +} + +console.log("\nAll tests completed!");