Skip to content

Commit 3b91466

Browse files
Implement locale-aware 'n' format specifier for int, float, complex (#7350)
* Implement locale-aware 'n' format specifier for int, float, complex Add LocaleInfo struct and locale-aware formatting methods to FormatSpec. The 'n' format type now reads thousands_sep, decimal_point, and grouping from C localeconv() and applies proper locale-based number grouping. Remove @unittest.skip from test_format.test_locale. * Fix complex 'n' format and remove locale expectedFailure markers Rewrite format_complex_locale to reuse format_complex_re_im, matching formatter_unicode.c: add_parens=0 and skip_re=0 for 'n' type. Remove @unittest.expectedFailure from test_float__format__locale and test_int__format__locale in test_types.py. * Auto-format: cargo fmt --all --------- Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
1 parent 7620c28 commit 3b91466

File tree

7 files changed

+281
-9
lines changed

7 files changed

+281
-9
lines changed

Lib/test/test_format.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -423,7 +423,6 @@ def test_non_ascii(self):
423423
self.assertEqual(format(1+2j, "\u2007^8"), "\u2007(1+2j)\u2007")
424424
self.assertEqual(format(0j, "\u2007^4"), "\u20070j\u2007")
425425

426-
@unittest.skip("TODO: RUSTPYTHON; formatting does not support locales. See https://github.com/RustPython/RustPython/issues/5181")
427426
def test_locale(self):
428427
try:
429428
oldloc = locale.setlocale(locale.LC_ALL)

Lib/test/test_types.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -431,7 +431,6 @@ def test(i, format_spec, result):
431431
test(123456, "1=20", '11111111111111123456')
432432
test(123456, "*=20", '**************123456')
433433

434-
@unittest.expectedFailure # TODO: RUSTPYTHON; + 1234.57
435434
@run_with_locale('LC_NUMERIC', 'en_US.UTF8', '')
436435
def test_float__format__locale(self):
437436
# test locale support for __format__ code 'n'
@@ -441,7 +440,6 @@ def test_float__format__locale(self):
441440
self.assertEqual(locale.format_string('%g', x, grouping=True), format(x, 'n'))
442441
self.assertEqual(locale.format_string('%.10g', x, grouping=True), format(x, '.10n'))
443442

444-
@unittest.expectedFailure # TODO: RUSTPYTHON; + 123456789012345678901234567890
445443
@run_with_locale('LC_NUMERIC', 'en_US.UTF8', '')
446444
def test_int__format__locale(self):
447445
# test locale support for __format__ code 'n' for integers

crates/common/src/format.rs

Lines changed: 196 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,19 @@ use rustpython_literal::format::Case;
1212

1313
use crate::wtf8::{CodePoint, Wtf8, Wtf8Buf};
1414

15+
/// Locale information for the 'n' format specifier.
///
/// Holds the thousands separator, decimal point, and grouping pattern as
/// reported by the C library's `localeconv()`.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct LocaleInfo {
    /// Text inserted between digit groups; empty disables grouping.
    pub thousands_sep: String,
    /// Text that replaces the `.` between integer and fractional digits.
    pub decimal_point: String,
    /// Grouping pattern from `lconv.grouping`.
    ///
    /// Each element is a group size, applied from the least significant
    /// digits leftwards. The last non-zero element repeats,
    /// e.g. `[3, 0]` means groups of 3 repeating forever.
    /// An empty vector disables grouping.
    pub grouping: Vec<u8>,
}
27+
1528
trait FormatParse {
1629
fn parse(text: &Wtf8) -> (Option<Self>, &Wtf8)
1730
where
@@ -460,6 +473,189 @@ impl FormatSpec {
460473
}
461474
}
462475

476+
/// Returns true if this format spec uses the locale-aware 'n' format type.
477+
pub fn has_locale_format(&self) -> bool {
478+
matches!(self.format_type, Some(FormatType::Number(Case::Lower)))
479+
}
480+
481+
/// Insert locale-aware thousands separators into an integer string.
482+
/// Follows CPython's GroupGenerator logic for variable-width grouping.
483+
fn insert_locale_grouping(int_part: &str, locale: &LocaleInfo) -> String {
484+
if locale.grouping.is_empty() || locale.thousands_sep.is_empty() || int_part.len() <= 1 {
485+
return int_part.to_string();
486+
}
487+
488+
let mut group_idx = 0;
489+
let mut group_size = locale.grouping[0] as usize;
490+
491+
if group_size == 0 {
492+
return int_part.to_string();
493+
}
494+
495+
// Collect groups of digits from right to left
496+
let len = int_part.len();
497+
let mut groups: Vec<&str> = Vec::new();
498+
let mut pos = len;
499+
500+
loop {
501+
if pos <= group_size {
502+
groups.push(&int_part[..pos]);
503+
break;
504+
}
505+
506+
groups.push(&int_part[pos - group_size..pos]);
507+
pos -= group_size;
508+
509+
// Advance to next group size
510+
if group_idx + 1 < locale.grouping.len() {
511+
let next = locale.grouping[group_idx + 1] as usize;
512+
if next != 0 {
513+
group_size = next;
514+
group_idx += 1;
515+
}
516+
// 0 means repeat previous group size forever
517+
}
518+
}
519+
520+
// Groups were collected right-to-left, reverse to get left-to-right
521+
groups.reverse();
522+
groups.join(&locale.thousands_sep)
523+
}
524+
525+
/// Apply locale-aware grouping and decimal point replacement to a formatted number.
526+
fn apply_locale_formatting(magnitude_str: String, locale: &LocaleInfo) -> String {
527+
let mut parts = magnitude_str.splitn(2, '.');
528+
let int_part = parts.next().unwrap();
529+
let grouped = Self::insert_locale_grouping(int_part, locale);
530+
531+
if let Some(frac_part) = parts.next() {
532+
format!("{grouped}{}{frac_part}", locale.decimal_point)
533+
} else {
534+
grouped
535+
}
536+
}
537+
538+
/// Format an integer with locale-aware 'n' format.
539+
pub fn format_int_locale(
540+
&self,
541+
num: &BigInt,
542+
locale: &LocaleInfo,
543+
) -> Result<String, FormatSpecError> {
544+
self.validate_format(FormatType::Decimal)?;
545+
let magnitude = num.abs();
546+
547+
let raw_magnitude_str = match self.format_type {
548+
Some(FormatType::Number(Case::Lower)) => self.format_int_radix(magnitude, 10),
549+
_ => return self.format_int(num),
550+
}?;
551+
552+
let magnitude_str = Self::apply_locale_formatting(raw_magnitude_str, locale);
553+
554+
let format_sign = self.sign.unwrap_or(FormatSign::Minus);
555+
let sign_str = match num.sign() {
556+
Sign::Minus => "-",
557+
_ => match format_sign {
558+
FormatSign::Plus => "+",
559+
FormatSign::Minus => "",
560+
FormatSign::MinusOrSpace => " ",
561+
},
562+
};
563+
564+
self.format_sign_and_align(&AsciiStr::new(&magnitude_str), sign_str, FormatAlign::Right)
565+
}
566+
567+
/// Format a float with locale-aware 'n' format.
568+
pub fn format_float_locale(
569+
&self,
570+
num: f64,
571+
locale: &LocaleInfo,
572+
) -> Result<String, FormatSpecError> {
573+
self.validate_format(FormatType::FixedPoint(Case::Lower))?;
574+
let precision = self.precision.unwrap_or(6);
575+
let magnitude = num.abs();
576+
577+
let raw_magnitude_str = match &self.format_type {
578+
Some(FormatType::Number(case)) => {
579+
let precision = if precision == 0 { 1 } else { precision };
580+
Ok(float::format_general(
581+
precision,
582+
magnitude,
583+
*case,
584+
self.alternate_form,
585+
false,
586+
))
587+
}
588+
_ => return self.format_float(num),
589+
}?;
590+
591+
let magnitude_str = Self::apply_locale_formatting(raw_magnitude_str, locale);
592+
593+
let format_sign = self.sign.unwrap_or(FormatSign::Minus);
594+
let sign_str = if num.is_sign_negative() && !num.is_nan() {
595+
"-"
596+
} else {
597+
match format_sign {
598+
FormatSign::Plus => "+",
599+
FormatSign::Minus => "",
600+
FormatSign::MinusOrSpace => " ",
601+
}
602+
};
603+
604+
self.format_sign_and_align(&AsciiStr::new(&magnitude_str), sign_str, FormatAlign::Right)
605+
}
606+
607+
/// Format a complex number with locale-aware 'n' format.
608+
pub fn format_complex_locale(
609+
&self,
610+
num: &Complex64,
611+
locale: &LocaleInfo,
612+
) -> Result<String, FormatSpecError> {
613+
// Reuse format_complex_re_im with 'g' type to get the base formatted parts,
614+
// then apply locale grouping. This matches CPython's format_complex_internal:
615+
// 'n' → 'g', add_parens=0, skip_re=0.
616+
let locale_spec = FormatSpec {
617+
format_type: Some(FormatType::GeneralFormat(Case::Lower)),
618+
..*self
619+
};
620+
let (formatted_re, formatted_im) = locale_spec.format_complex_re_im(num)?;
621+
622+
// Apply locale grouping to both parts
623+
let grouped_re = if formatted_re.is_empty() {
624+
formatted_re
625+
} else {
626+
// Split sign from magnitude, apply grouping, recombine
627+
let (sign, mag) = if formatted_re.starts_with('-')
628+
|| formatted_re.starts_with('+')
629+
|| formatted_re.starts_with(' ')
630+
{
631+
formatted_re.split_at(1)
632+
} else {
633+
("", formatted_re.as_str())
634+
};
635+
format!(
636+
"{sign}{}",
637+
Self::apply_locale_formatting(mag.to_string(), locale)
638+
)
639+
};
640+
641+
// formatted_im is like "+1234j" or "-1234j" or "1234j"
642+
// Split sign, magnitude, and 'j' suffix
643+
let im_str = &formatted_im;
644+
let (im_sign, im_rest) = if im_str.starts_with('+') || im_str.starts_with('-') {
645+
im_str.split_at(1)
646+
} else {
647+
("", im_str.as_str())
648+
};
649+
let im_mag = im_rest.strip_suffix('j').unwrap_or(im_rest);
650+
let im_grouped = Self::apply_locale_formatting(im_mag.to_string(), locale);
651+
let grouped_im = format!("{im_sign}{im_grouped}j");
652+
653+
// No parentheses for 'n' format (CPython: add_parens=0)
654+
let magnitude_str = format!("{grouped_re}{grouped_im}");
655+
656+
self.format_sign_and_align(&AsciiStr::new(&magnitude_str), "", FormatAlign::Right)
657+
}
658+
463659
pub fn format_bool(&self, input: bool) -> Result<String, FormatSpecError> {
464660
let x = u8::from(input);
465661
match &self.format_type {

crates/vm/src/builtins/complex.rs

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -321,8 +321,15 @@ impl PyComplex {
321321
if spec.is_empty() {
322322
return Ok(zelf.as_object().str(vm)?.as_wtf8().to_owned());
323323
}
324-
FormatSpec::parse(spec.as_str())
325-
.and_then(|format_spec| format_spec.format_complex(&zelf.value))
324+
let format_spec =
325+
FormatSpec::parse(spec.as_str()).map_err(|err| err.into_pyexception(vm))?;
326+
let result = if format_spec.has_locale_format() {
327+
let locale = crate::format::get_locale_info();
328+
format_spec.format_complex_locale(&zelf.value, &locale)
329+
} else {
330+
format_spec.format_complex(&zelf.value)
331+
};
332+
result
326333
.map(Wtf8Buf::from_string)
327334
.map_err(|err| err.into_pyexception(vm))
328335
}

crates/vm/src/builtins/float.rs

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -259,8 +259,15 @@ impl PyFloat {
259259
if spec.is_empty() {
260260
return Ok(zelf.as_object().str(vm)?.as_wtf8().to_owned());
261261
}
262-
FormatSpec::parse(spec.as_str())
263-
.and_then(|format_spec| format_spec.format_float(zelf.value))
262+
let format_spec =
263+
FormatSpec::parse(spec.as_str()).map_err(|err| err.into_pyexception(vm))?;
264+
let result = if format_spec.has_locale_format() {
265+
let locale = crate::format::get_locale_info();
266+
format_spec.format_float_locale(zelf.value, &locale)
267+
} else {
268+
format_spec.format_float(zelf.value)
269+
};
270+
result
264271
.map(Wtf8Buf::from_string)
265272
.map_err(|err| err.into_pyexception(vm))
266273
}

crates/vm/src/builtins/int.rs

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -499,8 +499,15 @@ impl PyInt {
499499
if spec.is_empty() && !zelf.class().is(vm.ctx.types.int_type) {
500500
return Ok(zelf.as_object().str(vm)?.as_wtf8().to_owned());
501501
}
502-
FormatSpec::parse(spec.as_str())
503-
.and_then(|format_spec| format_spec.format_int(&zelf.value))
502+
let format_spec =
503+
FormatSpec::parse(spec.as_str()).map_err(|err| err.into_pyexception(vm))?;
504+
let result = if format_spec.has_locale_format() {
505+
let locale = crate::format::get_locale_info();
506+
format_spec.format_int_locale(&zelf.value, &locale)
507+
} else {
508+
format_spec.format_int(&zelf.value)
509+
};
510+
result
504511
.map(Wtf8Buf::from_string)
505512
.map_err(|err| err.into_pyexception(vm))
506513
}

crates/vm/src/format.rs

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,64 @@ use crate::{
99
use crate::common::format::*;
1010
use crate::common::wtf8::{Wtf8, Wtf8Buf};
1111

12+
/// Get locale information from C `localeconv()` for the 'n' format specifier.
13+
#[cfg(unix)]
14+
pub(crate) fn get_locale_info() -> LocaleInfo {
15+
use core::ffi::CStr;
16+
unsafe {
17+
let lc = libc::localeconv();
18+
if lc.is_null() {
19+
return LocaleInfo {
20+
thousands_sep: String::new(),
21+
decimal_point: ".".to_string(),
22+
grouping: vec![],
23+
};
24+
}
25+
let thousands_sep = CStr::from_ptr((*lc).thousands_sep)
26+
.to_string_lossy()
27+
.into_owned();
28+
let decimal_point = CStr::from_ptr((*lc).decimal_point)
29+
.to_string_lossy()
30+
.into_owned();
31+
let grouping = parse_grouping((*lc).grouping);
32+
LocaleInfo {
33+
thousands_sep,
34+
decimal_point,
35+
grouping,
36+
}
37+
}
38+
}
39+
40+
#[cfg(not(unix))]
41+
pub(crate) fn get_locale_info() -> LocaleInfo {
42+
LocaleInfo {
43+
thousands_sep: String::new(),
44+
decimal_point: ".".to_string(),
45+
grouping: vec![],
46+
}
47+
}
48+
49+
/// Parse C `lconv.grouping` into a `Vec<u8>`.
50+
/// Reads bytes until 0 or CHAR_MAX, then appends 0 (meaning "repeat last group").
51+
#[cfg(unix)]
52+
unsafe fn parse_grouping(grouping: *const libc::c_char) -> Vec<u8> {
53+
let mut result = Vec::new();
54+
if grouping.is_null() {
55+
return result;
56+
}
57+
unsafe {
58+
let mut ptr = grouping;
59+
while ![0, libc::c_char::MAX].contains(&*ptr) {
60+
result.push(*ptr as u8);
61+
ptr = ptr.add(1);
62+
}
63+
}
64+
if !result.is_empty() {
65+
result.push(0);
66+
}
67+
result
68+
}
69+
1270
impl IntoPyException for FormatSpecError {
1371
fn into_pyexception(self, vm: &VirtualMachine) -> PyBaseExceptionRef {
1472
match self {

0 commit comments

Comments
 (0)