# Add '_' as a digit-grouping option to the format-spec parser (PEP 515).
#
# * ',' now stores LT_DEFAULT_LOCALE and '_' stores LT_UNDERSCORE_LOCALE in
#   format->thousands_separators, so that field is passed straight to
#   get_locale_info(); this is why LT_NO_LOCALE must be 0.
# * '_' combined with type 'b', 'o', 'x' or 'X' switches to
#   LT_UNDER_FOUR_LOCALE: '_' separator, groups of four digits.
# * ',' combined with b/o/x/X deliberately falls through to the existing
#   invalid_comma_type() error path.
#
# NOTE(review): line breaks and indentation were reconstructed from a
# whitespace-collapsed copy of this patch; check the context lines against
# the target file before applying.
diff -r 4b64a049f451 Python/formatter_unicode.c
--- a/Python/formatter_unicode.c	Fri Aug 19 12:00:13 2016 +0300
+++ b/Python/formatter_unicode.c	Fri Aug 19 23:33:16 2016 +1000
@@ -32,11 +32,11 @@
 {
     if (presentation_type > 32 && presentation_type < 128)
         PyErr_Format(PyExc_ValueError,
-                     "Cannot specify ',' with '%c'.",
+                     "Cannot specify ',' or '_' with '%c'.",
                      (char)presentation_type);
     else
         PyErr_Format(PyExc_ValueError,
-                     "Cannot specify ',' with '\\x%x'.",
+                     "Cannot specify ',' or '_' with '\\x%x'.",
                      (unsigned int)presentation_type);
 }
 
@@ -105,6 +105,12 @@
     }
 }
 
+/* Locale type codes. LT_NO_LOCALE must be zero. */
+#define LT_NO_LOCALE 0
+#define LT_DEFAULT_LOCALE 1
+#define LT_UNDERSCORE_LOCALE 2
+#define LT_UNDER_FOUR_LOCALE 3
+#define LT_CURRENT_LOCALE 4
 
 typedef struct {
     Py_UCS4 fill_char;
@@ -218,7 +224,13 @@
 
     /* Comma signifies add thousands separators */
     if (end-pos && READ_spec(pos) == ',') {
-        format->thousands_separators = 1;
+        format->thousands_separators = LT_DEFAULT_LOCALE;
+        ++pos;
+    }
+
+    /* Underscore signifies add thousands separators */
+    if (end-pos && READ_spec(pos) == '_') {
+        format->thousands_separators = LT_UNDERSCORE_LOCALE;
         ++pos;
     }
 
@@ -270,6 +282,17 @@
         case '\0':
             /* These are allowed. See PEP 378.*/
             break;
+        case 'b':
+        case 'o':
+        case 'x':
+        case 'X':
+            /* Underscores are allowed in bin/oct/hex. See PEP 515. */
+            if (format->thousands_separators == LT_UNDERSCORE_LOCALE) {
+                /* Every four digits, not every three, in bin/oct/hex. */
+                format->thousands_separators = LT_UNDER_FOUR_LOCALE;
+                break;
+            }
+            /* fall through: ',' is not valid with bin/oct/hex */
         default:
             invalid_comma_type(format->type);
             return 0;
@@ -346,11 +369,6 @@
 /*********** common routines for numeric formatting *********************/
 /************************************************************************/
 
-/* Locale type codes. */
-#define LT_CURRENT_LOCALE 0
-#define LT_DEFAULT_LOCALE 1
-#define LT_NO_LOCALE 2
-
 /* Locale info needed for formatting integers and the part of floats
    before and including the decimal. Note that locales only support
    8-bit chars, not unicode. */
@@ -660,8 +678,8 @@
 
 /* Find the decimal point character(s?), thousands_separator(s?), and
    grouping description, either for the current locale if type is
-   LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE, or
-   none if LT_NO_LOCALE. */
+   LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE or
+   LT_UNDERSCORE_LOCALE/LT_UNDER_FOUR_LOCALE, or none if LT_NO_LOCALE. */
 static int
 get_locale_info(int type, LocaleInfo *locale_info)
 {
@@ -684,16 +702,22 @@
         break;
     }
     case LT_DEFAULT_LOCALE:
+    case LT_UNDERSCORE_LOCALE:
+    case LT_UNDER_FOUR_LOCALE:
         locale_info->decimal_point = PyUnicode_FromOrdinal('.');
-        locale_info->thousands_sep = PyUnicode_FromOrdinal(',');
+        locale_info->thousands_sep = PyUnicode_FromOrdinal(
+            type == LT_DEFAULT_LOCALE ? ',' : '_');
         if (!locale_info->decimal_point || !locale_info->thousands_sep) {
             Py_XDECREF(locale_info->decimal_point);
             Py_XDECREF(locale_info->thousands_sep);
             return -1;
         }
-        locale_info->grouping = "\3"; /* Group every 3 characters. The
+        if (type != LT_UNDER_FOUR_LOCALE)
+            locale_info->grouping = "\3"; /* Group every 3 characters. The
                                          (implicit) trailing 0 means repeat
                                          infinitely. */
+        else
+            locale_info->grouping = "\4"; /* Bin/oct/hex group every four. */
         break;
     case LT_NO_LOCALE:
         locale_info->decimal_point = PyUnicode_FromOrdinal('.');
@@ -945,9 +969,7 @@
 
     /* Determine the grouping, separator, and decimal point, if any. */
     if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
-                        (format->thousands_separators ?
-                         LT_DEFAULT_LOCALE :
-                         LT_NO_LOCALE),
+                        format->thousands_separators,
                         &locale) == -1)
         goto done;
 
@@ -1092,9 +1114,7 @@
 
     /* Determine the grouping, separator, and decimal point, if any. */
     if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
-                        (format->thousands_separators ?
-                         LT_DEFAULT_LOCALE :
-                         LT_NO_LOCALE),
+                        format->thousands_separators,
                         &locale) == -1)
         goto done;
 
@@ -1270,9 +1290,7 @@
 
     /* Determine the grouping, separator, and decimal point, if any. */
     if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
-                        (format->thousands_separators ?
-                         LT_DEFAULT_LOCALE :
-                         LT_NO_LOCALE),
+                        format->thousands_separators,
                         &locale) == -1)
         goto done;
 