Skip to content

Commit 2611b0b

Browse files
authored
Merge pull request #17011 from mhvk/units-composite-unicode
Recognize units written with unicode symbols also in composites
2 parents 7352d2b + 467fa58 commit 2611b0b

8 files changed

Lines changed: 128 additions & 110 deletions

File tree

astropy/units/format/cds.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,8 +106,10 @@ def t_X(t): # multiplication for factor in front of unit
106106
r"[x×]"
107107
return t
108108

109+
# Most units are just combinations of letters with no numbers, but there
110+
# are a few special ones (\h is the Planck constant) and three that end in 0.
109111
def t_UNIT(t):
110-
r"\%|°|\\h|((?!\d)\w)+"
112+
r"%|°|\\h|(a|eps|mu)0|((?!\d)\w)+"
111113
t.value = cls._get_unit(t)
112114
return t
113115

astropy/units/format/cds_lextab.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
_lexreflags = 32
1515
_lexliterals = ''
1616
_lexstateinfo = {'INITIAL': 'inclusive'}
17-
_lexstatere = {'INITIAL': [('(?P<t_UFLOAT>((\\d+\\.?\\d+)|(\\.\\d+))([eE][+-]?\\d+)?)|(?P<t_UINT>\\d+)|(?P<t_SIGN>[+-](?=\\d))|(?P<t_X>[x×])|(?P<t_UNIT>\\%|°|\\\\h|((?!\\d)\\w)+)|(?P<t_DIMENSIONLESS>---|-)|(?P<t_PRODUCT>\\.)|(?P<t_OPEN_PAREN>\\()|(?P<t_CLOSE_PAREN>\\))|(?P<t_OPEN_BRACKET>\\[)|(?P<t_CLOSE_BRACKET>\\])|(?P<t_DIVISION>/)', [None, ('t_UFLOAT', 'UFLOAT'), None, None, None, None, ('t_UINT', 'UINT'), ('t_SIGN', 'SIGN'), ('t_X', 'X'), ('t_UNIT', 'UNIT'), None, ('t_DIMENSIONLESS', 'DIMENSIONLESS'), (None, 'PRODUCT'), (None, 'OPEN_PAREN'), (None, 'CLOSE_PAREN'), (None, 'OPEN_BRACKET'), (None, 'CLOSE_BRACKET'), (None, 'DIVISION')])]}
17+
_lexstatere = {'INITIAL': [('(?P<t_UFLOAT>((\\d+\\.?\\d+)|(\\.\\d+))([eE][+-]?\\d+)?)|(?P<t_UINT>\\d+)|(?P<t_SIGN>[+-](?=\\d))|(?P<t_X>[x×])|(?P<t_UNIT>%|°|\\\\h|(a|eps|mu)0|((?!\\d)\\w)+)|(?P<t_DIMENSIONLESS>---|-)|(?P<t_PRODUCT>\\.)|(?P<t_OPEN_PAREN>\\()|(?P<t_CLOSE_PAREN>\\))|(?P<t_OPEN_BRACKET>\\[)|(?P<t_CLOSE_BRACKET>\\])|(?P<t_DIVISION>/)', [None, ('t_UFLOAT', 'UFLOAT'), None, None, None, None, ('t_UINT', 'UINT'), ('t_SIGN', 'SIGN'), ('t_X', 'X'), ('t_UNIT', 'UNIT'), None, None, ('t_DIMENSIONLESS', 'DIMENSIONLESS'), (None, 'PRODUCT'), (None, 'OPEN_PAREN'), (None, 'CLOSE_PAREN'), (None, 'OPEN_BRACKET'), (None, 'CLOSE_BRACKET'), (None, 'DIVISION')])]}
1818
_lexstateignore = {'INITIAL': ''}
1919
_lexstateerrorf = {'INITIAL': 't_error'}
2020
_lexstateeoff = {}

astropy/units/format/cds_parsetab.py

Lines changed: 27 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -37,31 +37,31 @@
3737
del _lr_goto_items
3838
_lr_productions = [
3939
("S' -> main","S'",1,None,None,None),
40-
('main -> factor combined_units','main',2,'p_main','cds.py',147),
41-
('main -> combined_units','main',1,'p_main','cds.py',148),
42-
('main -> DIMENSIONLESS','main',1,'p_main','cds.py',149),
43-
('main -> OPEN_BRACKET combined_units CLOSE_BRACKET','main',3,'p_main','cds.py',150),
44-
('main -> OPEN_BRACKET DIMENSIONLESS CLOSE_BRACKET','main',3,'p_main','cds.py',151),
45-
('main -> factor','main',1,'p_main','cds.py',152),
46-
('combined_units -> product_of_units','combined_units',1,'p_combined_units','cds.py',166),
47-
('combined_units -> division_of_units','combined_units',1,'p_combined_units','cds.py',167),
48-
('product_of_units -> unit_expression PRODUCT combined_units','product_of_units',3,'p_product_of_units','cds.py',173),
49-
('product_of_units -> unit_expression','product_of_units',1,'p_product_of_units','cds.py',174),
50-
('division_of_units -> DIVISION unit_expression','division_of_units',2,'p_division_of_units','cds.py',183),
51-
('division_of_units -> combined_units DIVISION unit_expression','division_of_units',3,'p_division_of_units','cds.py',184),
52-
('unit_expression -> unit_with_power','unit_expression',1,'p_unit_expression','cds.py',193),
53-
('unit_expression -> OPEN_PAREN combined_units CLOSE_PAREN','unit_expression',3,'p_unit_expression','cds.py',194),
54-
('factor -> signed_float X UINT signed_int','factor',4,'p_factor','cds.py',203),
55-
('factor -> UINT X UINT signed_int','factor',4,'p_factor','cds.py',204),
56-
('factor -> UINT signed_int','factor',2,'p_factor','cds.py',205),
57-
('factor -> UINT','factor',1,'p_factor','cds.py',206),
58-
('factor -> signed_float','factor',1,'p_factor','cds.py',207),
59-
('unit_with_power -> UNIT numeric_power','unit_with_power',2,'p_unit_with_power','cds.py',222),
60-
('unit_with_power -> UNIT','unit_with_power',1,'p_unit_with_power','cds.py',223),
61-
('numeric_power -> sign UINT','numeric_power',2,'p_numeric_power','cds.py',232),
62-
('sign -> SIGN','sign',1,'p_sign','cds.py',238),
63-
('sign -> <empty>','sign',0,'p_sign','cds.py',239),
64-
('signed_int -> SIGN UINT','signed_int',2,'p_signed_int','cds.py',248),
65-
('signed_float -> sign UINT','signed_float',2,'p_signed_float','cds.py',254),
66-
('signed_float -> sign UFLOAT','signed_float',2,'p_signed_float','cds.py',255),
40+
('main -> factor combined_units','main',2,'p_main','cds.py',143),
41+
('main -> combined_units','main',1,'p_main','cds.py',144),
42+
('main -> DIMENSIONLESS','main',1,'p_main','cds.py',145),
43+
('main -> OPEN_BRACKET combined_units CLOSE_BRACKET','main',3,'p_main','cds.py',146),
44+
('main -> OPEN_BRACKET DIMENSIONLESS CLOSE_BRACKET','main',3,'p_main','cds.py',147),
45+
('main -> factor','main',1,'p_main','cds.py',148),
46+
('combined_units -> product_of_units','combined_units',1,'p_combined_units','cds.py',162),
47+
('combined_units -> division_of_units','combined_units',1,'p_combined_units','cds.py',163),
48+
('product_of_units -> unit_expression PRODUCT combined_units','product_of_units',3,'p_product_of_units','cds.py',169),
49+
('product_of_units -> unit_expression','product_of_units',1,'p_product_of_units','cds.py',170),
50+
('division_of_units -> DIVISION unit_expression','division_of_units',2,'p_division_of_units','cds.py',179),
51+
('division_of_units -> combined_units DIVISION unit_expression','division_of_units',3,'p_division_of_units','cds.py',180),
52+
('unit_expression -> unit_with_power','unit_expression',1,'p_unit_expression','cds.py',189),
53+
('unit_expression -> OPEN_PAREN combined_units CLOSE_PAREN','unit_expression',3,'p_unit_expression','cds.py',190),
54+
('factor -> signed_float X UINT signed_int','factor',4,'p_factor','cds.py',199),
55+
('factor -> UINT X UINT signed_int','factor',4,'p_factor','cds.py',200),
56+
('factor -> UINT signed_int','factor',2,'p_factor','cds.py',201),
57+
('factor -> UINT','factor',1,'p_factor','cds.py',202),
58+
('factor -> signed_float','factor',1,'p_factor','cds.py',203),
59+
('unit_with_power -> UNIT numeric_power','unit_with_power',2,'p_unit_with_power','cds.py',218),
60+
('unit_with_power -> UNIT','unit_with_power',1,'p_unit_with_power','cds.py',219),
61+
('numeric_power -> sign UINT','numeric_power',2,'p_numeric_power','cds.py',228),
62+
('sign -> SIGN','sign',1,'p_sign','cds.py',234),
63+
('sign -> <empty>','sign',0,'p_sign','cds.py',235),
64+
('signed_int -> SIGN UINT','signed_int',2,'p_signed_int','cds.py',244),
65+
('signed_float -> sign UINT','signed_float',2,'p_signed_float','cds.py',250),
66+
('signed_float -> sign UFLOAT','signed_float',2,'p_signed_float','cds.py',251),
6767
]

astropy/units/format/generic.py

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -110,8 +110,11 @@ def t_FUNCNAME(t):
110110
r"((sqrt)|(ln)|(exp)|(log)|(mag)|(dB)|(dex))(?=\ *\()"
111111
return t
112112

113+
# A possible unit is something that consists of characters not used
114+
# for anything else: no spaces, no digits, signs, periods, stars,
115+
# carets, solidi, parentheses or commas.
113116
def t_UNIT(t):
114-
"%|([YZEPTGMkhdcmu\N{MICRO SIGN}npfazy]?'((?!\\d)\\w)+')|((?!\\d)\\w)+"
117+
r"[^\s\d+\-\./\*\^\(\)\,]+"
115118
t.value = cls._get_unit(t)
116119
return t
117120

@@ -457,6 +460,8 @@ def _parse_unit(cls, s: str, detailed_exception: bool = True) -> UnitBase:
457460
elif not s.isascii():
458461
if s[0] == "\N{MICRO SIGN}":
459462
s = "u" + s[1:]
463+
elif s[0] == "°":
464+
s = "deg" if len(s) == 1 else "deg_" + s[1:]
460465
if s[-1] in cls._prefixable_unit_symbols:
461466
s = s[:-1] + cls._prefixable_unit_symbols[s[-1]]
462467
elif len(s) > 1 and s[-1] in cls._unit_suffix_symbols:
@@ -529,18 +534,11 @@ def _parse_unit(cls, s: str, detailed_exception: bool = True) -> UnitBase:
529534
_regex_superscript: ClassVar[Pattern[str]] = re.compile(
530535
f"[{_superscripts}]?[{_superscripts[2:]}]+"
531536
)
532-
_regex_deg: ClassVar[Pattern[str]] = re.compile("°([CF])?")
533537

534538
@classmethod
535539
def _convert_superscript(cls, m: Match[str]) -> str:
536540
return f"({m.group().translate(cls._superscript_translations)})"
537541

538-
@classmethod
539-
def _convert_deg(cls, m: Match[str]) -> str:
540-
if len(m.string) == 1:
541-
return "deg"
542-
return m.string.replace("°", "deg_")
543-
544542
@classmethod
545543
def parse(cls, s: str, debug: bool = False) -> UnitBase:
546544
if not isinstance(s, str):
@@ -559,8 +557,6 @@ def parse(cls, s: str, debug: bool = False) -> UnitBase:
559557
# Translate superscripts to parenthesized numbers; this ensures
560558
# that mixes of superscripts and regular numbers fail.
561559
s = cls._regex_superscript.sub(cls._convert_superscript, s)
562-
# Translate possible degrees.
563-
s = cls._regex_deg.sub(cls._convert_deg, s)
564560

565561
result = cls._do_parse(s, debug=debug)
566562
# Check for excess solidi, but exclude fractional exponents (accepted)

astropy/units/format/generic_lextab.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
_lexreflags = 32
1515
_lexliterals = ''
1616
_lexstateinfo = {'INITIAL': 'inclusive'}
17-
_lexstatere = {'INITIAL': [("(?P<t_UFLOAT>((\\d+\\.?\\d*)|(\\.\\d+))([eE][+-]?\\d+)?)|(?P<t_UINT>\\d+)|(?P<t_SIGN>[+-](?=\\d))|(?P<t_FUNCNAME>((sqrt)|(ln)|(exp)|(log)|(mag)|(dB)|(dex))(?=\\ *\\())|(?P<t_UNIT>%|([YZEPTGMkhdcmuµnpfazy]?'((?!\\d)\\w)+')|((?!\\d)\\w)+)|(?P<t_DOUBLE_STAR>\\*\\*)|(?P<t_COMMA>\\,)|(?P<t_STAR>\\*)|(?P<t_PERIOD>\\.)|(?P<t_CARET>\\^)|(?P<t_OPEN_PAREN>\\()|(?P<t_CLOSE_PAREN>\\))|(?P<t_SOLIDUS>/)", [None, ('t_UFLOAT', 'UFLOAT'), None, None, None, None, ('t_UINT', 'UINT'), ('t_SIGN', 'SIGN'), ('t_FUNCNAME', 'FUNCNAME'), None, None, None, None, None, None, None, None, ('t_UNIT', 'UNIT'), None, None, None, (None, 'DOUBLE_STAR'), (None, 'COMMA'), (None, 'STAR'), (None, 'PERIOD'), (None, 'CARET'), (None, 'OPEN_PAREN'), (None, 'CLOSE_PAREN'), (None, 'SOLIDUS')])]}
17+
_lexstatere = {'INITIAL': [('(?P<t_UFLOAT>((\\d+\\.?\\d*)|(\\.\\d+))([eE][+-]?\\d+)?)|(?P<t_UINT>\\d+)|(?P<t_SIGN>[+-](?=\\d))|(?P<t_FUNCNAME>((sqrt)|(ln)|(exp)|(log)|(mag)|(dB)|(dex))(?=\\ *\\())|(?P<t_UNIT>[^\\s\\d+\\-\\./\\*\\^\\(\\)\\,]+)|(?P<t_DOUBLE_STAR>\\*\\*)|(?P<t_COMMA>\\,)|(?P<t_STAR>\\*)|(?P<t_PERIOD>\\.)|(?P<t_CARET>\\^)|(?P<t_OPEN_PAREN>\\()|(?P<t_CLOSE_PAREN>\\))|(?P<t_SOLIDUS>/)', [None, ('t_UFLOAT', 'UFLOAT'), None, None, None, None, ('t_UINT', 'UINT'), ('t_SIGN', 'SIGN'), ('t_FUNCNAME', 'FUNCNAME'), None, None, None, None, None, None, None, None, ('t_UNIT', 'UNIT'), (None, 'DOUBLE_STAR'), (None, 'COMMA'), (None, 'STAR'), (None, 'PERIOD'), (None, 'CARET'), (None, 'OPEN_PAREN'), (None, 'CLOSE_PAREN'), (None, 'SOLIDUS')])]}
1818
_lexstateignore = {'INITIAL': ' '}
1919
_lexstateerrorf = {'INITIAL': 't_error'}
2020
_lexstateeoff = {}

0 commit comments

Comments
 (0)