Skip to content

Commit 5072669

Browse files
committed
Drastically restrict the grammar of tuple indices
1 parent fda6d37 commit 5072669

29 files changed

Lines changed: 740 additions & 396 deletions

compiler/rustc_parse/src/parser/expr.rs

Lines changed: 85 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ pub(super) enum DestructuredFloat {
4444
/// 1.
4545
TrailingDot(Symbol, Span, Span),
4646
/// 1.2 | 1.2e3
47-
MiddleDot(Symbol, Span, Span, Symbol, Span),
47+
MiddleDot(Symbol, Span, Symbol, Span),
4848
/// Invalid
4949
Error,
5050
}
@@ -961,6 +961,7 @@ impl<'a> Parser<'a> {
961961
token::Ident(..) => self.parse_dot_suffix(base, lo),
962962
token::Literal(token::Lit { kind: token::Integer, symbol, suffix }) => {
963963
let ident_span = self.token.span;
964+
let symbol = self.validate_tuple_index(symbol, ident_span);
964965
self.bump();
965966
Ok(self.mk_expr_tuple_field_access(lo, ident_span, base, symbol, suffix))
966967
}
@@ -986,13 +987,9 @@ impl<'a> Parser<'a> {
986987
self.mk_expr_tuple_field_access(lo, ident_span, base, sym, None)
987988
}
988989
// 1.2 | 1.2e3
989-
DestructuredFloat::MiddleDot(
990-
sym1,
991-
ident1_span,
992-
_dot_span,
993-
sym2,
994-
ident2_span,
995-
) => {
990+
// FIXME(fmease): (preexisting) For some reason, for `x.0.0xyz` (i.e., suffixed),
991+
// we highlight `0.0xyz` when we should just highlight `0xyz`.
992+
DestructuredFloat::MiddleDot(sym1, ident1_span, sym2, ident2_span) => {
996993
// `foo.1.2` (or `foo.1.2e3`): two complete dot accesses. We end up with
997994
// the `sym2` (`2` or `2e3`) token in `self.prev_token` and the following
998995
// token in `self.token`.
@@ -1055,30 +1052,68 @@ impl<'a> Parser<'a> {
10551052
// support pushing "future tokens" (would be also helpful to `break_and_eat`), or
10561053
// we should break everything including floats into more basic proc-macro style
10571054
// tokens in the lexer (probably preferable).
1055+
// FIXME(fmease): De-jank the impl.
10581056
pub(super) fn break_up_float(&self, float: Symbol, span: Span) -> DestructuredFloat {
10591057
#[derive(Debug)]
10601058
enum FloatComponent {
1061-
IdentLike(String),
1059+
IdentLike(IdentLike),
10621060
Punct(char),
10631061
}
10641062
use FloatComponent::*;
10651063

1064+
#[derive(Debug, Default)]
1065+
struct IdentLike {
1066+
str: String,
1067+
len: usize,
1068+
poisoned: bool,
1069+
}
1070+
10661071
let float_str = float.as_str();
10671072
let mut components = Vec::new();
1068-
let mut ident_like = String::new();
1073+
let mut ident_like = IdentLike::default();
1074+
let mut zero = false;
1075+
10691076
for c in float_str.chars() {
1070-
if c == '_' || c.is_ascii_alphanumeric() {
1071-
ident_like.push(c);
1072-
} else if matches!(c, '.' | '+' | '-') {
1073-
if !ident_like.is_empty() {
1074-
components.push(IdentLike(mem::take(&mut ident_like)));
1077+
match c {
1078+
'0'..='9' => {
1079+
ident_like.len += 1;
1080+
if zero {
1081+
ident_like.poisoned = true;
1082+
}
1083+
zero = c == '0' && ident_like.str.is_empty();
1084+
if !zero {
1085+
ident_like.str.push(c);
1086+
}
10751087
}
1076-
components.push(Punct(c));
1077-
} else {
1078-
panic!("unexpected character in a float token: {c:?}")
1088+
'_' | 'b' | 'o' | 'x' => {
1089+
ident_like.len += 1;
1090+
ident_like.poisoned = true;
1091+
}
1092+
'e' | 'E' => {
1093+
ident_like.len += 1;
1094+
ident_like.poisoned = true;
1095+
if mem::take(&mut zero) {
1096+
ident_like.str.push('0');
1097+
}
1098+
ident_like.str.push(c);
1099+
}
1100+
'.' | '+' | '-' => {
1101+
if mem::take(&mut zero) {
1102+
ident_like.str.push('0');
1103+
}
1104+
if !ident_like.str.is_empty() {
1105+
components.push(IdentLike(mem::take(&mut ident_like)));
1106+
}
1107+
components.push(Punct(c));
1108+
}
1109+
_ => panic!("unexpected character in a float token: {c:?}"),
10791110
}
10801111
}
1081-
if !ident_like.is_empty() {
1112+
1113+
if zero {
1114+
ident_like.str.push('0');
1115+
}
1116+
if !ident_like.str.is_empty() {
10821117
components.push(IdentLike(ident_like));
10831118
}
10841119

@@ -1090,44 +1125,58 @@ impl<'a> Parser<'a> {
10901125

10911126
match &*components {
10921127
// 1e2
1093-
[IdentLike(i)] => {
1094-
DestructuredFloat::Single(Symbol::intern(i), span)
1095-
}
1128+
[IdentLike(ident)] => {
1129+
if ident.poisoned {
1130+
self.dcx().span_err(span, "invalid tuple index");
1131+
}
1132+
1133+
DestructuredFloat::Single(Symbol::intern(&ident.str), span) },
10961134
// 1.
10971135
[IdentLike(left), Punct('.')] => {
10981136
let (left_span, dot_span) = if can_take_span_apart() {
1099-
let left_span = span.with_hi(span.lo() + BytePos::from_usize(left.len()));
1137+
let left_span = span.with_hi(span.lo() + BytePos::from_usize(left.len));
11001138
let dot_span = span.with_lo(left_span.hi());
11011139
(left_span, dot_span)
11021140
} else {
11031141
(span, span)
11041142
};
1105-
let left = Symbol::intern(left);
1143+
if left.poisoned {
1144+
self.dcx().span_err(left_span, "invalid tuple index");
1145+
}
1146+
let left = Symbol::intern(&left.str);
11061147
DestructuredFloat::TrailingDot(left, left_span, dot_span)
11071148
}
11081149
// 1.2 | 1.2e3
11091150
[IdentLike(left), Punct('.'), IdentLike(right)] => {
1110-
let (left_span, dot_span, right_span) = if can_take_span_apart() {
1111-
let left_span = span.with_hi(span.lo() + BytePos::from_usize(left.len()));
1151+
let (left_span, right_span) = if can_take_span_apart() {
1152+
let left_span = span.with_hi(span.lo() + BytePos::from_usize(left.len));
11121153
let dot_span = span.with_lo(left_span.hi()).with_hi(left_span.hi() + BytePos(1));
11131154
let right_span = span.with_lo(dot_span.hi());
1114-
(left_span, dot_span, right_span)
1155+
(left_span, right_span)
11151156
} else {
1116-
(span, span, span)
1157+
(span, span)
11171158
};
1118-
let left = Symbol::intern(left);
1119-
let right = Symbol::intern(right);
1120-
DestructuredFloat::MiddleDot(left, left_span, dot_span, right, right_span)
1159+
if left.poisoned {
1160+
self.dcx().span_err(left_span, "invalid tuple index");
1161+
}
1162+
let left = Symbol::intern(&left.str);
1163+
if right.poisoned {
1164+
self.dcx().span_err(right_span, "invalid tuple index");
1165+
}
1166+
let right = Symbol::intern(&right.str);
1167+
DestructuredFloat::MiddleDot(left, left_span, right, right_span)
11211168
}
11221169
// 1e+ | 1e- (recovered)
1123-
[IdentLike(_), Punct('+' | '-')] |
1170+
[IdentLike(..), Punct('+' | '-')] |
11241171
// 1e+2 | 1e-2
1125-
[IdentLike(_), Punct('+' | '-'), IdentLike(_)] |
1172+
[IdentLike(..), Punct('+' | '-'), IdentLike(..)] |
11261173
// 1.2e+ | 1.2e-
1127-
[IdentLike(_), Punct('.'), IdentLike(_), Punct('+' | '-')] |
1174+
[IdentLike(..), Punct('.'), IdentLike(..), Punct('+' | '-')] |
11281175
// 1.2e+3 | 1.2e-3
1129-
[IdentLike(_), Punct('.'), IdentLike(_), Punct('+' | '-'), IdentLike(_)] => {
1176+
[IdentLike(..), Punct('.'), IdentLike(..), Punct('+' | '-'), IdentLike(..)] => {
11301177
// See the FIXME about `TokenCursor` above.
1178+
// FIXME(fmease): We report 2 errors on `x.0e+1`.
1179+
// FIXME(fmease): (preexisting) Too many confusing errs in cases like `x.0x0.0`.
11311180
self.error_unexpected_after_dot();
11321181
DestructuredFloat::Error
11331182
}
@@ -1187,13 +1236,7 @@ impl<'a> Parser<'a> {
11871236
fields.insert(start_idx, Ident::new(sym, sym_span));
11881237
}
11891238
// 1.2 | 1.2e3
1190-
DestructuredFloat::MiddleDot(
1191-
symbol1,
1192-
span1,
1193-
_dot_span,
1194-
symbol2,
1195-
span2,
1196-
) => {
1239+
DestructuredFloat::MiddleDot(symbol1, span1, symbol2, span2) => {
11971240
trailing_dot = None;
11981241
fields.insert(start_idx, Ident::new(symbol2, span2));
11991242
fields.insert(start_idx, Ident::new(symbol1, span1));

compiler/rustc_parse/src/parser/mod.rs

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1307,7 +1307,11 @@ impl<'a> Parser<'a> {
13071307
if self.eat_keyword(exp!(Mut)) { Mutability::Mut } else { Mutability::Not }
13081308
}
13091309

1310-
/// Parses reference binding mode (`ref`, `ref mut`, `ref pin const`, `ref pin mut`, or nothing).
1310+
/// Parse a reference binding mode.
1311+
///
1312+
/// ```ebnf
1313+
/// ByRef = ("ref" ("mut" | "pin" ("const" | "mut"))?)?
1314+
/// ```
13111315
fn parse_byref(&mut self) -> ByRef {
13121316
if self.eat_keyword(exp!(Ref)) {
13131317
let (pinnedness, mutability) = self.parse_pin_and_mut();
@@ -1328,22 +1332,49 @@ impl<'a> Parser<'a> {
13281332
}
13291333
}
13301334

1335+
/// Parse a field name.
1336+
///
1337+
/// ```ebnf
1338+
/// FieldName = TupleIndex | Ident
1339+
/// TupleIndex = re"0|[1-9][0-9]*"
1340+
/// ```
13311341
fn parse_field_name(&mut self) -> PResult<'a, Ident> {
1342+
// FIXME(fmease): It would be nice if we could emit a custom error when encountering
1343+
// float literals. E.g., ideally, we'd emit "invalid tuple index" for `1e1`.
1344+
// I'm even thinking about breaking up float lits here, just so we can emit
1345+
// unexpected token `.` for `1.2` etc.
13321346
if let token::Literal(token::Lit { kind: token::Integer, symbol, suffix }) = self.token.kind
13331347
{
1348+
let ident_span = self.token.span;
1349+
let symbol = self.validate_tuple_index(symbol, ident_span);
13341350
if let Some(suffix) = suffix {
13351351
self.dcx().emit_err(errors::InvalidLiteralSuffixOnTupleIndex {
1336-
span: self.token.span,
1352+
span: ident_span,
13371353
suffix,
13381354
});
13391355
}
13401356
self.bump();
1341-
Ok(Ident::new(symbol, self.prev_token.span))
1357+
Ok(Ident::new(symbol, ident_span))
13421358
} else {
13431359
self.parse_ident_common(true)
13441360
}
13451361
}
13461362

1363+
// FIXME(fmease): De-jank this impl.
1364+
fn validate_tuple_index(&mut self, symbol: Symbol, span: Span) -> Symbol {
1365+
let str = symbol.as_str();
1366+
1367+
if str.contains(|c: char| !c.is_ascii_digit()) || matches!(str.as_bytes(), [b'0', _, ..]) {
1368+
self.dcx().span_err(span, "invalid tuple index");
1369+
let str = str.replace(|c: char| !c.is_ascii_digit(), "");
1370+
let str = str.trim_start_matches('0');
1371+
let str = if str.is_empty() { "0" } else { str };
1372+
return Symbol::intern(str);
1373+
}
1374+
1375+
symbol
1376+
}
1377+
13471378
fn parse_delim_args(&mut self) -> PResult<'a, Box<DelimArgs>> {
13481379
if let Some(args) = self.parse_delim_args_inner() {
13491380
Ok(Box::new(args))

compiler/rustc_parse/src/parser/pat.rs

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1731,11 +1731,14 @@ impl<'a> Parser<'a> {
17311731
self.dcx().emit_err(DotDotDotForRemainingFields { span: self.token.span, token_str });
17321732
}
17331733

1734+
/// Parse a field in a struct pattern.
1735+
///
1736+
/// ```ebnf
1737+
/// PatField = FieldName ":" Pat | "box"? "mut"? ByRef Ident
1738+
/// ```
17341739
fn parse_pat_field(&mut self, lo: Span, attrs: AttrVec) -> PResult<'a, PatField> {
1735-
// Check if a colon exists one ahead. This means we're parsing a fieldname.
17361740
let hi;
17371741
let (subpat, fieldname, is_shorthand) = if self.look_ahead(1, |t| t == &token::Colon) {
1738-
// Parsing a pattern of the form `fieldname: pat`.
17391742
let fieldname = self.parse_field_name()?;
17401743
self.bump();
17411744
let pat = self.parse_pat_allow_top_guard(
@@ -1747,13 +1750,12 @@ impl<'a> Parser<'a> {
17471750
hi = pat.span;
17481751
(pat, fieldname, false)
17491752
} else {
1750-
// Parsing a pattern of the form `(box) (ref) (mut) fieldname`.
17511753
let is_box = self.eat_keyword(exp!(Box));
17521754
let boxed_span = self.token.span;
17531755
let mutability = self.parse_mutability();
17541756
let by_ref = self.parse_byref();
17551757

1756-
let fieldname = self.parse_field_name()?;
1758+
let fieldname = self.parse_ident_common(false)?;
17571759
hi = self.prev_token.span;
17581760
let ann = BindingMode(by_ref, mutability);
17591761
let fieldpat = self.mk_pat_ident(boxed_span.to(hi), ann, fieldname);

tests/ui/numeric/numeric-fields.rs

Lines changed: 0 additions & 10 deletions
This file was deleted.

tests/ui/numeric/numeric-fields.stderr

Lines changed: 0 additions & 28 deletions
This file was deleted.

tests/ui/offset-of/offset-of-tuple-field.rs

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,19 +4,22 @@ use std::mem::offset_of;
44

55
fn main() {
66
offset_of!((u8, u8), _0); //~ ERROR no field `_0`
7-
offset_of!((u8, u8), 01); //~ ERROR no field `01`
8-
offset_of!((u8, u8), 1e2); //~ ERROR no field `1e2`
9-
offset_of!((u8, u8), 1_u8); //~ ERROR no field `1_`
7+
offset_of!((u8, u8), 01); //~ ERROR invalid tuple index
8+
offset_of!((u8, u8), 1e2); //~ ERROR invalid tuple index
9+
//~^ ERROR no field `1e2`
10+
offset_of!((u8, u8), 1_u8); //~ ERROR invalid tuple index
1011
//~| ERROR suffixes on a tuple index
1112

12-
builtin # offset_of((u8, u8), 1e2); //~ ERROR no field `1e2`
13+
builtin # offset_of((u8, u8), 1e2); //~ ERROR invalid tuple index
14+
//~^ ERROR no field `1e2`
1315
builtin # offset_of((u8, u8), _0); //~ ERROR no field `_0`
14-
builtin # offset_of((u8, u8), 01); //~ ERROR no field `01`
15-
builtin # offset_of((u8, u8), 1_u8); //~ ERROR no field `1_`
16+
builtin # offset_of((u8, u8), 01); //~ ERROR invalid tuple index
17+
builtin # offset_of((u8, u8), 1_u8); //~ ERROR invalid tuple index
1618
//~| ERROR suffixes on a tuple index
1719

1820
offset_of!(((u8, u16), (u32, u16, u8)), 0.2); //~ ERROR no field `2`
19-
offset_of!(((u8, u16), (u32, u16, u8)), 0.1e2); //~ ERROR no field `1e2`
21+
offset_of!(((u8, u16), (u32, u16, u8)), 0.1e2); //~ ERROR invalid tuple index
22+
//~^ ERROR no field `1e2`
2023
offset_of!(((u8, u16), (u32, u16, u8)), 1.2);
2124
offset_of!(((u8, u16), (u32, u16, u8)), 1.2.0); //~ ERROR no field `0`
2225
}

0 commit comments

Comments
 (0)