Skip to content

Commit 4ff375e

Browse files
committed
Add FSC option for retrieving raw lexer tokens
1 parent 7118397 commit 4ff375e

4 files changed

Lines changed: 34 additions & 13 deletions

File tree

src/fsharp/CompilerConfig.fs

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,11 @@ type LStatus =
266266
| Unprocessed
267267
| Processed
268268

269+
type TokenizeOption =
270+
| AndCompile
271+
| Only
272+
| Unfiltered
273+
269274
type PackageManagerLine =
270275
{ Directive: Directive
271276
LineStatus: LStatus
@@ -362,7 +367,7 @@ type TcConfigBuilder =
362367
mutable importAllReferencesOnly: bool
363368
mutable simulateException: string option
364369
mutable printAst: bool
365-
mutable tokenizeOnly: bool
370+
mutable tokenize: TokenizeOption
366371
mutable testInteractionParser: bool
367372
mutable reportNumDecls: bool
368373
mutable printSignature: bool
@@ -526,7 +531,7 @@ type TcConfigBuilder =
526531
importAllReferencesOnly = false
527532
simulateException = None
528533
printAst = false
529-
tokenizeOnly = false
534+
tokenize = TokenizeOption.AndCompile
530535
testInteractionParser = false
531536
reportNumDecls = false
532537
printSignature = false
@@ -923,7 +928,7 @@ type TcConfig private (data: TcConfigBuilder, validate: bool) =
923928
member x.simulateException = data.simulateException
924929
member x.printAst = data.printAst
925930
member x.targetFrameworkVersion = targetFrameworkVersionValue
926-
member x.tokenizeOnly = data.tokenizeOnly
931+
member x.tokenize = data.tokenize
927932
member x.testInteractionParser = data.testInteractionParser
928933
member x.reportNumDecls = data.reportNumDecls
929934
member x.printSignature = data.printSignature

src/fsharp/CompilerConfig.fsi

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,11 @@ type LStatus =
119119
| Unprocessed
120120
| Processed
121121

122+
type TokenizeOption =
123+
| AndCompile
124+
| Only
125+
| Unfiltered
126+
122127
type PackageManagerLine =
123128
{ Directive: Directive
124129
LineStatus: LStatus
@@ -180,7 +185,7 @@ type TcConfigBuilder =
180185
mutable importAllReferencesOnly: bool
181186
mutable simulateException: string option
182187
mutable printAst: bool
183-
mutable tokenizeOnly: bool
188+
mutable tokenize: TokenizeOption
184189
mutable testInteractionParser: bool
185190
mutable reportNumDecls: bool
186191
mutable printSignature: bool
@@ -358,7 +363,7 @@ type TcConfig =
358363
member importAllReferencesOnly: bool
359364
member simulateException: string option
360365
member printAst: bool
361-
member tokenizeOnly: bool
366+
member tokenize: TokenizeOption
362367
member testInteractionParser: bool
363368
member reportNumDecls: bool
364369
member printSignature: bool

src/fsharp/CompilerOptions.fs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1137,8 +1137,13 @@ let internalFlags (tcConfigB:TcConfigBuilder) =
11371137

11381138
CompilerOption
11391139
("tokenize", tagNone,
1140-
OptionUnit (fun () -> tcConfigB.tokenizeOnly <- true),
1140+
OptionUnit (fun () -> tcConfigB.tokenize <- TokenizeOption.Only),
11411141
Some(InternalCommandLineOption("--tokenize", rangeCmdArgs)), None)
1142+
1143+
CompilerOption
1144+
("tokenize-unfiltered", tagNone,
1145+
OptionUnit (fun () -> tcConfigB.tokenize <- TokenizeOption.Unfiltered),
1146+
Some(InternalCommandLineOption("--tokenize-unfiltered", rangeCmdArgs)), None)
11421147

11431148
CompilerOption
11441149
("testInteractionParser", tagNone,

src/fsharp/ParseAndCheckInputs.fs

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -286,21 +286,23 @@ let ParseInput (lexer, errorLogger: ErrorLogger, lexbuf: UnicodeLexing.Lexbuf, d
286286
let filteringErrorLogger = GetErrorLoggerFilteringByScopedPragmas(false, scopedPragmas, errorLogger)
287287
delayLogger.CommitDelayedDiagnostics filteringErrorLogger
288288

289+
type Tokenizer = unit -> Parser.token
290+
289291
// Show all tokens in the stream, for testing purposes
290-
let ShowAllTokensAndExit (shortFilename, tokenizer: LexFilter.LexFilter, lexbuf: LexBuffer<char>) =
292+
let ShowAllTokensAndExit (shortFilename, tokenizer: Tokenizer, lexbuf: LexBuffer<char>) =
291293
while true do
292294
printf "tokenize - getting one token from %s\n" shortFilename
293-
let t = tokenizer.GetToken()
295+
let t = tokenizer ()
294296
printf "tokenize - got %s @ %a\n" (Parser.token_to_string t) outputRange lexbuf.LexemeRange
295297
match t with
296298
| Parser.EOF _ -> exit 0
297299
| _ -> ()
298300
if lexbuf.IsPastEndOfStream then printf "!!! at end of stream\n"
299301

300302
// Test one of the parser entry points, just for testing purposes
301-
let TestInteractionParserAndExit (tokenizer: LexFilter.LexFilter, lexbuf: LexBuffer<char>) =
303+
let TestInteractionParserAndExit (tokenizer: Tokenizer, lexbuf: LexBuffer<char>) =
302304
while true do
303-
match (Parser.interaction (fun _ -> tokenizer.GetToken()) lexbuf) with
305+
match (Parser.interaction (fun _ -> tokenizer ()) lexbuf) with
304306
| IDefns(l, m) -> printfn "Parsed OK, got %d defs @ %a" l.Length outputRange m
305307
| IHash (_, m) -> printfn "Parsed OK, got hash @ %a" outputRange m
306308
exit 0
@@ -341,18 +343,22 @@ let ParseOneInputLexbuf (tcConfig: TcConfig, lexResourceManager, conditionalComp
341343
Lexhelp.usingLexbufForParsing (lexbuf, filename) (fun lexbuf ->
342344

343345
// Set up the LexFilter over the token stream
344-
let tokenizer = LexFilter.LexFilter(lightStatus, tcConfig.compilingFslib, Lexer.token lexargs skipWhitespaceTokens, lexbuf)
346+
let tokenizer,tokenizeOnly =
347+
match tcConfig.tokenize with
348+
| Unfiltered -> (fun () -> Lexer.token lexargs skipWhitespaceTokens lexbuf), true
349+
| Only -> LexFilter.LexFilter(lightStatus, tcConfig.compilingFslib, Lexer.token lexargs skipWhitespaceTokens, lexbuf).GetToken, true
350+
| _ -> LexFilter.LexFilter(lightStatus, tcConfig.compilingFslib, Lexer.token lexargs skipWhitespaceTokens, lexbuf).GetToken, false
345351

346352
// If '--tokenize' then show the tokens now and exit
347-
if tcConfig.tokenizeOnly then
353+
if tokenizeOnly then
348354
ShowAllTokensAndExit(shortFilename, tokenizer, lexbuf)
349355

350356
// Test hook for one of the parser entry points
351357
if tcConfig.testInteractionParser then
352358
TestInteractionParserAndExit (tokenizer, lexbuf)
353359

354360
// Parse the input
355-
let res = ParseInput((fun _ -> tokenizer.GetToken()), errorLogger, lexbuf, None, filename, isLastCompiland)
361+
let res = ParseInput((fun _ -> tokenizer ()), errorLogger, lexbuf, None, filename, isLastCompiland)
356362

357363
// Report the statistics for testing purposes
358364
if tcConfig.reportNumDecls then

0 commit comments

Comments (0)