fix: don't emit empty tail tokens (#1121)

Gusted · web-flow · commit e9ffd5a79aa0 · 2025-08-27T19:51:01.000+10:00
When tokens are split into lines, tokens that end with a newline are emitted again as an empty token, which is confusing and has no benefit. This conversion shouldn't emit an empty tail token. Adds a test. I noticed this issue through a CI failure in the output of a Go program, which changed because of d0ad679; the new output contained an empty whitespace token at the beginning of most lines.
diff --git a/formatters/html/html_test.go b/formatters/html/html_test.go
@@ -49,6 +49,48 @@ func TestSplitTokensIntoLines(t *testing.T) {
 	}
 	actual := chroma.SplitTokensIntoLines(in)
 	assert.Equal(t, expected, actual)
+
+	in = []chroma.Token{
+		{Value: "func", Type: chroma.KeywordDeclaration},
+		{Value: " ", Type: chroma.TextWhitespace},
+		{Value: "main", Type: chroma.NameFunction},
+		{Value: "()", Type: chroma.Punctuation},
+		{Value: " ", Type: chroma.TextWhitespace},
+		{Value: "{", Type: chroma.Punctuation},
+		{Value: "\n\t", Type: chroma.TextWhitespace},
+		{Value: "println", Type: chroma.NameBuiltin},
+		{Value: "(", Type: chroma.Punctuation},
+		{Value: `"mark this"`, Type: chroma.LiteralString},
+		{Value: ")", Type: chroma.Punctuation},
+		{Value: "\n", Type: chroma.TextWhitespace},
+		{Value: "}", Type: chroma.Punctuation},
+		{Value: "\n", Type: chroma.TextWhitespace},
+	}
+	expected = [][]chroma.Token{
+		{
+			{Type: chroma.KeywordDeclaration, Value: "func"},
+			{Type: chroma.TextWhitespace, Value: " "},
+			{Type: chroma.NameFunction, Value: "main"},
+			{Type: chroma.Punctuation, Value: "()"},
+			{Type: chroma.TextWhitespace, Value: " "},
+			{Type: chroma.Punctuation, Value: "{"},
+			{Type: chroma.TextWhitespace, Value: "\n"},
+		},
+		{
+			{Type: chroma.TextWhitespace, Value: "\t"},
+			{Type: chroma.NameBuiltin, Value: "println"},
+			{Type: chroma.Punctuation, Value: "("},
+			{Type: chroma.LiteralString, Value: `"mark this"`},
+			{Type: chroma.Punctuation, Value: ")"},
+			{Type: chroma.TextWhitespace, Value: "\n"},
+		},
+		{
+			{Type: chroma.Punctuation, Value: "}"},
+			{Type: chroma.TextWhitespace, Value: "\n"},
+		},
+	}
+	actual = chroma.SplitTokensIntoLines(in)
+	assert.Equal(t, expected, actual)
 }
 
 func TestFormatterStyleToCSS(t *testing.T) {
diff --git a/iterator.go b/iterator.go
@@ -58,6 +58,7 @@ func Literator(tokens ...Token) Iterator {
 // SplitTokensIntoLines splits tokens containing newlines in two.
 func SplitTokensIntoLines(tokens []Token) (out [][]Token) {
 	var line []Token // nolint: prealloc
+tokenLoop:
 	for _, token := range tokens {
 		for strings.Contains(token.Value, "\n") {
 			parts := strings.SplitAfterN(token.Value, "\n", 2)
@@ -70,6 +71,11 @@ func SplitTokensIntoLines(tokens []Token) (out [][]Token) {
 			line = append(line, clone)
 			out = append(out, line)
 			line = nil
+
+			// If the tail token is empty, don't emit it.
+			if len(token.Value) == 0 {
+				continue tokenLoop
+			}
 		}
 		line = append(line, token)
 	}
@@ -83,5 +89,5 @@ func SplitTokensIntoLines(tokens []Token) (out [][]Token) {
 			out = out[:len(out)-1]
 		}
 	}
-	return
+	return out
 }

Original file line number	Diff line number	Diff line change
`@@ -58,6 +58,7 @@ func Literator(tokens ...Token) Iterator {`
`58`	`58`	`// SplitTokensIntoLines splits tokens containing newlines in two.`
`59`	`59`	`func SplitTokensIntoLines(tokens []Token) (out [][]Token) {`
`60`	`60`	`var line []Token // nolint: prealloc`
	`61`	`+tokenLoop:`
`61`	`62`	`for _, token := range tokens {`
`62`	`63`	`for strings.Contains(token.Value, "\n") {`
`63`	`64`	`parts := strings.SplitAfterN(token.Value, "\n", 2)`
`@@ -70,6 +71,11 @@ func SplitTokensIntoLines(tokens []Token) (out [][]Token) {`
`70`	`71`	`line = append(line, clone)`
`71`	`72`	`out = append(out, line)`
`72`	`73`	`line = nil`
	`74`	`+`
	`75`	`+ // If the tail token is empty, don't emit it.`
	`76`	`+ if len(token.Value) == 0 {`
	`77`	`+ continue tokenLoop`
	`78`	`+ }`
`73`	`79`	`}`
`74`	`80`	`line = append(line, token)`
`75`	`81`	`}`
`@@ -83,5 +89,5 @@ func SplitTokensIntoLines(tokens []Token) (out [][]Token) {`
`83`	`89`	`out = out[:len(out)-1]`
`84`	`90`	`}`
`85`	`91`	`}`
`86`		`- return`
	`92`	`+ return out`
`87`	`93`	`}`