Skip to content

Commit 93e3eb0

Browse files
alextsui05 and jekyllbot
authored and committed
Add latin mode to slugify (#6509)
Merge pull request 6509
1 parent 53d48d5 commit 93e3eb0

File tree

5 files changed

+67
-22
lines changed

5 files changed

+67
-22
lines changed

docs/_docs/templates.md

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -293,6 +293,18 @@ you come up with your own tags via plugins.
293293
<p>
294294
<code class="output">the-_config.yml-file</code>
295295
</p>
296+
<p>
297+
<code class="filter">{% raw %}{{ "The _cönfig.yml file" | slugify: 'ascii' }}{% endraw %}</code>
298+
</p>
299+
<p>
300+
<code class="output">the-c-nfig-yml-file</code>
301+
</p>
302+
<p>
303+
<code class="filter">{% raw %}{{ "The cönfig.yml file" | slugify: 'latin' }}{% endraw %}</code>
304+
</p>
305+
<p>
306+
<code class="output">the-config-yml-file</code>
307+
</p>
296308
</td>
297309
</tr>
298310
<tr>
@@ -416,6 +428,8 @@ The default is `default`. They are as follows (with what they filter):
416428
- `raw`: spaces
417429
- `default`: spaces and non-alphanumeric characters
418430
- `pretty`: spaces and non-alphanumeric characters except for `._~!$&'()+,;=@`
431+
- `ascii`: spaces, non-alphanumeric, and non-ASCII characters
432+
- `latin`: like `default`, except Latin characters are first transliterated (e.g. `àèïòü` to `aeiou`)
419433

420434
## Tags
421435

jekyll.gemspec

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ Gem::Specification.new do |s|
3232

3333
s.add_runtime_dependency("addressable", "~> 2.4")
3434
s.add_runtime_dependency("colorator", "~> 1.0")
35+
s.add_runtime_dependency("i18n", "~> 0.7")
3536
s.add_runtime_dependency("jekyll-sass-converter", "~> 1.0")
3637
s.add_runtime_dependency("jekyll-watch", "~> 1.1")
3738
s.add_runtime_dependency("kramdown", "~> 1.14")

lib/jekyll.rb

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,10 @@ def require_all(path)
3232
require "liquid"
3333
require "kramdown"
3434
require "colorator"
35+
require "i18n"
3536

3637
SafeYAML::OPTIONS[:suppress_warnings] = true
38+
I18n.config.available_locales = :en
3739

3840
module Jekyll
3941
# internal requires

lib/jekyll/utils.rb

Lines changed: 39 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
21
# frozen_string_literal: true
32

43
module Jekyll
@@ -12,7 +11,7 @@ module Utils
1211
autoload :WinTZ, "jekyll/utils/win_tz"
1312

1413
# Constants for use in #slugify
15-
SLUGIFY_MODES = %w(raw default pretty ascii).freeze
14+
SLUGIFY_MODES = %w(raw default pretty ascii latin).freeze
1615
SLUGIFY_RAW_REGEXP = Regexp.new('\\s+').freeze
1716
SLUGIFY_DEFAULT_REGEXP = Regexp.new("[^[:alnum:]]+").freeze
1817
SLUGIFY_PRETTY_REGEXP = Regexp.new("[^[:alnum:]._~!$&'()+,;=@]+").freeze
@@ -170,6 +169,10 @@ def has_yaml_header?(file)
170169
# When mode is "ascii", everything except the ASCII characters
171170
# a-z (lowercase), A-Z (uppercase) and 0-9 (numbers) is replaced with a hyphen.
172171
#
172+
# When mode is "latin", the input string is first preprocessed so that
173+
# any letters with accents are replaced with the plain letter. Afterwards,
174+
# it follows the "default" mode of operation.
175+
#
173176
# If cased is true, all uppercase letters in the result string are
174177
# replaced with their lowercase counterparts.
175178
#
@@ -184,7 +187,10 @@ def has_yaml_header?(file)
184187
# # => "The-_config.yml file"
185188
#
186189
# slugify("The _config.yml file", :mode => "ascii")
187-
# # => "the-config.yml-file"
190+
# # => "the-config-yml-file"
191+
#
192+
# slugify("The _config.yml file", :mode => "latin")
193+
# # => "the-config-yml-file"
188194
#
189195
# Returns the slugified string.
190196
def slugify(string, mode: nil, cased: false)
@@ -195,26 +201,10 @@ def slugify(string, mode: nil, cased: false)
195201
return cased ? string : string.downcase
196202
end
197203

198-
# Replace each character sequence with a hyphen
199-
re =
200-
case mode
201-
when "raw"
202-
SLUGIFY_RAW_REGEXP
203-
when "default"
204-
SLUGIFY_DEFAULT_REGEXP
205-
when "pretty"
206-
# "._~!$&'()+,;=@" is human readable (not URI-escaped) in URL
207-
# and is allowed in both extN and NTFS.
208-
SLUGIFY_PRETTY_REGEXP
209-
when "ascii"
210-
# For web servers not being able to handle Unicode, the safe
211-
# method is to ditch anything else but latin letters and numeric
212-
# digits.
213-
SLUGIFY_ASCII_REGEXP
214-
end
204+
# Drop accent marks from latin characters. Everything else turns to ?
205+
string = ::I18n.transliterate(string) if mode == "latin"
215206

216-
# Strip according to the mode
217-
slug = string.gsub(re, "-")
207+
slug = replace_character_sequence_with_hyphen(string, :mode => mode)
218208

219209
# Remove leading/trailing hyphen
220210
slug.gsub!(%r!^\-|\-$!i, "")
@@ -337,5 +327,32 @@ def duplicate_frozen_values(target)
337327
target[key] = val.dup if val.frozen? && duplicable?(val)
338328
end
339329
end
330+
331+
# Replace each character sequence with a hyphen.
332+
#
333+
# See Utils#slugify for a description of the character sequence specified
334+
# by each mode.
335+
private
336+
def replace_character_sequence_with_hyphen(string, mode: "default")
337+
replaceable_char =
338+
case mode
339+
when "raw"
340+
SLUGIFY_RAW_REGEXP
341+
when "pretty"
342+
# "._~!$&'()+,;=@" is human readable (not URI-escaped) in URL
343+
# and is allowed in both extN and NTFS.
344+
SLUGIFY_PRETTY_REGEXP
345+
when "ascii"
346+
# For web servers not being able to handle Unicode, the safe
347+
# method is to ditch anything else but latin letters and numeric
348+
# digits.
349+
SLUGIFY_ASCII_REGEXP
350+
else
351+
SLUGIFY_DEFAULT_REGEXP
352+
end
353+
354+
# Strip according to the mode
355+
string.gsub(replaceable_char, "-")
356+
end
340357
end
341358
end

test/test_utils.rb

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,17 @@ class TestUtils < JekyllUnitTest
207207
Utils.slugify("fürtive glance!!!!", :mode => "ascii")
208208
end
209209

210+
should "map accented latin characters to ASCII characters" do
211+
assert_equal "the-config-yml-file",
212+
Utils.slugify("The _config.yml file?", :mode => "latin")
213+
assert_equal "furtive-glance",
214+
Utils.slugify("fürtive glance!!!!", :mode => "latin")
215+
assert_equal "aaceeiioouu",
216+
Utils.slugify("àáçèéíïòóúü", :mode => "latin")
217+
assert_equal "a-z",
218+
Utils.slugify("Aあわれ鬱господинZ", :mode => "latin")
219+
end
220+
210221
should "only replace whitespace if mode is raw" do
211222
assert_equal(
212223
"the-_config.yml-file?",

0 commit comments

Comments
 (0)