1-
21# frozen_string_literal: true
32
43module Jekyll
@@ -12,7 +11,7 @@ module Utils
1211 autoload :WinTZ , "jekyll/utils/win_tz"
1312
1413 # Constants for use in #slugify
15- SLUGIFY_MODES = %w( raw default pretty ascii ) . freeze
14+ SLUGIFY_MODES = %w( raw default pretty ascii latin ) . freeze
1615 SLUGIFY_RAW_REGEXP = Regexp . new ( '\\s+' ) . freeze
1716 SLUGIFY_DEFAULT_REGEXP = Regexp . new ( "[^[:alnum:]]+" ) . freeze
1817 SLUGIFY_PRETTY_REGEXP = Regexp . new ( "[^[:alnum:]._~!$&'()+,;=@]+" ) . freeze
@@ -170,6 +169,10 @@ def has_yaml_header?(file)
170169 # When mode is "ascii", every character sequence that is not made up of the
171170 # ASCII characters a-z (lowercase), A-Z (uppercase) and 0-9 (numbers) is replaced with a hyphen.
172171 #
172+ # When mode is "latin", the input string is first preprocessed so that
173+ # any letters with accents are replaced with the plain letter. Afterwards,
174+ # it follows the "default" mode of operation.
175+ #
173176 # If cased is true, all uppercase letters in the result string are
174177 # replaced with their lowercase counterparts.
175178 #
@@ -184,7 +187,10 @@ def has_yaml_header?(file)
184187 # # => "The-_config.yml file"
185188 #
186189 # slugify("The _config.yml file", mode: "ascii")
187- # # => "the-config.yml-file"
190+ # # => "the-config-yml-file"
191+ #
192+ # slugify("The _config.yml file", mode: "latin")
193+ # # => "the-config-yml-file"
188194 #
189195 # Returns the slugified string.
190196 def slugify ( string , mode : nil , cased : false )
@@ -195,26 +201,10 @@ def slugify(string, mode: nil, cased: false)
195201 return cased ? string : string . downcase
196202 end
197203
198- # Replace each character sequence with a hyphen
199- re =
200- case mode
201- when "raw"
202- SLUGIFY_RAW_REGEXP
203- when "default"
204- SLUGIFY_DEFAULT_REGEXP
205- when "pretty"
206- # "._~!$&'()+,;=@" is human readable (not URI-escaped) in URL
207- # and is allowed in both extN and NTFS.
208- SLUGIFY_PRETTY_REGEXP
209- when "ascii"
210- # For web servers not being able to handle Unicode, the safe
211- # method is to ditch anything else but latin letters and numeric
212- # digits.
213- SLUGIFY_ASCII_REGEXP
214- end
204+ # Drop accent marks from latin characters. Everything else turns to ?
205+ string = ::I18n . transliterate ( string ) if mode == "latin"
215206
216- # Strip according to the mode
217- slug = string . gsub ( re , "-" )
207+ slug = replace_character_sequence_with_hyphen ( string , :mode => mode )
218208
219209 # Remove leading/trailing hyphen
220210 slug . gsub! ( %r!^\- |\- $!i , "" )
@@ -337,5 +327,32 @@ def duplicate_frozen_values(target)
337327 target [ key ] = val . dup if val . frozen? && duplicable? ( val )
338328 end
339329 end
330+
private

# Replace each character sequence that the given mode does not allow with
# a single hyphen.
#
# string - the String to process.
# mode   - the String name of the slugify mode (default: "default").
#          "raw", "pretty" and "ascii" each select their own pattern; any
#          other value uses the "default" pattern.
#
# See Utils#slugify for a description of the character sequence specified
# by each mode.
#
# Returns a new String in which every matched sequence is replaced by "-".
def replace_character_sequence_with_hyphen(string, mode: "default")
  replaceable_char =
    case mode
    when "raw"
      SLUGIFY_RAW_REGEXP
    when "pretty"
      # "._~!$&'()+,;=@" is human readable (not URI-escaped) in URL
      # and is allowed in both extN and NTFS.
      SLUGIFY_PRETTY_REGEXP
    when "ascii"
      # For web servers not being able to handle Unicode, the safe
      # method is to ditch anything else but latin letters and numeric
      # digits.
      SLUGIFY_ASCII_REGEXP
    else
      # Covers "default" itself and "latin": slugify transliterates the
      # string before calling here, so "latin" needs no pattern of its own.
      SLUGIFY_DEFAULT_REGEXP
    end

  # Strip according to the mode
  string.gsub(replaceable_char, "-")
end
340357 end
341358end
0 commit comments