Skip to content

Commit 7ef3936

Browse files
Definition import: strip a leading Unicode BOM (UTF-8, UTF-16, UTF-32)
Originally proposed in #15527. Borrows a function from our rabbitmq.conf parser, see Kyorai/cuttlefish#64. Fixes #13748.
1 parent 26b7bc1 commit 7ef3936

4 files changed

Lines changed: 34 additions & 9 deletions

File tree

deps/rabbit/src/rabbit_definitions.erl

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -439,10 +439,8 @@ decode(Keys, Body) ->
439439

440440
decode(<<"">>) ->
441441
{ok, #{}};
442-
%% Strip the UTF-8 BOM if present.
443-
decode(<<16#EF, 16#BB, 16#BF, Rest/binary>>) ->
444-
decode(Rest);
445-
decode(Body) ->
442+
decode(Body0) ->
443+
Body = rabbit_misc:strip_bom(Body0),
446444
try
447445
Decoded = rabbit_json:decode(Body),
448446
Normalised = atomise_map_keys(Decoded),

deps/rabbit/test/definition_import_SUITE.erl

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,8 @@ groups() ->
5454
import_case19,
5555
import_case20,
5656
import_case21,
57-
import_case22
57+
import_case22,
58+
import_case23
5859
]},
5960

6061
{boot_time_import_using_classic_source, [], [
@@ -341,6 +342,20 @@ import_case22(Config) ->
341342

342343
ok.
343344

345+
%% Tests that definition files prefixed with a UTF-8, UTF-16 (BE/LE) or UTF-32 (BE/LE) BOM are imported successfully.
346+
import_case23(Config) ->
347+
{ok, Body} = file:read_file(filename:join(?config(data_dir, Config), "case2.json")),
348+
%% UTF-8
349+
import_raw(Config, <<239, 187, 191, Body/binary>>),
350+
%% UTF-16 BE
351+
import_raw(Config, <<254, 255, Body/binary>>),
352+
%% UTF-32 LE
353+
import_raw(Config, <<255, 254, 0, 0, Body/binary>>),
354+
%% UTF-16 LE
355+
import_raw(Config, <<255, 254, Body/binary>>),
356+
%% UTF-32 BE
357+
import_raw(Config, <<0, 0, 254, 255, Body/binary>>).
358+
344359
export_import_round_trip_case1(Config) ->
345360
case rabbit_ct_helpers:is_mixed_versions() of
346361
false ->

deps/rabbit_common/src/rabbit_misc.erl

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@
7777
-export([get_gc_info/1]).
7878
-export([group_proplists_by/2]).
7979
-export([raw_read_file/1]).
80+
-export([strip_bom/1]).
8081
-export([find_child/2]).
8182
-export([is_regular_file/1]).
8283
-export([safe_ets_update_counter/3, safe_ets_update_counter/4, safe_ets_update_counter/5,
@@ -1238,6 +1239,19 @@ raw_read_file(File) ->
12381239
error:{badmatch, Error} -> Error
12391240
end.
12401241

1242+
-spec strip_bom(binary()) -> binary().
1243+
%% UTF-8
1244+
strip_bom(<<239, 187, 191, Rest/binary>>) -> Rest;
1245+
%% UTF-16 BE
1246+
strip_bom(<<254, 255, Rest/binary>>) -> Rest;
1247+
%% UTF-32 LE (must precede UTF-16 LE to avoid partial match)
1248+
strip_bom(<<255, 254, 0, 0, Rest/binary>>) -> Rest;
1249+
%% UTF-16 LE
1250+
strip_bom(<<255, 254, Rest/binary>>) -> Rest;
1251+
%% UTF-32 BE
1252+
strip_bom(<<0, 0, 254, 255, Rest/binary>>) -> Rest;
1253+
strip_bom(Bin) -> Bin.
1254+
12411255
-spec is_regular_file(Name) -> boolean() when
12421256
Name :: file:filename_all().
12431257
is_regular_file(Name) ->

deps/rabbitmq_management/src/rabbit_mgmt_wm_definitions.erl

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -176,10 +176,8 @@ is_authorized(ReqData, Context) ->
176176

177177
decode(<<"">>) ->
178178
{ok, #{}};
179-
%% Strip the UTF-8 BOM if present.
180-
decode(<<16#EF, 16#BB, 16#BF, Rest/binary>>) ->
181-
decode(Rest);
182-
decode(Body) ->
179+
decode(Body0) ->
180+
Body = rabbit_misc:strip_bom(Body0),
183181
try
184182
Decoded = rabbit_json:decode(Body),
185183
Normalised = maps:fold(fun(K, V, Acc) ->

0 commit comments

Comments
 (0)