Skip to content

Commit 5fc2e95

Browse files
authored
flambda-backend: Allow CSE of immutable loads across stores (port upstream PR#9562) (oxcaml#562)
1 parent 2a650de commit 5fc2e95

23 files changed

Lines changed: 62 additions & 50 deletions

.depend

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2093,15 +2093,18 @@ asmcomp/CSEgen.cmo : \
20932093
asmcomp/proc.cmi \
20942094
asmcomp/mach.cmi \
20952095
asmcomp/cmm.cmi \
2096+
parsing/asttypes.cmi \
20962097
asmcomp/CSEgen.cmi
20972098
asmcomp/CSEgen.cmx : \
20982099
asmcomp/reg.cmx \
20992100
asmcomp/proc.cmx \
21002101
asmcomp/mach.cmx \
21012102
asmcomp/cmm.cmx \
2103+
parsing/asttypes.cmi \
21022104
asmcomp/CSEgen.cmi
21032105
asmcomp/CSEgen.cmi : \
2104-
asmcomp/mach.cmi
2106+
asmcomp/mach.cmi \
2107+
parsing/asttypes.cmi
21052108
asmcomp/afl_instrument.cmo : \
21062109
lambda/lambda.cmi \
21072110
asmcomp/cmm.cmi \
@@ -2719,6 +2722,7 @@ asmcomp/mach.cmo : \
27192722
lambda/debuginfo.cmi \
27202723
asmcomp/cmm.cmi \
27212724
middle_end/backend_var.cmi \
2725+
parsing/asttypes.cmi \
27222726
asmcomp/arch.cmo \
27232727
asmcomp/mach.cmi
27242728
asmcomp/mach.cmx : \
@@ -2729,6 +2733,7 @@ asmcomp/mach.cmx : \
27292733
lambda/debuginfo.cmx \
27302734
asmcomp/cmm.cmx \
27312735
middle_end/backend_var.cmx \
2736+
parsing/asttypes.cmi \
27322737
asmcomp/arch.cmx \
27332738
asmcomp/mach.cmi
27342739
asmcomp/mach.cmi : \
@@ -2738,6 +2743,7 @@ asmcomp/mach.cmi : \
27382743
lambda/debuginfo.cmi \
27392744
asmcomp/cmm.cmi \
27402745
middle_end/backend_var.cmi \
2746+
parsing/asttypes.cmi \
27412747
asmcomp/arch.cmo
27422748
asmcomp/printcmm.cmo : \
27432749
utils/targetint.cmi \

asmcomp/CSEgen.ml

Lines changed: 19 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ type valnum = int
2525
type op_class =
2626
| Op_pure (* pure arithmetic, produce one or several results *)
2727
| Op_checkbound (* checkbound-style: no result, can raise an exn *)
28-
| Op_load (* memory load *)
28+
| Op_load of Asttypes.mutable_flag (* memory load *)
2929
| Op_store of bool (* memory store, false = init, true = assign *)
3030
| Op_other (* anything else that does not allocate nor store in memory *)
3131

@@ -40,29 +40,30 @@ module Equations = struct
4040
Map.Make(struct type t = rhs let compare = Stdlib.compare end)
4141

4242
type 'a t =
43-
{ load_equations : 'a Rhs_map.t;
43+
{ mutable_load_equations : 'a Rhs_map.t;
4444
other_equations : 'a Rhs_map.t }
4545

4646
let empty =
47-
{ load_equations = Rhs_map.empty;
47+
{ mutable_load_equations = Rhs_map.empty;
4848
other_equations = Rhs_map.empty }
4949

5050
let add op_class op v m =
5151
match op_class with
52-
| Op_load ->
53-
{ m with load_equations = Rhs_map.add op v m.load_equations }
52+
| Op_load Mutable ->
53+
{ m with mutable_load_equations =
54+
Rhs_map.add op v m.mutable_load_equations }
5455
| _ ->
5556
{ m with other_equations = Rhs_map.add op v m.other_equations }
5657

5758
let find op_class op m =
5859
match op_class with
59-
| Op_load ->
60-
Rhs_map.find op m.load_equations
60+
| Op_load Mutable ->
61+
Rhs_map.find op m.mutable_load_equations
6162
| _ ->
6263
Rhs_map.find op m.other_equations
6364

64-
let remove_loads m =
65-
{ load_equations = Rhs_map.empty;
65+
let remove_mutable_loads m =
66+
{ mutable_load_equations = Rhs_map.empty;
6667
other_equations = m.other_equations }
6768
end
6869

@@ -190,8 +191,8 @@ let set_unknown_regs n rs =
190191

191192
(* Forget the equations over loads that a store may invalidate; keep all other equations. *)
192193

193-
let remove_load_numbering n =
194-
{ n with num_eqs = Equations.remove_loads n.num_eqs }
194+
let remove_mutable_load_numbering n =
195+
{ n with num_eqs = Equations.remove_mutable_loads n.num_eqs }
195196

196197
(* Forget everything we know about registers of type [Addr]. *)
197198

@@ -225,7 +226,7 @@ method class_of_operation op =
225226
| Icall_ind | Icall_imm _ | Itailcall_ind | Itailcall_imm _
226227
| Iextcall _ | Iprobe _ | Iopaque -> assert false (* treated specially *)
227228
| Istackoffset _ -> Op_other
228-
| Iload(_,_) -> Op_load
229+
| Iload(_,_,mut) -> Op_load mut
229230
| Istore(_,_,asg) -> Op_store asg
230231
| Ialloc _ -> assert false (* treated specially *)
231232
| Iintop(Icheckbound) -> Op_checkbound
@@ -246,11 +247,11 @@ method is_cheap_operation op =
246247
| Iconst_int _ -> true
247248
| _ -> false
248249

249-
(* Forget all equations involving memory loads. Performed after a
250-
non-initializing store *)
250+
(* Forget all equations involving mutable memory loads.
251+
Performed after a non-initializing store *)
251252

252253
method private kill_loads n =
253-
remove_load_numbering n
254+
remove_mutable_load_numbering n
254255

255256
(* Perform CSE on the given instruction [i] and its successors.
256257
[n] is the value numbering current at the beginning of [i]. *)
@@ -292,13 +293,13 @@ method private cse n i =
292293
Moreover, allocation can trigger the asynchronous execution
293294
of arbitrary Caml code (finalizer, signal handler, context
294295
switch), which can contain non-initializing stores.
295-
Hence, all equations over loads must be removed. *)
296+
Hence, all equations over mutable loads must be removed. *)
296297
let n1 = kill_addr_regs (self#kill_loads n) in
297298
let n2 = set_unknown_regs n1 i.res in
298299
{i with next = self#cse n2 i.next}
299300
| Iop op ->
300301
begin match self#class_of_operation op with
301-
| (Op_pure | Op_checkbound | Op_load) as op_class ->
302+
| (Op_pure | Op_checkbound | Op_load _) as op_class ->
302303
let (n1, varg) = valnum_regs n i.arg in
303304
let n2 = set_unknown_regs n1 (Proc.destroyed_at_oper i.desc) in
304305
begin match find_equation op_class n1 (op, varg) with
@@ -336,7 +337,7 @@ method private cse n i =
336337
{i with next = self#cse n2 i.next}
337338
| Op_store true ->
338339
(* A non-initializing store can invalidate
339-
anything we know about prior loads. *)
340+
anything we know about prior mutable loads. *)
340341
let n1 = set_unknown_regs n (Proc.destroyed_at_oper i.desc) in
341342
let n2 = set_unknown_regs n1 i.res in
342343
let n3 = self#kill_loads n2 in

asmcomp/CSEgen.mli

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
type op_class =
2020
| Op_pure (* pure arithmetic, produce one or several results *)
2121
| Op_checkbound (* checkbound-style: no result, can raise an exn *)
22-
| Op_load (* memory load *)
22+
| Op_load of Asttypes.mutable_flag (* memory load *)
2323
| Op_store of bool (* memory store, false = init, true = assign *)
2424
| Op_other (* anything else that does not allocate nor store in memory *)
2525

asmcomp/amd64/CSE.ml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ method! class_of_operation op =
3030
| Ilea _ | Isextend32 | Izextend32 -> Op_pure
3131
| Istore_int(_, _, is_asg) -> Op_store is_asg
3232
| Ioffset_loc(_, _) -> Op_store true
33-
| Ifloatarithmem _ | Ifloatsqrtf _ -> Op_load
33+
| Ifloatarithmem _ | Ifloatsqrtf _ -> Op_load Mutable
3434
| Ibswap _ | Isqrtf -> super#class_of_operation op
3535
end
3636
| _ -> super#class_of_operation op

asmcomp/amd64/emit.mlp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -717,7 +717,7 @@ let emit_instr fallthrough i =
717717
end
718718
| Lop(Istackoffset n) ->
719719
emit_stack_offset n
720-
| Lop(Iload(chunk, addr)) ->
720+
| Lop(Iload(chunk, addr, _mut)) ->
721721
let dest = res i 0 in
722722
begin match chunk with
723723
| Word_int | Word_val ->

asmcomp/arm/emit.mlp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -580,10 +580,10 @@ let emit_instr i =
580580
let ninstr = emit_stack_adjustment (-n) in
581581
stack_offset := !stack_offset + n;
582582
ninstr
583-
| Lop(Iload(Single, addr)) when !fpu >= VFPv2 ->
583+
| Lop(Iload(Single, addr, _mut)) when !fpu >= VFPv2 ->
584584
` flds s14, {emit_addressing addr i.arg 0}\n`;
585585
` fcvtds {emit_reg i.res.(0)}, s14\n`; 2
586-
| Lop(Iload(Double, addr)) when !fpu = Soft ->
586+
| Lop(Iload(Double, addr, _mut)) when !fpu = Soft ->
587587
(* Use LDM or LDRD if possible *)
588588
begin match i.res.(0), i.res.(1), addr with
589589
{loc = Reg rt}, {loc = Reg rt2}, Iindexed 0
@@ -602,7 +602,7 @@ let emit_instr i =
602602
` ldr {emit_reg i.res.(0)}, {emit_addressing addr i.arg 0}\n`
603603
end; 2
604604
end
605-
| Lop(Iload(size, addr)) ->
605+
| Lop(Iload(size, addr, _mut)) ->
606606
let r = i.res.(0) in
607607
let instr =
608608
match size with

asmcomp/arm/proc.ml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -311,7 +311,8 @@ let destroyed_at_oper = function
311311
| Iop(Iintop (Icomp _) | Iintop_imm(Icomp _, _))
312312
when !arch >= ARMv8 && !thumb ->
313313
[| phys_reg 3 |] (* r3 destroyed *)
314-
| Iop(Iintoffloat | Ifloatofint | Iload(Single, _) | Istore(Single, _, _)) ->
314+
| Iop(Iintoffloat | Ifloatofint
315+
| Iload(Single, _, _) | Istore(Single, _, _)) ->
315316
[| phys_reg 107 |] (* d7 (s14-s15) destroyed *)
316317
| _ -> [||]
317318

@@ -335,7 +336,7 @@ let max_register_pressure = function
335336
| Ialloc _ -> if abi = EABI then [| 7; 0; 0 |] else [| 7; 8; 8 |]
336337
| Iconst_symbol _ when !Clflags.pic_code -> [| 7; 16; 32 |]
337338
| Iintoffloat | Ifloatofint
338-
| Iload(Single, _) | Istore(Single, _, _) -> [| 9; 15; 31 |]
339+
| Iload(Single, _, _) | Istore(Single, _, _) -> [| 9; 15; 31 |]
339340
| Iintop Imulh when !arch < ARMv6 -> [| 8; 16; 32 |]
340341
| _ -> [| 9; 16; 32 |]
341342

asmcomp/arm/scheduling.ml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ method oper_latency = function
2929
(* Loads have a latency of two cycles in general *)
3030
Iconst_symbol _
3131
| Iconst_float _
32-
| Iload(_, _)
32+
| Iload(_, _, _)
3333
| Ireload
3434
| Ifloatofint (* mcr/mrc count as memory access *)
3535
| Iintoffloat -> 2

asmcomp/arm64/emit.mlp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -497,7 +497,7 @@ module BR = Branch_relaxation.Make (struct
497497
| Lop (Iextcall { alloc = false; }) -> 1
498498
| Lop (Iextcall { alloc = true; }) -> 3
499499
| Lop (Istackoffset _) -> 2
500-
| Lop (Iload (size, addr)) | Lop (Istore (size, addr, _)) ->
500+
| Lop (Iload (size, addr, _)) | Lop (Istore (size, addr, _)) ->
501501
let based = match addr with Iindexed _ -> 0 | Ibased _ -> 1 in
502502
based + begin match size with Single -> 2 | _ -> 1 end
503503
| Lop (Ialloc _) when !fastcode_flag -> 5
@@ -721,7 +721,7 @@ let emit_instr i =
721721
assert (n mod 16 = 0);
722722
emit_stack_adjustment (-n);
723723
stack_offset := !stack_offset + n
724-
| Lop(Iload(size, addr)) ->
724+
| Lop(Iload(size, addr, _mut)) ->
725725
let dst = i.res.(0) in
726726
let base =
727727
match addr with

asmcomp/arm64/proc.ml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -269,7 +269,8 @@ let destroyed_at_oper = function
269269
destroyed_at_c_call
270270
| Iop(Ialloc _) ->
271271
[| reg_x8 |]
272-
| Iop(Iintoffloat | Ifloatofint | Iload(Single, _) | Istore(Single, _, _)) ->
272+
| Iop( Iintoffloat | Ifloatofint
273+
| Iload(Single, _, _) | Istore(Single, _, _)) ->
273274
[| reg_d7 |] (* d7 / s7 destroyed *)
274275
| _ -> [||]
275276

@@ -288,7 +289,7 @@ let max_register_pressure = function
288289
| Iextcall _ -> [| 7; 8 |] (* 7 integer callee-saves, 8 FP callee-saves *)
289290
| Ialloc _ -> [| 22; 32 |]
290291
| Iintoffloat | Ifloatofint
291-
| Iload(Single, _) | Istore(Single, _, _) -> [| 23; 31 |]
292+
| Iload(Single, _, _) | Istore(Single, _, _) -> [| 23; 31 |]
292293
| _ -> [| 23; 32 |]
293294

294295
(* Pure operations (without any side effect besides updating their result

0 commit comments

Comments
 (0)