Skip to content

Commit 58c72d5

Browse files
authored
flambda-backend: Backport ocaml/ocaml#10595 from upstream/trunk (oxcaml#471)
1 parent 1010539 commit 58c72d5

24 files changed

Lines changed: 336 additions & 137 deletions

File tree

Changes

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,15 @@
1+
2+
OCaml 4.14, maintenance version
3+
-------------------------------
4+
5+
### Code generation and optimizations:
6+
7+
- #10595: Tail calls with up to 64 arguments are guaranteed to be compiled
8+
as tail calls. To this end, memory locations in the domain state
9+
are used for passing arguments that do not fit in registers.
10+
(Xavier Leroy, review by Vincent Laviron)
11+
12+
113
OCaml 4.12, maintenance version
214
-------------------------------
315

asmcomp/amd64/emit.mlp

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@ let slot_offset loc cl =
9797
then !stack_offset + n * 8
9898
else !stack_offset + (num_stack_slots.(0) + n) * 8
9999
| Outgoing n -> n
100+
| Domainstate _ -> assert false (* not a stack slot *)
100101

101102
let emit_stack_offset n =
102103
if n < 0
@@ -200,14 +201,18 @@ let emit_Llabel fallthrough lbl =
200201

201202
(* Output a pseudo-register *)
202203

204+
let x86_data_type_for_stack_slot = function
205+
| Float -> REAL8
206+
| _ -> QWORD
207+
203208
let reg = function
204209
| { loc = Reg.Reg r } -> register_name r
205-
| { loc = Stack s; typ = Float } as r ->
206-
let ofs = slot_offset s (register_class r) in
207-
mem64 REAL8 ofs RSP
208-
| { loc = Stack s } as r ->
210+
| { loc = Stack (Domainstate n); typ = ty } ->
211+
let ofs = n + Domainstate.(idx_of_field Domain_extra_params) * 8 in
212+
mem64 (x86_data_type_for_stack_slot ty) ofs R14
213+
| { loc = Stack s; typ = ty } as r ->
209214
let ofs = slot_offset s (register_class r) in
210-
mem64 QWORD ofs RSP
215+
mem64 (x86_data_type_for_stack_slot ty) ofs RSP
211216
| { loc = Unknown } ->
212217
assert false
213218

asmcomp/amd64/proc.ml

Lines changed: 25 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -76,8 +76,6 @@ let win64 = Arch.win64
7676
the GC regs block).
7777
*)
7878

79-
let max_arguments_for_tailcalls = 10
80-
8179
let int_reg_name =
8280
match Config.ccomp_type with
8381
| "msvc" ->
@@ -157,12 +155,15 @@ let word_addressed = false
157155

158156
(* Calling conventions *)
159157

160-
let calling_conventions first_int last_int first_float last_float make_stack
158+
let size_domainstate_args = 64 * size_int
159+
160+
let calling_conventions first_int last_int first_float last_float
161+
make_stack first_stack
161162
arg =
162163
let loc = Array.make (Array.length arg) Reg.dummy in
163164
let int = ref first_int in
164165
let float = ref first_float in
165-
let ofs = ref 0 in
166+
let ofs = ref first_stack in
166167
for i = 0 to Array.length arg - 1 do
167168
match arg.(i) with
168169
| Val | Int | Addr as ty ->
@@ -183,21 +184,32 @@ let calling_conventions first_int last_int first_float last_float make_stack
183184
ofs := !ofs + size_float
184185
end
185186
done;
186-
(loc, Misc.align !ofs 16) (* keep stack 16-aligned *)
187-
188-
let incoming ofs = Incoming ofs
189-
let outgoing ofs = Outgoing ofs
187+
(loc, Misc.align (max 0 !ofs) 16) (* keep stack 16-aligned *)
188+
189+
let incoming ofs =
190+
if ofs >= 0
191+
then Incoming ofs
192+
else Domainstate (ofs + size_domainstate_args)
193+
let outgoing ofs =
194+
if ofs >= 0
195+
then Outgoing ofs
196+
else Domainstate (ofs + size_domainstate_args)
190197
let not_supported _ofs = fatal_error "Proc.loc_results: cannot call"
191198

192199
let loc_arguments arg =
193-
calling_conventions 0 9 100 109 outgoing arg
200+
calling_conventions 0 9 100 109 outgoing (- size_domainstate_args) arg
194201
let loc_parameters arg =
195202
let (loc, _ofs) =
196-
calling_conventions 0 9 100 109 incoming arg
203+
calling_conventions 0 9 100 109 incoming (- size_domainstate_args) arg
197204
in
198205
loc
206+
199207
let loc_results res =
200-
let (loc, _ofs) = calling_conventions 0 0 100 100 not_supported res in loc
208+
let (loc, _ofs) =
209+
calling_conventions 0 0 100 100 not_supported 0 res
210+
in loc
211+
212+
let max_arguments_for_tailcalls = 10 (* in regs *) + 64 (* in domain state *)
201213

202214
(* C calling conventions under Unix:
203215
first integer args in rdi, rsi, rdx, rcx, r8, r9
@@ -213,10 +225,10 @@ let loc_results res =
213225
Return value in rax or xmm0. *)
214226

215227
let loc_external_results res =
216-
let (loc, _ofs) = calling_conventions 0 0 100 100 not_supported res in loc
228+
let (loc, _ofs) = calling_conventions 0 0 100 100 not_supported 0 res in loc
217229

218230
let unix_loc_external_arguments arg =
219-
calling_conventions 2 7 100 107 outgoing arg
231+
calling_conventions 2 7 100 107 outgoing 0 arg
220232

221233
let win64_int_external_arguments =
222234
[| 5 (*rcx*); 4 (*rdx*); 6 (*r8*); 7 (*r9*) |]

asmcomp/arm/emit.mlp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,13 +87,18 @@ let slot_offset loc cl =
8787
| Outgoing n ->
8888
assert (n >= 0);
8989
n
90+
| Domainstate _ -> assert false (* not a stack slot *)
9091

9192
(* Output a stack reference *)
9293

9394
let emit_stack r =
9495
match r.loc with
96+
| Stack (Domainstate n) ->
97+
let ofs =n + Domainstate.(idx_of_field Domain_extra_params) * 8 in
98+
`[domain_state_ptr, #{emit_int ofs}]`
9599
| Stack s ->
96-
let ofs = slot_offset s (register_class r) in `[sp, #{emit_int ofs}]`
100+
let ofs = slot_offset s (register_class r) in
101+
`[sp, #{emit_int ofs}]`
97102
| _ -> fatal_error "Emit_arm.emit_stack"
98103

99104
(* Output an addressing mode *)

asmcomp/arm/proc.ml

Lines changed: 23 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,8 @@ let stack_slot slot ty =
109109

110110
(* Calling conventions *)
111111

112+
let size_domainstate_args = 64 * size_int
113+
112114
let loc_int last_int make_stack int ofs =
113115
if !int <= last_int then begin
114116
let l = phys_reg !int in
@@ -149,41 +151,49 @@ let loc_int_pair last_int make_stack int ofs =
149151
[| stack_lower; stack_upper |]
150152
end
151153

152-
let calling_conventions first_int last_int first_float last_float make_stack
153-
arg =
154+
let calling_conventions first_int last_int first_float last_float
155+
make_stack first_stack arg =
154156
let loc = Array.make (Array.length arg) Reg.dummy in
155157
let int = ref first_int in
156158
let float = ref first_float in
157-
let ofs = ref 0 in
159+
let ofs = ref first_stack in
158160
for i = 0 to Array.length arg - 1 do
159161
match arg.(i) with
160162
| Val | Int | Addr ->
161163
loc.(i) <- loc_int last_int make_stack int ofs
162164
| Float ->
163165
loc.(i) <- loc_float last_float make_stack float ofs
164166
done;
165-
(loc, Misc.align !ofs 8) (* keep stack 8-aligned *)
166-
167-
let incoming ofs = Incoming ofs
168-
let outgoing ofs = Outgoing ofs
167+
(loc, Misc.align (max 0 !ofs) 8) (* keep stack 8-aligned *)
168+
169+
let incoming ofs =
170+
if ofs >= 0
171+
then Incoming ofs
172+
else Domainstate (ofs + size_domainstate_args)
173+
let outgoing ofs =
174+
if ofs >= 0
175+
then Outgoing ofs
176+
else Domainstate (ofs + size_domainstate_args)
169177
let not_supported _ofs = fatal_error "Proc.loc_results: cannot call"
170178

171179
(* OCaml calling convention:
172180
first integer args in r0...r7
173181
first float args in d0...d15 (EABI+VFP)
174-
remaining args on stack.
182+
remaining args in domain state area, then on stack.
175183
Return values in r0...r7 or d0...d15. *)
176184

177-
let max_arguments_for_tailcalls = 8
185+
let max_arguments_for_tailcalls = 8 (* in regs *) + 64 (* in domain state *)
178186

179187
let loc_arguments arg =
180-
calling_conventions 0 7 100 115 outgoing arg
188+
calling_conventions 0 7 100 115 outgoing (- size_domainstate_args) arg
181189

182190
let loc_parameters arg =
183-
let (loc, _) = calling_conventions 0 7 100 115 incoming arg in loc
191+
let (loc, _) =
192+
calling_conventions 0 7 100 115 incoming (- size_domainstate_args) arg
193+
in loc
184194

185195
let loc_results res =
186-
let (loc, _) = calling_conventions 0 7 100 115 not_supported res in loc
196+
let (loc, _) = calling_conventions 0 7 100 115 not_supported 0 res in loc
187197

188198
(* C calling convention:
189199
first integer args in r0...r3
@@ -218,7 +228,7 @@ let loc_external_arguments ty_args =
218228
external_calling_conventions 0 3 100 107 outgoing ty_args
219229

220230
let loc_external_results res =
221-
let (loc, _) = calling_conventions 0 1 100 100 not_supported res
231+
let (loc, _) = calling_conventions 0 1 100 100 not_supported 0 res
222232
in loc
223233

224234
let loc_exn_bucket = phys_reg 0

asmcomp/arm64/emit.mlp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,13 +118,18 @@ let slot_offset loc cl =
118118
| Outgoing n ->
119119
assert (n >= 0);
120120
n
121+
| Domainstate _ -> assert false (* not a satck slot *)
121122

122123
(* Output a stack reference *)
123124

124125
let emit_stack r =
125126
match r.loc with
127+
| Stack (Domainstate n) ->
128+
let ofs = n + Domainstate.(idx_of_field Domain_extra_params) * 8 in
129+
`[{emit_reg reg_domain_state_ptr}, #{emit_int ofs}]`
126130
| Stack s ->
127-
let ofs = slot_offset s (register_class r) in `[sp, #{emit_int ofs}]`
131+
let ofs = slot_offset s (register_class r) in
132+
`[sp, #{emit_int ofs}]`
128133
| _ -> fatal_error "Emit.emit_stack"
129134

130135
(* Output an addressing mode *)

asmcomp/arm64/proc.ml

Lines changed: 23 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,8 @@ let stack_slot slot ty =
107107

108108
(* Calling conventions *)
109109

110+
let size_domainstate_args = 64 * size_int
111+
110112
let loc_int last_int make_stack int ofs =
111113
if !int <= last_int then begin
112114
let l = phys_reg !int in
@@ -138,43 +140,52 @@ let loc_int32 last_int make_stack int ofs =
138140
end
139141

140142
let calling_conventions
141-
first_int last_int first_float last_float make_stack arg =
143+
first_int last_int first_float last_float make_stack first_stack arg =
142144
let loc = Array.make (Array.length arg) Reg.dummy in
143145
let int = ref first_int in
144146
let float = ref first_float in
145-
let ofs = ref 0 in
147+
let ofs = ref first_stack in
146148
for i = 0 to Array.length arg - 1 do
147149
match arg.(i) with
148150
| Val | Int | Addr ->
149151
loc.(i) <- loc_int last_int make_stack int ofs
150152
| Float ->
151153
loc.(i) <- loc_float last_float make_stack float ofs
152154
done;
153-
(loc, Misc.align !ofs 16) (* keep stack 16-aligned *)
154-
155-
let incoming ofs = Incoming ofs
156-
let outgoing ofs = Outgoing ofs
155+
(loc, Misc.align (max 0 !ofs) 16) (* keep stack 16-aligned *)
156+
157+
let incoming ofs =
158+
if ofs >= 0
159+
then Incoming ofs
160+
else Domainstate (ofs + size_domainstate_args)
161+
let outgoing ofs =
162+
if ofs >= 0
163+
then Outgoing ofs
164+
else Domainstate (ofs + size_domainstate_args)
157165
let not_supported _ofs = fatal_error "Proc.loc_results: cannot call"
158166

159167
(* OCaml calling convention:
160168
first integer args in r0...r15
161169
first float args in d0...d15
162-
remaining args on stack.
170+
remaining args in domain area, then on stack.
163171
Return values in r0...r15 or d0...d15. *)
164172

165-
let max_arguments_for_tailcalls = 16
173+
let max_arguments_for_tailcalls = 16 (* in regs *) + 64 (* in domain state *)
174+
166175
let last_int_register = if macosx then 7 else 15
167176

168177
let loc_arguments arg =
169-
calling_conventions 0 last_int_register 100 115 outgoing arg
178+
calling_conventions 0 last_int_register 100 115
179+
outgoing (- size_domainstate_args) arg
170180
let loc_parameters arg =
171181
let (loc, _) =
172-
calling_conventions 0 last_int_register 100 115 incoming arg
182+
calling_conventions 0 last_int_register 100 115
183+
incoming (- size_domainstate_args) arg
173184
in
174185
loc
175186
let loc_results res =
176187
let (loc, _) =
177-
calling_conventions 0 last_int_register 100 115 not_supported res
188+
calling_conventions 0 last_int_register 100 115 not_supported 0 res
178189
in
179190
loc
180191

@@ -208,7 +219,7 @@ let loc_external_arguments ty_args =
208219
external_calling_conventions 0 7 100 107 outgoing ty_args
209220

210221
let loc_external_results res =
211-
let (loc, _) = calling_conventions 0 1 100 100 not_supported res in loc
222+
let (loc, _) = calling_conventions 0 1 100 100 not_supported 0 res in loc
212223

213224
let loc_exn_bucket = phys_reg 0
214225

asmcomp/i386/emit.mlp

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ let slot_offset loc cl =
7777
| Outgoing n ->
7878
assert (n >= 0);
7979
n
80+
| Domainstate _ -> assert false (* not a stack slot *)
8081

8182
(* Record symbols used and defined - at the end generate extern for those
8283
used but not defined *)
@@ -146,16 +147,24 @@ let domain_field f r =
146147
let load_domain_state r =
147148
I.mov (sym32 "Caml_state") r
148149

150+
let x86_data_type_for_stack_slot = function
151+
| Float -> REAL8
152+
| _ -> DWORD
153+
154+
(* The Domainstate locations are mapped to a global array "caml_extra_params"
155+
defined in runtime/i386*. We cannot access the domain state here
156+
because in the i386 port there is no register that always point to the
157+
domain state. A global array works because i386 does not
158+
support multiple domains. *)
159+
149160
let reg = function
150161
| { loc = Reg r } -> register_name r
151-
| { loc = Stack(Incoming n | Outgoing n) } when n < 0 ->
152-
sym32 "caml_extra_params" ~ofs:(n + 64)
153-
| { loc = Stack s; typ = Float } as r ->
154-
let ofs = slot_offset s (register_class r) in
155-
mem32 REAL8 ofs RSP
156-
| { loc = Stack s } as r ->
162+
| { loc = Stack(Domainstate n); typ = ty } ->
163+
mem_sym (x86_data_type_for_stack_slot ty)
164+
(emit_symbol "caml_extra_params") ~ofs:n
165+
| { loc = Stack s; typ = ty } as r ->
157166
let ofs = slot_offset s (register_class r) in
158-
mem32 DWORD ofs RSP
167+
mem32 (x86_data_type_for_stack_slot ty) ofs RSP
159168
| { loc = Unknown } ->
160169
fatal_error "Emit_i386.reg"
161170

0 commit comments

Comments
 (0)