Skip to content

Commit 93dd8f2

Browse files
committed
Optimal polling in C code based on masking
- Do not slow down allocations when an asynchronous callback cannot be handled immediately.
- Do not wait for other threads to process signals that are not meant for us in [caml_enter_blocking_section].

Before this commit, if a pending action could not be processed immediately, then all allocations went through the slow path until it was processed. It was also possible to loop at the start of blocking sections because a signal was pending but not meant for us. This commit introduces a simpler design whereby [action_pending] alone is used to remember that an asynchronous callback should be run, and [young_limit] is only used to interrupt running code. The key is to modify the transition from C to OCaml code so that [young_limit] is set appropriately according to [action_pending]. This avoids situations where we have to repeatedly increase [young_limit] immediately despite the corresponding action not being processed. The intuition is that OCaml behaves as if asynchronous actions are "masked" inside C code (where an "unmasking" action is responsible for setting [young_limit] according to [action_pending] when the mask ends).
1 parent 2ead128 commit 93dd8f2

File tree

9 files changed

+105
-53
lines changed

9 files changed

+105
-53
lines changed

otherlibs/systhreads/st_stubs.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -674,7 +674,7 @@ CAMLprim value caml_thread_yield(value unit)
674674
st_thread_yield(&Thread_main_lock);
675675
Current_thread = st_tls_get(caml_thread_key);
676676
caml_thread_restore_runtime_state();
677-
if (Caml_state->action_pending || caml_check_pending_signals())
677+
if (caml_check_pending_signals())
678678
caml_set_action_pending(Caml_state);
679679

680680
return Val_unit;

runtime/amd64.S

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -535,6 +535,15 @@ ENDFUNCTION(G(caml_allocN))
535535
/* Call a C function from OCaml */
536536
/******************************************************************************/
537537

538+
/* Update [young_limit] when returning from C calls */
539+
#define RET_FROM_C_CALL \
540+
cmpq $0, Caml_state(action_pending); \
541+
jne 1f; \
542+
ret; \
543+
1: \
544+
movq $-1, Caml_state(young_limit); \
545+
ret
546+
538547
FUNCTION(G(caml_c_call))
539548
CFI_STARTPROC
540549
CFI_SIGNAL_FRAME
@@ -553,7 +562,7 @@ LBL(caml_c_call):
553562
/* Load ocaml stack and restore global variables */
554563
SWITCH_C_TO_OCAML
555564
/* Return to OCaml caller */
556-
ret
565+
RET_FROM_C_CALL
557566
CFI_ENDPROC
558567
ENDFUNCTION(G(caml_c_call))
559568

@@ -593,7 +602,7 @@ LBL(106):
593602
/* Load ocaml stack and restore global variables */
594603
SWITCH_C_TO_OCAML
595604
/* Return to OCaml caller */
596-
ret
605+
RET_FROM_C_CALL
597606
CFI_ENDPROC
598607
ENDFUNCTION(G(caml_c_call_stack_args))
599608

runtime/arm64.S

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -417,6 +417,17 @@ FUNCTION(caml_allocN)
417417
/* Call a C function from OCaml */
418418
/* Function to call is in ADDITIONAL_ARG */
419419

420+
/* Update [young_limit] when returning from C calls. */
421+
.macro RET_FROM_C_CALL
422+
ldr TMP, Caml_state(action_pending)
423+
cbnz TMP, 1f
424+
ret
425+
1:
426+
mov TMP, #-1
427+
str TMP, Caml_state(young_limit)
428+
ret
429+
.endm
430+
420431
FUNCTION(caml_c_call)
421432
CFI_STARTPROC
422433
CFI_OFFSET(29, -16)
@@ -437,9 +448,9 @@ FUNCTION(caml_c_call)
437448
SWITCH_C_TO_OCAML
438449
/* Return */
439450
ldp x29, x30, [sp], 16
440-
ret
451+
RET_FROM_C_CALL
441452
CFI_ENDPROC
442-
END_FUNCTION(caml_c_call)
453+
END_FUNCTION(caml_c_call)
443454

444455
FUNCTION(caml_c_call_stack_args)
445456
CFI_STARTPROC
@@ -478,8 +489,9 @@ FUNCTION(caml_c_call_stack_args)
478489
SWITCH_C_TO_OCAML
479490
/* Return */
480491
ldp x29, x30, [sp], 16
481-
ret
482-
CFI_ENDPROC
492+
RET_FROM_C_CALL
493+
CFI_ENDPROC
494+
END_FUNCTION(caml_c_call_stack_args)
483495

484496
/* Start the OCaml program */
485497

runtime/callback.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ CAMLexport value caml_callbackN_exn(value closure, int narg, value args[])
102102

103103
cont = save_and_clear_stack_parent(domain_state);
104104

105+
caml_update_young_limit_after_c_call(domain_state);
105106
res = caml_interprete(callback_code, sizeof(callback_code));
106107
if (Is_exception_result(res))
107108
domain_state->current_stack->sp += narg + 4; /* PR#3419 */
@@ -159,6 +160,7 @@ CAMLexport value caml_callback_exn(value closure, value arg)
159160
value res;
160161

161162
cont = save_and_clear_stack_parent(domain_state);
163+
caml_update_young_limit_after_c_call(domain_state);
162164
res = caml_callback_asm(domain_state, closure, &arg);
163165
restore_stack_parent(domain_state, cont);
164166

@@ -180,6 +182,7 @@ CAMLexport value caml_callback2_exn(value closure, value arg1, value arg2)
180182
value res;
181183

182184
cont = save_and_clear_stack_parent(domain_state);
185+
caml_update_young_limit_after_c_call(domain_state);
183186
res = caml_callback2_asm(domain_state, closure, args);
184187
restore_stack_parent(domain_state, cont);
185188

@@ -202,6 +205,7 @@ CAMLexport value caml_callback3_exn(value closure,
202205
value res;
203206

204207
cont = save_and_clear_stack_parent(domain_state);
208+
caml_update_young_limit_after_c_call(domain_state);
205209
res = caml_callback3_asm(domain_state, closure, args);
206210
restore_stack_parent(domain_state, cont);
207211

runtime/caml/domain.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ void caml_handle_incoming_interrupts(void);
6868
CAMLextern void caml_interrupt_self(void);
6969
void caml_interrupt_all_for_signal(void);
7070
void caml_reset_young_limit(caml_domain_state *);
71+
void caml_update_young_limit_after_c_call(caml_domain_state *);
7172

7273
CAMLextern void caml_reset_domain_lock(void);
7374
CAMLextern int caml_bt_is_in_blocking_section(void);

runtime/domain.c

Lines changed: 32 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -291,7 +291,12 @@ Caml_inline void interrupt_domain(struct interruptor* s)
291291
{
292292
atomic_uintnat * interrupt_word =
293293
atomic_load_explicit(&s->interrupt_word, memory_order_relaxed);
294-
atomic_store_rel(interrupt_word, (uintnat)(-1));
294+
atomic_store_rel(interrupt_word, UINTNAT_MAX);
295+
}
296+
297+
Caml_inline void interrupt_domain_local(caml_domain_state* dom_st)
298+
{
299+
atomic_store_relaxed(&dom_st->young_limit, UINTNAT_MAX);
295300
}
296301

297302
int caml_incoming_interrupts_queued(void)
@@ -1462,20 +1467,37 @@ void caml_interrupt_all_for_signal(void)
14621467
}
14631468
}
14641469

1470+
/* To avoid any risk of forgetting an action, [caml_reset_young_limit]
1471+
is the only way (together with [interrupt_domain*]) through which
1472+
[young_limit] can be modified. We take care here of all possible
1473+
races. */
14651474
void caml_reset_young_limit(caml_domain_state * dom_st)
14661475
{
1467-
/* An interrupt might have been queued in the meanwhile; this
1468-
achieves the proper synchronisation. */
1476+
/* An interrupt might have been queued in the meanwhile; the
1477+
atomic_exchange achieves the proper synchronisation with the
1478+
reads that follow (unlike an atomic_store). */
14691479
atomic_exchange(&dom_st->young_limit, (uintnat)dom_st->young_start);
1470-
dom_internal * d = &all_domains[dom_st->id];
1471-
if (atomic_load_relaxed(&d->interruptor.interrupt_pending)
1480+
/* We might be here due to a recently-recorded signal (or a signal
1481+
might just have arrived), so we need to remember that we must run
1482+
signal handlers. In addition, in the case of long-running C code
1483+
that regularly polls with caml_process_pending_actions, we want
1484+
to force a query of all callbacks at every minor collection or
1485+
major slice (similarly to OCaml behaviour). */
1486+
caml_set_action_pending(dom_st);
1487+
/* In case of actions that we never delay, interrupt the domain
1488+
again immediately. */
1489+
atomic_uintnat * interrupt_pending =
1490+
&all_domains[dom_st->id].interruptor.interrupt_pending;
1491+
if (atomic_load_relaxed(interrupt_pending)
14721492
|| dom_st->requested_minor_gc
14731493
|| dom_st->requested_major_slice
1474-
|| atomic_load_relaxed(&dom_st->requested_external_interrupt)
1475-
|| dom_st->action_pending) {
1476-
atomic_store_rel(&dom_st->young_limit, (uintnat)-1);
1477-
CAMLassert(caml_check_gc_interrupt(dom_st));
1478-
}
1494+
|| atomic_load_relaxed(&dom_st->requested_external_interrupt))
1495+
interrupt_domain_local(dom_st);
1496+
}
1497+
1498+
void caml_update_young_limit_after_c_call(caml_domain_state * dom_st)
1499+
{
1500+
if (dom_st->action_pending) interrupt_domain_local(dom_st);
14791501
}
14801502

14811503
void caml_poll_gc_work(void)

runtime/interp.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,9 @@ sp is a local copy of the global variable Caml_state->extern_sp. */
9191
{ sp -= 2; sp[0] = env; sp[1] = (value)(pc + 1); \
9292
domain_state->current_stack->sp = sp; }
9393
#define Restore_after_c_call \
94-
{ sp = domain_state->current_stack->sp; env = *sp; sp += 2; }
94+
{ sp = domain_state->current_stack->sp; env = *sp; sp += 2; \
95+
caml_update_young_limit_after_c_call(domain_state); \
96+
}
9597

9698
/* For VM threads purposes, an event frame must look like accu + a
9799
C_CALL frame + a RETURN 1 frame.

runtime/minor_gc.c

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -802,16 +802,8 @@ void caml_alloc_small_dispatch (caml_domain_state * dom_st,
802802
/* In the case of allocations performed from OCaml, execute
803803
asynchronous callbacks. */
804804
caml_raise_if_exception(caml_do_pending_actions_exn());
805-
else {
805+
else
806806
caml_handle_gc_interrupt();
807-
/* We might be here due to a recently-recorded signal, so we
808-
need to remember that we must run signal handlers. In
809-
addition, in the case of long-running C code that regularly
810-
polls with caml_process_pending_actions, we want to force a
811-
query of all callbacks at every minor collection or major
812-
slice (similarly to OCaml behaviour). */
813-
dom_st->action_pending = 1;
814-
}
815807

816808
/* Now, there might be enough room in the minor heap to do our
817809
allocation. */

runtime/signals.c

Lines changed: 36 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -163,16 +163,17 @@ CAMLexport void (*caml_leave_blocking_section_hook)(void) =
163163

164164
CAMLexport void caml_enter_blocking_section(void)
165165
{
166-
while (1){
166+
caml_domain_state * domain = Caml_state;
167+
while (1) {
167168
/* Process all pending signals now */
168-
caml_raise_if_exception(caml_process_pending_signals_exn());
169+
if (Caml_check_gc_interrupt(domain)) {
170+
caml_handle_gc_interrupt();
171+
caml_raise_if_exception(caml_process_pending_signals_exn());
172+
}
169173
caml_enter_blocking_section_hook ();
170-
/* Check again for pending signals.
171-
If none, done; otherwise, try again */
172-
// FIXME: does this become very slow if a signal is recorded but
173-
// is masked for everybody in capacity of running signals at this
174-
// point?
175-
if (!caml_check_pending_signals()) break;
174+
/* Check again if a signal arrived in the meanwhile. If none,
175+
done; otherwise, try again. */
176+
if (!Caml_check_gc_interrupt(domain)) break;
176177
caml_leave_blocking_section_hook ();
177178
}
178179
}
@@ -202,7 +203,7 @@ CAMLexport void caml_leave_blocking_section(void)
202203
203204
So we force the examination of signals as soon as possible.
204205
*/
205-
if (Caml_state->action_pending || caml_check_pending_signals())
206+
if (caml_check_pending_signals())
206207
caml_set_action_pending(Caml_state);
207208

208209
errno = saved_errno;
@@ -270,22 +271,30 @@ void caml_request_minor_gc (void)
270271
271272
There are two kinds of asynchronous actions:
272273
273-
- Those that cannot be delayed but never call OCaml code (STW
274+
- Those that we do not delay but which never call OCaml code (STW
274275
interrupts, requested minor or major GC, forced systhread yield).
275276
276-
- Those that may raise OCaml exceptions but can be delayed
277-
(asynchronous callbacks, finalisers, memprof callbacks).
277+
- Those that may run OCaml code and raise OCaml exceptions, but can
278+
be delayed (asynchronous callbacks, finalisers, memprof
279+
callbacks).
278280
279-
[Caml_state->action_pending] records whether an action of the
280-
second kind is currently pending, and is reset _at the beginning_
281-
of processing all actions.
281+
Asynchronous actions are notified to the domain by playing with the
282+
allocation limit. Non-delayable actions are performed immediately,
283+
then [Caml_state->action_pending] is set in order to record that an
284+
action of the second kind might be pending. Then those actions are
285+
processed immediately if possible (e.g. allocation from OCaml), or
286+
remain delayed (e.g. allocation from C) until the program calls
287+
[caml_process_pending_actions].
282288
283-
Hence, when a delayable action is pending, either
284-
[Caml_state->action_pending] is 1, or there is a function currently
285-
running which is executing all actions.
289+
[Caml_state->action_pending] is then reset _at the beginning_ of
290+
processing all actions. Hence, when a delayable action is pending,
291+
either [Caml_state->action_pending] is 1, or there is a function
292+
running which is in the process of executing all actions.
286293
287-
This is used to ensure that [Caml_state->young_limit] is always set
288-
appropriately.
294+
When going from C to OCaml code, we again set
295+
[Caml_state->young_limit] to a high value if
296+
[Caml_state->action_pending] is set, in order to process actions as
297+
soon as possible.
289298
290299
In case there are two different callbacks (say, a signal and a
291300
finaliser) arriving at the same time, then the processing of one
@@ -301,7 +310,6 @@ void caml_request_minor_gc (void)
301310
void caml_set_action_pending(caml_domain_state * dom_st)
302311
{
303312
dom_st->action_pending = 1;
304-
atomic_store_rel(&dom_st->young_limit, (uintnat)-1);
305313
}
306314

307315
CAMLexport int caml_check_pending_actions(void)
@@ -311,15 +319,17 @@ CAMLexport int caml_check_pending_actions(void)
311319

312320
value caml_do_pending_actions_exn(void)
313321
{
314-
Caml_state->action_pending = 0;
315-
316-
/* 1. Non-delayable actions that do not run OCaml code. */
322+
/* 1. Non-delayable actions that do not run OCaml code.
317323
318-
/* Do any pending STW interrupt, minor collection or major slice */
324+
Do any pending STW interrupt, minor collection or major slice. */
319325
caml_handle_gc_interrupt();
320326
/* [young_limit] has now been reset. */
321327

322-
/* 2. Delayable actions that may raise OCaml exceptions. */
328+
/* 2. Delayable actions that may raise OCaml exceptions.
329+
330+
We can now clear the action_pending flag since we are going to
331+
execute all actions. */
332+
Caml_state->action_pending = 0;
323333

324334
/* Call signal handlers first */
325335
value exn = caml_process_pending_signals_exn();

0 commit comments

Comments
 (0)