Skip to content

Commit fa6fce6

Browse files
aartbikcommit-bot@chromium.org
authored andcommitted
[dart/vm] Improved inlining methods and heuristics
Rationale: Previous method cached graph information (instruction and call site counts) on a per-function level, not accounting for potential specializations. The improved method runs an extra constant folding pass, and only caches per-function information for non-specialized cases. As a result, we inling much better, see for example, the added test as illustration. Since we no longer cache for constants, compile-time may be increased a bit due to the extra scan. In the long run we should consider for common constant "situations" as the call site. #36880 Change-Id: I19f007c7f1860ad0ea88fafb38695dc154189ad5 Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/105460 Commit-Queue: Aart Bik <ajcbik@google.com> Reviewed-by: Martin Kustermann <kustermann@google.com> Reviewed-by: Alexander Markov <alexmarkov@google.com>
1 parent 8fa1cb2 commit fa6fce6

File tree

5 files changed

+405
-33
lines changed

5 files changed

+405
-33
lines changed

runtime/vm/compiler/backend/constant_propagator.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1453,7 +1453,8 @@ void ConstantPropagator::EliminateRedundantBranches() {
14531453
static void RemovePushArguments(StaticCallInstr* call) {
14541454
for (intptr_t i = 0; i < call->ArgumentCount(); ++i) {
14551455
PushArgumentInstr* push = call->PushArgumentAt(i);
1456-
ASSERT(push->input_use_list() == nullptr);
1456+
ASSERT(push->input_use_list() == nullptr); // no direct uses
1457+
push->ReplaceUsesWith(push->value()->definition()); // cleanup env uses
14571458
push->RemoveFromGraph();
14581459
}
14591460
}

runtime/vm/compiler/backend/inliner.cc

Lines changed: 72 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -779,6 +779,8 @@ class CallSiteInliner : public ValueObject {
779779
};
780780

781781
// Inlining heuristics based on Cooper et al. 2008.
782+
// TODO(ajcbik): with the better specialized computation of counts,
783+
// do we still want to special-case const_arg_count here?
782784
InliningDecision ShouldWeInline(const Function& callee,
783785
intptr_t instr_count,
784786
intptr_t call_site_count,
@@ -955,21 +957,28 @@ class CallSiteInliner : public ValueObject {
955957
return false;
956958
}
957959

960+
// Apply early heuristics. For a specialized case
961+
// (constants_arg_counts > 0), don't use a previously
962+
// estimate of the call site and instruction counts.
963+
// Note that at this point, optional constant parameters
964+
// are not counted yet, which makes this decision approximate.
958965
GrowableArray<Value*>* arguments = call_data->arguments;
959-
const intptr_t constant_arguments = CountConstants(*arguments);
966+
const intptr_t constant_arg_count = CountConstants(*arguments);
967+
const intptr_t instruction_count =
968+
constant_arg_count == 0 ? function.optimized_instruction_count() : 0;
969+
const intptr_t call_site_count =
970+
constant_arg_count == 0 ? function.optimized_call_site_count() : 0;
960971
InliningDecision decision = ShouldWeInline(
961-
function, function.optimized_instruction_count(),
962-
function.optimized_call_site_count(), constant_arguments);
972+
function, instruction_count, call_site_count, constant_arg_count);
963973
if (!decision.value) {
964974
TRACE_INLINING(
965975
THR_Print(" Bailout: early heuristics (%s) with "
966976
"code size: %" Pd ", "
967977
"call sites: %" Pd ", "
968978
"inlining depth of callee: %d, "
969979
"const args: %" Pd "\n",
970-
decision.reason, function.optimized_instruction_count(),
971-
function.optimized_call_site_count(),
972-
function.inlining_depth(), constant_arguments));
980+
decision.reason, instruction_count, call_site_count,
981+
function.inlining_depth(), constant_arg_count));
973982
PRINT_INLINING_TREE("Early heuristic", &call_data->caller, &function,
974983
call_data->call);
975984
return false;
@@ -1193,26 +1202,30 @@ class CallSiteInliner : public ValueObject {
11931202
printer.PrintBlocks();
11941203
}
11951204

1196-
// Collect information about the call site and caller graph.
1197-
// TODO(zerny): Do this after CP and dead code elimination.
1205+
// Collect information about the call site and caller graph. At this
1206+
// point, optional constant parameters are counted too, making the
1207+
// specialized vs. non-specialized decision accurate.
11981208
intptr_t constants_count = 0;
1199-
for (intptr_t i = 0; i < param_stubs->length(); ++i) {
1209+
for (intptr_t i = 0, n = param_stubs->length(); i < n; ++i) {
12001210
if ((*param_stubs)[i]->IsConstant()) ++constants_count;
12011211
}
1202-
1203-
FlowGraphInliner::CollectGraphInfo(callee_graph);
1204-
const intptr_t size = function.optimized_instruction_count();
1205-
const intptr_t call_site_count = function.optimized_call_site_count();
1212+
intptr_t instruction_count = 0;
1213+
intptr_t call_site_count = 0;
1214+
FlowGraphInliner::CollectGraphInfo(callee_graph, constants_count,
1215+
/*force*/ false, &instruction_count,
1216+
&call_site_count);
12061217

12071218
// Use heuristics do decide if this call should be inlined.
1208-
InliningDecision decision =
1209-
ShouldWeInline(function, size, call_site_count, constants_count);
1219+
InliningDecision decision = ShouldWeInline(
1220+
function, instruction_count, call_site_count, constants_count);
12101221
if (!decision.value) {
12111222
// If size is larger than all thresholds, don't consider it again.
1212-
if ((size > FLAG_inlining_size_threshold) &&
1223+
if ((instruction_count > FLAG_inlining_size_threshold) &&
12131224
(call_site_count > FLAG_inlining_callee_call_sites_threshold) &&
1214-
(size > FLAG_inlining_constant_arguments_min_size_threshold) &&
1215-
(size > FLAG_inlining_constant_arguments_max_size_threshold)) {
1225+
(instruction_count >
1226+
FLAG_inlining_constant_arguments_min_size_threshold) &&
1227+
(instruction_count >
1228+
FLAG_inlining_constant_arguments_max_size_threshold)) {
12161229
function.set_is_inlinable(false);
12171230
}
12181231
TRACE_INLINING(
@@ -1221,7 +1234,7 @@ class CallSiteInliner : public ValueObject {
12211234
"call sites: %" Pd ", "
12221235
"inlining depth of callee: %d, "
12231236
"const args: %" Pd "\n",
1224-
decision.reason, size, call_site_count,
1237+
decision.reason, instruction_count, call_site_count,
12251238
function.inlining_depth(), constants_count));
12261239
PRINT_INLINING_TREE("Heuristic fail", &call_data->caller, &function,
12271240
call_data->call);
@@ -1231,6 +1244,7 @@ class CallSiteInliner : public ValueObject {
12311244
// If requested, a stricter heuristic is applied to this inlining. This
12321245
// heuristic always scans the method (rather than possibly reusing
12331246
// cached results) to make sure all specializations are accounted for.
1247+
// TODO(ajcbik): with the now better bookkeeping, explore removing this
12341248
if (stricter_heuristic) {
12351249
if (!IsSmallLeaf(callee_graph)) {
12361250
TRACE_INLINING(
@@ -1254,7 +1268,7 @@ class CallSiteInliner : public ValueObject {
12541268

12551269
// Build succeeded so we restore the bailout jump.
12561270
inlined_ = true;
1257-
inlined_size_ += size;
1271+
inlined_size_ += instruction_count;
12581272
if (is_recursive_call) {
12591273
inlined_recursive_call_ = true;
12601274
}
@@ -1284,8 +1298,7 @@ class CallSiteInliner : public ValueObject {
12841298
TRACE_INLINING(THR_Print(" Success\n"));
12851299
TRACE_INLINING(THR_Print(
12861300
" with reason %s, code size %" Pd ", call sites: %" Pd "\n",
1287-
decision.reason, function.optimized_instruction_count(),
1288-
call_site_count));
1301+
decision.reason, instruction_count, call_site_count));
12891302
PRINT_INLINING_TREE(NULL, &call_data->caller, &function, call);
12901303
return true;
12911304
} else {
@@ -1481,6 +1494,10 @@ class CallSiteInliner : public ValueObject {
14811494
}
14821495

14831496
bool InlineClosureCalls() {
1497+
// Under this flag, tear off testing closure calls appear before the
1498+
// StackOverflowInstr, which breaks assertions in our compiler when inlined.
1499+
// TODO(sjindel): move testing closure calls after first check
1500+
if (FLAG_enable_testing_pragmas) return false; // keep all closures
14841501
bool inlined = false;
14851502
const GrowableArray<CallSites::ClosureCallInfo>& call_info =
14861503
inlining_call_sites_->closure_calls();
@@ -2237,15 +2254,37 @@ FlowGraphInliner::FlowGraphInliner(
22372254
speculative_policy_(speculative_policy),
22382255
precompiler_(precompiler) {}
22392256

2240-
void FlowGraphInliner::CollectGraphInfo(FlowGraph* flow_graph, bool force) {
2257+
void FlowGraphInliner::CollectGraphInfo(FlowGraph* flow_graph,
2258+
intptr_t constants_count,
2259+
bool force,
2260+
intptr_t* instruction_count,
2261+
intptr_t* call_site_count) {
22412262
const Function& function = flow_graph->function();
2263+
// For OSR, don't even bother.
2264+
if (flow_graph->IsCompiledForOsr()) {
2265+
*instruction_count = 0;
2266+
*call_site_count = 0;
2267+
return;
2268+
}
2269+
// Specialized case: always recompute, never cache.
2270+
if (constants_count > 0) {
2271+
ASSERT(!force);
2272+
GraphInfoCollector info;
2273+
info.Collect(*flow_graph);
2274+
*instruction_count = info.instruction_count();
2275+
*call_site_count = info.call_site_count();
2276+
return;
2277+
}
2278+
// Non-specialized case: unless forced, only recompute on a cache miss.
2279+
ASSERT(constants_count == 0);
22422280
if (force || (function.optimized_instruction_count() == 0)) {
22432281
GraphInfoCollector info;
22442282
info.Collect(*flow_graph);
2245-
22462283
function.SetOptimizedInstructionCountClamped(info.instruction_count());
22472284
function.SetOptimizedCallSiteCountClamped(info.call_site_count());
22482285
}
2286+
*instruction_count = function.optimized_instruction_count();
2287+
*call_site_count = function.optimized_call_site_count();
22492288
}
22502289

22512290
// TODO(srdjan): This is only needed when disassembling and/or profiling.
@@ -2312,9 +2351,15 @@ bool FlowGraphInliner::AlwaysInline(const Function& function) {
23122351
}
23132352

23142353
int FlowGraphInliner::Inline() {
2315-
// Collect graph info and store it on the function.
2316-
// We might later use it for an early bailout from the inlining.
2317-
CollectGraphInfo(flow_graph_);
2354+
// Collect some early graph information assuming it is non-specialized
2355+
// so that the cached approximation may be used later for an early
2356+
// bailout from inlining.
2357+
intptr_t instruction_count = 0;
2358+
intptr_t call_site_count = 0;
2359+
FlowGraphInliner::CollectGraphInfo(flow_graph_,
2360+
/*constants_count*/ 0,
2361+
/*force*/ false, &instruction_count,
2362+
&call_site_count);
23182363

23192364
const Function& top = flow_graph_->function();
23202365
if ((FLAG_inlining_filter != NULL) &&

runtime/vm/compiler/backend/inliner.h

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -96,8 +96,21 @@ class FlowGraphInliner : ValueObject {
9696
// depth that we inlined.
9797
int Inline();
9898

99-
// Compute graph info if it was not already computed or if 'force' is true.
100-
static void CollectGraphInfo(FlowGraph* flow_graph, bool force = false);
99+
// Computes graph information (instruction and call site count).
100+
// For the non-specialized cases (num_constants_args == 0), the
101+
// method uses a cache to avoid recomputing the counts (the cached
102+
// value may still be approximate but close). The 'force' flag is
103+
// used to update the cached value at the end of running the full pipeline
104+
// on non-specialized cases. Specialized cases (num_constants_args > 0)
105+
// always recompute the counts without caching.
106+
//
107+
// TODO(ajcbik): cache for specific constant argument combinations too?
108+
static void CollectGraphInfo(FlowGraph* flow_graph,
109+
intptr_t num_constant_args,
110+
bool force,
111+
intptr_t* instruction_count,
112+
intptr_t* call_site_count);
113+
101114
static void SetInliningId(FlowGraph* flow_graph, intptr_t inlining_id);
102115

103116
bool AlwaysInline(const Function& function);

runtime/vm/compiler/compiler_pass.cc

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,9 @@ void CompilerPass::RunInliningPipeline(PipelineMode mode,
217217
INVOKE_PASS(TypePropagation);
218218
INVOKE_PASS(ApplyICData);
219219
INVOKE_PASS(Canonicalize);
220+
// Run constant propagation to make sure we specialize for
221+
// (optional) constant arguments passed into the inlined method.
222+
INVOKE_PASS(ConstantPropagation);
220223
// Optimize (a << b) & c patterns, merge instructions. Must occur
221224
// before 'SelectRepresentations' which inserts conversion nodes.
222225
INVOKE_PASS(TryOptimizePatterns);
@@ -475,10 +478,17 @@ COMPILER_PASS(WriteBarrierElimination,
475478
{ WriteBarrierElimination(flow_graph); });
476479

477480
COMPILER_PASS(FinalizeGraph, {
478-
// Compute and store graph informations (call & instruction counts)
479-
// to be later used by the inliner.
480-
FlowGraphInliner::CollectGraphInfo(flow_graph, true);
481+
// At the end of the pipeline, force recomputing and caching graph
482+
// information (instruction and call site counts) for the (assumed)
483+
// non-specialized case with better values, for future inlining.
484+
intptr_t instruction_count = 0;
485+
intptr_t call_site_count = 0;
486+
FlowGraphInliner::CollectGraphInfo(flow_graph,
487+
/*constants_count*/ 0,
488+
/*force*/ true, &instruction_count,
489+
&call_site_count);
481490
flow_graph->function().set_inlining_depth(state->inlining_depth);
491+
// Remove redefinitions for the rest of the pipeline.
482492
flow_graph->RemoveRedefinitions();
483493
});
484494

0 commit comments

Comments
 (0)