Skip to content

Commit 5db8084

Browse files
authored
Revert "Eliminate forwarder stubs by reusing method precodes for call counting indirection" (dotnet#125285)
Reverts dotnet#124664. This appears to have caused a 12-13% regression in NuGet restore perf due to increased time spent in ThePreStub, specifically in RtlEnterCriticalSection.
1 parent c290dc1 commit 5db8084

4 files changed

Lines changed: 131 additions & 100 deletions

File tree

src/coreclr/vm/callcounting.cpp

Lines changed: 97 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -400,6 +400,29 @@ void CallCountingManager::CallCountingStubAllocator::EnumerateHeapRanges(CLRData
400400

401401
#endif // DACCESS_COMPILE
402402

403+
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
404+
// CallCountingManager::MethodDescForwarderStubHashTraits
405+
406+
CallCountingManager::MethodDescForwarderStubHashTraits::key_t
407+
CallCountingManager::MethodDescForwarderStubHashTraits::GetKey(const element_t &e)
408+
{
409+
WRAPPER_NO_CONTRACT;
410+
return e->GetMethodDesc();
411+
}
412+
413+
BOOL CallCountingManager::MethodDescForwarderStubHashTraits::Equals(const key_t &k1, const key_t &k2)
414+
{
415+
WRAPPER_NO_CONTRACT;
416+
return k1 == k2;
417+
}
418+
419+
CallCountingManager::MethodDescForwarderStubHashTraits::count_t
420+
CallCountingManager::MethodDescForwarderStubHashTraits::Hash(const key_t &k)
421+
{
422+
WRAPPER_NO_CONTRACT;
423+
return (count_t)(size_t)k;
424+
}
425+
403426
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
404427
// CallCountingManager::CallCountingManagerHashTraits
405428

@@ -595,14 +618,6 @@ bool CallCountingManager::SetCodeEntryPoint(
595618
// Call counting is disabled, complete, or pending completion. The pending completion stage here would be
596619
// relatively rare, let it be handled elsewhere.
597620
methodDesc->SetCodeEntryPoint(codeEntryPoint);
598-
if (methodDesc->MayHaveEntryPointSlotsToBackpatch())
599-
{
600-
// Reset the precode target to prestub. For backpatchable methods, the precode must always
601-
// point to prestub (not native code) so that new vtable slots flow through DoBackpatch()
602-
// for discovery and recording. SetCodeEntryPoint() above handles recorded slots via
603-
// BackpatchEntryPointSlots() without touching the precode.
604-
Precode::GetPrecodeFromEntryPoint(methodDesc->GetTemporaryEntryPoint())->ResetTargetInterlocked();
605-
}
606621
return true;
607622
}
608623

@@ -641,12 +656,6 @@ bool CallCountingManager::SetCodeEntryPoint(
641656
->AsyncPromoteToTier1(activeCodeVersion, createTieringBackgroundWorkerRef);
642657
}
643658
methodDesc->SetCodeEntryPoint(codeEntryPoint);
644-
if (methodDesc->MayHaveEntryPointSlotsToBackpatch())
645-
{
646-
// Reset the precode target to prestub so that new vtable slots flow through DoBackpatch()
647-
// for discovery and recording. The call counting stub is no longer needed.
648-
Precode::GetPrecodeFromEntryPoint(methodDesc->GetTemporaryEntryPoint())->ResetTargetInterlocked();
649-
}
650659
callCountingInfo->SetStage(CallCountingInfo::Stage::Complete);
651660
return true;
652661
} while (false);
@@ -709,41 +718,51 @@ bool CallCountingManager::SetCodeEntryPoint(
709718
PCODE callCountingCodeEntryPoint = callCountingStub->GetEntryPoint();
710719
if (methodDesc->MayHaveEntryPointSlotsToBackpatch())
711720
{
712-
// For methods that may have entry point slots to backpatch, redirect the method's temporary entry point
713-
// (precode) to the call counting stub. This reuses the method's own precode as the stable indirection,
714-
// avoiding the need to allocate separate forwarder stubs.
715-
//
716-
// The call counting stub should not be the entry point stored directly in vtable slots:
717-
// - Stubs should be deletable without leaving dangling pointers in vtable slots
718-
// - On some architectures (e.g. arm64), jitted code may load the entry point into a register at a GC-safe
719-
// point, and the stub could be deleted before the register is used for the call
720-
//
721-
// Ensure vtable slots point to the temporary entry point (precode) so calls flow through
722-
// precode → call counting stub → native code. Vtable slots may have been backpatched to native code
723-
// during the initial publish or tiering delay. BackpatchToResetEntryPointSlots() also sets
724-
// GetMethodEntryPoint() to the temporary entry point, which we override below.
725-
//
726-
// There is a benign race window between resetting vtable slots and setting the precode target: a thread
727-
// may briefly see vtable slots pointing to the precode while the precode still points to its previous
728-
// target (prestub or native code). This results in at most one uncounted call, which is acceptable since
729-
// call counting is a heuristic.
730-
methodDesc->BackpatchToResetEntryPointSlots();
731-
732-
// Keep GetMethodEntryPoint() set to the native code entry point rather than the temporary entry point.
733-
// DoBackpatch() (prestub.cpp) skips slot recording when GetMethodEntryPoint() == GetTemporaryEntryPoint(),
734-
// interpreting it as "method not yet published". By keeping GetMethodEntryPoint() at native code, we
735-
// ensure that after the precode reverts to prestub (when call counting stubs are deleted), new vtable
736-
// slots discovered by DoBackpatch() will be properly recorded for future backpatching.
737-
methodDesc->SetMethodEntryPoint(codeEntryPoint);
738-
Precode *precode = Precode::GetPrecodeFromEntryPoint(methodDesc->GetTemporaryEntryPoint());
739-
precode->SetTargetInterlocked(callCountingCodeEntryPoint, FALSE);
721+
// The call counting stub should not be the entry point that is called first in the process of a call
722+
// - Stubs should be deletable. Many methods will have call counting stubs associated with them, and although the memory
723+
// involved is typically insignificant compared to the average memory overhead per method, by steady-state it would
724+
// otherwise be unnecessary memory overhead serving no purpose.
725+
// - In order to be able to delete a stub, the jitted code of a method cannot be allowed to load the stub as the entry
726+
// point of a callee into a register in a GC-safe point that allows for the stub to be deleted before the register is
727+
// reused to call the stub. On some processor architectures, perhaps the JIT can guarantee that it would not load the
728+
// entry point into a register before the call, but this is not possible on arm32 or arm64. Rather, perhaps the
729+
// region containing the load and call would not be considered GC-safe. Calls are considered GC-safe points, and this
730+
// may cause many methods that are currently fully interruptible to have to be partially interruptible and record
731+
// extra GC info instead. This would be nontrivial and there would be tradeoffs.
732+
// - For any method that may have an entry point slot that would be backpatched with the call counting stub's entry
733+
// point, a small forwarder stub (precode) is created. The forwarder stub has loader allocator lifetime and forwards to
734+
// the larger call counting stub. This is a simple solution for now and seems to have negligible impact.
735+
// - Reusing FuncPtrStubs was considered. FuncPtrStubs are currently not used as a code entry point for a virtual or
736+
// interface method and may be bypassed. For example, a call may call through the vtable slot, or a devirtualized call
737+
// may call through a FuncPtrStub. The target of a FuncPtrStub is a code entry point and is backpatched when a
738+
// method's active code entry point changes. Mixing the current use of FuncPtrStubs with the use as a forwarder for
739+
// call counting does not seem trivial and would likely complicate its use. There may not be much gain in reusing
740+
// FuncPtrStubs, as typically, they are created for only a small percentage of virtual/interface methods.
741+
742+
MethodDescForwarderStubHash &methodDescForwarderStubHash = callCountingManager->m_methodDescForwarderStubHash;
743+
Precode *forwarderStub = methodDescForwarderStubHash.Lookup(methodDesc);
744+
if (forwarderStub == nullptr)
745+
{
746+
AllocMemTracker forwarderStubAllocationTracker;
747+
forwarderStub =
748+
Precode::Allocate(
749+
methodDesc->GetPrecodeType(),
750+
methodDesc,
751+
methodDesc->GetLoaderAllocator(),
752+
&forwarderStubAllocationTracker);
753+
methodDescForwarderStubHash.Add(forwarderStub);
754+
forwarderStubAllocationTracker.SuppressRelease();
755+
}
756+
757+
forwarderStub->SetTargetInterlocked(callCountingCodeEntryPoint, false);
758+
callCountingCodeEntryPoint = forwarderStub->GetEntryPoint();
740759
}
741760
else
742761
{
743762
_ASSERTE(methodDesc->IsVersionableWithPrecode());
744-
methodDesc->SetCodeEntryPoint(callCountingCodeEntryPoint);
745763
}
746764

765+
methodDesc->SetCodeEntryPoint(callCountingCodeEntryPoint);
747766
callCountingInfo->SetStage(CallCountingInfo::Stage::StubMayBeActive);
748767
return true;
749768
}
@@ -913,13 +932,6 @@ void CallCountingManager::CompleteCallCounting()
913932
if (activeCodeVersion == codeVersion)
914933
{
915934
methodDesc->SetCodeEntryPoint(activeCodeVersion.GetNativeCode());
916-
if (methodDesc->MayHaveEntryPointSlotsToBackpatch())
917-
{
918-
// Reset the precode target to prestub so that new vtable slots flow through
919-
// DoBackpatch() for discovery and recording. The call counting stub will be
920-
// deleted by DeleteAllCallCountingStubs().
921-
Precode::GetPrecodeFromEntryPoint(methodDesc->GetTemporaryEntryPoint())->ResetTargetInterlocked();
922-
}
923935
break;
924936
}
925937

@@ -935,19 +947,11 @@ void CallCountingManager::CompleteCallCounting()
935947
if (activeNativeCode != 0)
936948
{
937949
methodDesc->SetCodeEntryPoint(activeNativeCode);
938-
if (methodDesc->MayHaveEntryPointSlotsToBackpatch())
939-
{
940-
Precode::GetPrecodeFromEntryPoint(methodDesc->GetTemporaryEntryPoint())->ResetTargetInterlocked();
941-
}
942950
break;
943951
}
944952
}
945953

946954
methodDesc->ResetCodeEntryPoint();
947-
if (methodDesc->MayHaveEntryPointSlotsToBackpatch())
948-
{
949-
Precode::GetPrecodeFromEntryPoint(methodDesc->GetTemporaryEntryPoint())->ResetTargetInterlocked();
950-
}
951955
} while (false);
952956

953957
callCountingInfo->SetStage(CallCountingInfo::Stage::Complete);
@@ -1085,15 +1089,7 @@ void CallCountingManager::StopAllCallCounting(TieredCompilationManager *tieredCo
10851089

10861090
// The intention is that all call counting stubs will be deleted shortly, and only methods that are called again
10871091
// will cause stubs to be recreated, so reset the code entry point
1088-
MethodDesc *methodDesc = codeVersion.GetMethodDesc();
1089-
methodDesc->ResetCodeEntryPoint();
1090-
if (methodDesc->MayHaveEntryPointSlotsToBackpatch())
1091-
{
1092-
// ResetCodeEntryPoint() for backpatchable methods resets recorded slots but does not touch the
1093-
// precode target. Reset the precode target to prestub so that new vtable slots flow through the
1094-
// prestub for slot discovery and recording.
1095-
Precode::GetPrecodeFromEntryPoint(methodDesc->GetTemporaryEntryPoint())->ResetTargetInterlocked();
1096-
}
1092+
codeVersion.GetMethodDesc()->ResetCodeEntryPoint();
10971093
callCountingInfo->SetStage(newCallCountingStage);
10981094
}
10991095

@@ -1113,6 +1109,14 @@ void CallCountingManager::StopAllCallCounting(TieredCompilationManager *tieredCo
11131109
EX_SWALLOW_NONTERMINAL;
11141110
}
11151111
}
1112+
1113+
// Reset forwarder stubs, they are not in use anymore
1114+
MethodDescForwarderStubHash &methodDescForwarderStubHash = callCountingManager->m_methodDescForwarderStubHash;
1115+
for (auto itEnd = methodDescForwarderStubHash.End(), it = methodDescForwarderStubHash.Begin(); it != itEnd; ++it)
1116+
{
1117+
Precode *forwarderStub = *it;
1118+
forwarderStub->ResetTargetInterlocked();
1119+
}
11161120
}
11171121
}
11181122

@@ -1140,6 +1144,7 @@ void CallCountingManager::DeleteAllCallCountingStubs()
11401144
_ASSERTE(callCountingManager->m_callCountingInfosPendingCompletion.IsEmpty());
11411145

11421146
// Clear the call counting stub from call counting infos and delete completed infos
1147+
MethodDescForwarderStubHash &methodDescForwarderStubHash = callCountingManager->m_methodDescForwarderStubHash;
11431148
CallCountingInfoByCodeVersionHash &callCountingInfoByCodeVersionHash =
11441149
callCountingManager->m_callCountingInfoByCodeVersionHash;
11451150
for (auto itEnd = callCountingInfoByCodeVersionHash.End(), it = callCountingInfoByCodeVersionHash.Begin();
@@ -1164,14 +1169,14 @@ void CallCountingManager::DeleteAllCallCountingStubs()
11641169
continue;
11651170
}
11661171

1167-
// Ensure the precode target is prestub for backpatchable methods whose call counting has completed.
1168-
// CompleteCallCounting() should have already reset the precode to prestub; this is a safety net to
1169-
// guarantee the invariant that the precode always points to prestub when call counting is not active,
1170-
// so that new vtable slots can be discovered and recorded by DoBackpatch().
1171-
MethodDesc *methodDesc = callCountingInfo->GetCodeVersion().GetMethodDesc();
1172-
if (methodDesc->MayHaveEntryPointSlotsToBackpatch())
1172+
// Currently, tier 0 is the last code version that is counted, and the method is typically not counted anymore.
1173+
// Remove the forwarder stub if one exists, a new one will be created if necessary, for example, if a profiler adds
1174+
// an IL code version for the method.
1175+
Precode *const *forwarderStubPtr =
1176+
methodDescForwarderStubHash.LookupPtr(callCountingInfo->GetCodeVersion().GetMethodDesc());
1177+
if (forwarderStubPtr != nullptr)
11731178
{
1174-
Precode::GetPrecodeFromEntryPoint(methodDesc->GetTemporaryEntryPoint())->ResetTargetInterlocked();
1179+
methodDescForwarderStubHash.RemovePtr(const_cast<Precode **>(forwarderStubPtr));
11751180
}
11761181

11771182
callCountingInfoByCodeVersionHash.Remove(it);
@@ -1222,6 +1227,24 @@ void CallCountingManager::TrimCollections()
12221227
}
12231228
EX_SWALLOW_NONTERMINAL
12241229
}
1230+
1231+
count = m_methodDescForwarderStubHash.GetCount();
1232+
capacity = m_methodDescForwarderStubHash.GetCapacity();
1233+
if (count == 0)
1234+
{
1235+
if (capacity != 0)
1236+
{
1237+
m_methodDescForwarderStubHash.RemoveAll();
1238+
}
1239+
}
1240+
else if (count <= capacity / 4)
1241+
{
1242+
EX_TRY
1243+
{
1244+
m_methodDescForwarderStubHash.Reallocate(count * 2);
1245+
}
1246+
EX_SWALLOW_NONTERMINAL
1247+
}
12251248
}
12261249

12271250
#endif // !DACCESS_COMPILE

src/coreclr/vm/callcounting.h

Lines changed: 29 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -19,18 +19,11 @@ When starting call counting for a method (see CallCountingManager::SetCodeEntryP
1919
- A CallCountingStub is created. It contains a small amount of code that decrements the remaining call count and checks for
2020
zero. When nonzero, it jumps to the code version's native code entry point. When zero, it forwards to a helper function that
2121
handles tier promotion.
22-
- For tiered methods that do not normally use a MethodDesc precode as the stable entry point (virtual and interface methods
23-
when slot backpatching is enabled), the method's own temporary-entrypoint precode is redirected to the call counting stub,
24-
and vtable slots are reset to point to the temporary entry point. This ensures calls flow through temporary-entrypoint
25-
precode -> call counting stub -> native code, and the call counting stub can be safely deleted since vtable slots don't
26-
point to it directly. GetMethodEntryPoint() is kept at the native code entry point (not the temporary entry point) so that
27-
DoBackpatch() can still record new vtable slots after the precode reverts to prestub. During call counting, there is a
28-
bounded window where new vtable slots may not be recorded because the precode target is the call counting stub rather than
29-
the prestub. These slots are corrected once call counting stubs are deleted and the precode reverts to prestub.
30-
When call counting ends, the precode is always reset to prestub (never to native code), preserving the invariant
31-
that new vtable slots can be discovered and recorded by DoBackpatch().
32-
- For methods with a precode (or when slot backpatching is disabled), the method's code entry point is set to the call
33-
counting stub directly.
22+
- For tiered methods that don't have a precode (virtual and interface methods when slot backpatching is enabled), a forwarder
23+
stub (a precode) is created and it forwards to the call counting stub. This is so that the call counting stub can be safely
24+
and easily deleted. The forwarder stubs are only used when counting calls, there is one per method (not per code version), and
25+
they are not deleted.
26+
- The method's code entry point is set to the forwarder stub or the call counting stub to count calls to the code version
3427
3528
When the call count threshold is reached (see CallCountingManager::OnCallCountThresholdReached):
3629
- The helper call enqueues completion of call counting for background processing
@@ -43,6 +36,8 @@ After all work queued for promotion is completed and methods transitioned to opt
4336
- All call counting stubs are deleted. For code versions that have not completed counting, the method's code entry point is
4437
reset such that call counting would be reestablished on the next call.
4538
- Completed call counting infos are deleted
39+
- For methods that no longer have any code versions that need to be counted, the forwarder stubs are no longer tracked. If a
40+
new IL code version is added thereafter (perhaps by a profiler), a new forwarder stub may be created.
4641
4742
Miscellaneous
4843
-------------
@@ -291,6 +286,27 @@ class CallCountingManager
291286
DISABLE_COPY(CallCountingStubAllocator);
292287
};
293288

289+
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
290+
// CallCountingManager::MethodDescForwarderStub
291+
292+
private:
293+
class MethodDescForwarderStubHashTraits : public DefaultSHashTraits<Precode *>
294+
{
295+
private:
296+
typedef DefaultSHashTraits<Precode *> Base;
297+
public:
298+
typedef Base::element_t element_t;
299+
typedef Base::count_t count_t;
300+
typedef MethodDesc *key_t;
301+
302+
public:
303+
static key_t GetKey(const element_t &e);
304+
static BOOL Equals(const key_t &k1, const key_t &k2);
305+
static count_t Hash(const key_t &k);
306+
};
307+
308+
typedef SHash<MethodDescForwarderStubHashTraits> MethodDescForwarderStubHash;
309+
294310
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
295311
// CallCountingManager::CallCountingManagerHashTraits
296312

@@ -325,6 +341,7 @@ class CallCountingManager
325341
private:
326342
CallCountingInfoByCodeVersionHash m_callCountingInfoByCodeVersionHash;
327343
CallCountingStubAllocator m_callCountingStubAllocator;
344+
MethodDescForwarderStubHash m_methodDescForwarderStubHash;
328345
SArray<CallCountingInfo *> m_callCountingInfosPendingCompletion;
329346

330347
public:

src/coreclr/vm/method.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3165,7 +3165,7 @@ void MethodDesc::RecordAndBackpatchEntryPointSlot_Locked(
31653165
backpatchTracker->AddSlotAndPatch_Locked(this, slotLoaderAllocator, slot, slotType, currentEntryPoint);
31663166
}
31673167

3168-
bool MethodDesc::TryBackpatchEntryPointSlots(
3168+
FORCEINLINE bool MethodDesc::TryBackpatchEntryPointSlots(
31693169
PCODE entryPoint,
31703170
bool isPrestubEntryPoint,
31713171
bool onlyFromPrestubEntryPoint)

0 commit comments

Comments
 (0)