Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 18 additions & 11 deletions src/Build/BackEnd/BuildManager/BuildManager.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1233,20 +1233,31 @@ private void RecordCrashTelemetry(Exception exception, bool isUnhandled)
{
string? host = _buildTelemetry?.BuildEngineHost ?? BuildEnvironmentState.GetHostName();

int? activeNodeCount;
int? submissionCount;
lock (_syncLock)
{
activeNodeCount = _activeNodes?.Count;
submissionCount = _buildSubmissions?.Count;
}

CrashTelemetryRecorder.RecordCrashTelemetry(
exception,
isUnhandled ? CrashExitType.UnhandledException : CrashExitType.EndBuildFailure,
isUnhandled,
ExceptionHandling.IsCriticalException(exception),
ProjectCollection.Version?.ToString(),
NativeMethodsShared.FrameworkName,
host);
host,
isStandaloneExecution: _buildTelemetry?.IsStandaloneExecution ?? false,
maxNodeCount: _buildParameters?.MaxNodeCount,
activeNodeCount,
submissionCount);
}

/// <summary>
/// Extracts build state under lock and delegates to <see cref="CrashTelemetryRecorder"/>
/// for EndBuild hang diagnostic telemetry emission. Also writes diagnostics to disk
/// via <see cref="ExceptionHandling.DumpHangDiagnosticsToFile"/>.
/// for EndBuild hang diagnostic telemetry emission.
/// </summary>
private void EmitEndBuildHangDiagnostics(string waitPhase, Stopwatch hangWatch)
{
Expand All @@ -1272,12 +1283,6 @@ private void EmitEndBuildHangDiagnostics(string waitPhase, Stopwatch hangWatch)
host = _buildTelemetry?.BuildEngineHost ?? BuildEnvironmentState.GetHostName();
}

string diagnostics = $"Phase={waitPhase}, Duration={hangWatch.ElapsedMilliseconds}ms, " +
$"PendingSubmissions={pendingSubmissionCount}, WithResultNoLogging={submissionsWithResultNoLogging}, " +
$"ThreadException={threadExceptionRecorded}, UnmatchedProjectStarted={unmatchedProjectStartedCount}";

ExceptionHandling.DumpHangDiagnosticsToFile(diagnostics);

CrashTelemetryRecorder.CollectAndEmitEndBuildHangDiagnostics(
waitPhase,
hangWatch.ElapsedMilliseconds,
Expand All @@ -1287,10 +1292,12 @@ private void EmitEndBuildHangDiagnostics(string waitPhase, Stopwatch hangWatch)
unmatchedProjectStartedCount,
ProjectCollection.Version?.ToString(),
NativeMethodsShared.FrameworkName,
host);
host,
isStandaloneExecution: _buildTelemetry?.IsStandaloneExecution ?? false,
maxNodeCount: _buildParameters?.MaxNodeCount,
activeNodeCount: _activeNodes?.Count);
}


/// <summary>
/// Convenience method. Submits a lone build request and blocks until results are available.
/// </summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -805,8 +805,6 @@ public async Task HandleBuildResultAsync(
BuildEventContext buildEventContext,
CancellationToken cancellationToken)
{
ErrorUtilities.VerifyThrowInternalNull(requestConfiguration.Project, nameof(requestConfiguration.Project));

if (_projectCachePlugins.IsEmpty)
{
return;
Expand All @@ -818,6 +816,10 @@ public async Task HandleBuildResultAsync(
requestConfiguration.RetrieveFromCache();
}

// Now we are sure the Project property is available, verify it's not null before proceeding.
// If it's null, it means the configuration is not properly loaded, which should not happen at this stage.
ErrorUtilities.VerifyThrowInternalNull(requestConfiguration.Project, nameof(requestConfiguration.Project));

// Filter to plugins which apply to the project, if any
List<ProjectCacheDescriptor> projectCacheDescriptors = GetProjectCacheDescriptors(requestConfiguration.Project).ToList();
if (projectCacheDescriptors.Count == 0)
Expand Down
35 changes: 35 additions & 0 deletions src/Framework/Telemetry/CrashTelemetry.cs
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,29 @@ internal class CrashTelemetry : TelemetryBase, IActivityTelemetryDataHolder
/// </summary>
public int? UnmatchedProjectStartedCount { get; set; }

// --- Build state diagnostic properties (help diagnose the context of the crash) ---
// Each property is nullable: it stays null unless the code that records the telemetry assigns a value.

/// <summary>
/// Whether MSBuild is running standalone (CLI) or hosted (VS, etc.).
/// Null when the reporting call site did not supply a value.
/// </summary>
public bool? IsStandaloneExecution { get; set; }

/// <summary>
/// Maximum number of build nodes configured (BuildParameters.MaxNodeCount).
/// Helps determine if out-of-proc nodes were in use.
/// Null when the reporting call site did not supply a value.
/// </summary>
public int? MaxNodeCount { get; set; }

/// <summary>
/// Number of currently active build nodes at crash time.
/// Null when the reporting call site did not supply a value.
/// </summary>
public int? ActiveNodeCount { get; set; }

/// <summary>
/// Number of active build submissions at crash time.
/// Null when the reporting call site did not supply a value.
/// </summary>
public int? SubmissionCount { get; set; }

/// <summary>
/// The original exception, kept for passing to <c>FaultEvent</c>.
/// Not serialized to telemetry properties.
Expand Down Expand Up @@ -372,6 +395,12 @@ public Dictionary<string, object> GetActivityProperties()
AddIfNotNull(ThreadExceptionRecorded);
AddIfNotNull(UnmatchedProjectStartedCount);

// Build state diagnostic properties
AddIfNotNull(IsStandaloneExecution);
AddIfNotNull(MaxNodeCount);
AddIfNotNull(ActiveNodeCount);
AddIfNotNull(SubmissionCount);

return telemetryItems;

void AddIfNotNull(object? value, [CallerArgumentExpression(nameof(value))] string key = "")
Expand Down Expand Up @@ -422,6 +451,12 @@ public override IDictionary<string, string> GetProperties()
AddIfNotNull(ThreadExceptionRecorded?.ToString(), nameof(ThreadExceptionRecorded));
AddIfNotNull(UnmatchedProjectStartedCount?.ToString(), nameof(UnmatchedProjectStartedCount));

// Build state diagnostic properties
AddIfNotNull(IsStandaloneExecution?.ToString(), nameof(IsStandaloneExecution));
AddIfNotNull(MaxNodeCount?.ToString(), nameof(MaxNodeCount));
AddIfNotNull(ActiveNodeCount?.ToString(), nameof(ActiveNodeCount));
AddIfNotNull(SubmissionCount?.ToString(), nameof(SubmissionCount));

return properties;

void AddIfNotNull(string? value, [CallerArgumentExpression(nameof(value))] string key = "")
Expand Down
24 changes: 21 additions & 3 deletions src/Framework/Telemetry/CrashTelemetryRecorder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -31,21 +31,33 @@ internal static class CrashTelemetryRecorder
/// <param name="buildEngineVersion">MSBuild version string, if available.</param>
/// <param name="buildEngineFrameworkName">Framework name, if available.</param>
/// <param name="buildEngineHost">Host name (VS, VSCode, CLI, etc.), if available.</param>
/// <param name="isStandaloneExecution">True if MSBuild runs from command line, false if hosted.</param>
/// <param name="maxNodeCount">Maximum number of build nodes configured.</param>
/// <param name="activeNodeCount">Number of currently active build nodes at crash time.</param>
/// <param name="submissionCount">Number of active build submissions at crash time.</param>
public static void RecordCrashTelemetry(
Exception exception,
CrashExitType exitType,
bool isUnhandled,
bool isCritical,
string? buildEngineVersion = null,
string? buildEngineFrameworkName = null,
string? buildEngineHost = null)
string? buildEngineHost = null,
bool? isStandaloneExecution = null,
int? maxNodeCount = null,
int? activeNodeCount = null,
int? submissionCount = null)
{
try
{
CrashTelemetry crashTelemetry = CreateCrashTelemetry(exception, exitType, isUnhandled, isCritical);
crashTelemetry.BuildEngineVersion = buildEngineVersion;
crashTelemetry.BuildEngineFrameworkName = buildEngineFrameworkName;
crashTelemetry.BuildEngineHost = buildEngineHost;
crashTelemetry.IsStandaloneExecution = isStandaloneExecution;
crashTelemetry.MaxNodeCount = maxNodeCount;
crashTelemetry.ActiveNodeCount = activeNodeCount;
crashTelemetry.SubmissionCount = submissionCount;
KnownTelemetry.CrashTelemetry = crashTelemetry;
}
catch
Expand Down Expand Up @@ -195,7 +207,10 @@ public static void CollectAndEmitEndBuildHangDiagnostics(
int unmatchedProjectStartedCount,
string? buildEngineVersion,
string? buildEngineFrameworkName,
string? buildEngineHost)
string? buildEngineHost,
bool isStandaloneExecution,
int? maxNodeCount = null,
int? activeNodeCount = null)
{
try
{
Expand All @@ -211,9 +226,12 @@ public static void CollectAndEmitEndBuildHangDiagnostics(
SubmissionsWithResultNoLogging = submissionsWithResultNoLogging,
ThreadExceptionRecorded = threadExceptionRecorded,
UnmatchedProjectStartedCount = unmatchedProjectStartedCount,
IsStandaloneExecution = isStandaloneExecution,
MaxNodeCount = maxNodeCount,
ActiveNodeCount = activeNodeCount,
};

TelemetryManager.Instance?.Initialize(isStandalone: false);
TelemetryManager.Instance?.Initialize(isStandaloneExecution);

using IActivity? activity = TelemetryManager.Instance
?.DefaultActivitySource
Expand Down
16 changes: 15 additions & 1 deletion src/MSBuild/XMake.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1113,7 +1113,8 @@ private static void RecordCrashTelemetry(Exception exception, ExitType exitType,
ExceptionHandling.IsCriticalException(exception),
ProjectCollection.Version?.ToString(),
NativeMethodsShared.FrameworkName,
BuildEnvironmentState.GetHostName());
BuildEnvironmentState.GetHostName(),
isStandaloneExecution: !s_isNodeMode);
}

private static ExitType OutputPropertiesAfterEvaluation(string[] getProperty, string[] getItem, Project project, TextWriter outputStream)
Expand Down Expand Up @@ -2005,6 +2006,12 @@ internal static void SetConsoleUI()
/// </summary>
private static bool s_isServerNode;

/// <summary>
/// Indicates that this process was launched as a worker node (via -nodeMode switch).
/// Worker nodes are not standalone executions regardless of who spawned them.
/// </summary>
/// <remarks>
/// Set to true during command-line switch processing when the NodeMode switch is present.
/// Its negation is passed as the isStandaloneExecution value when crash telemetry is recorded.
/// </remarks>
private static bool s_isNodeMode;

/// <summary>
/// Coordinates the processing of all detected switches. It gathers information necessary to invoke the build engine, and
/// performs deeper error checking on the switches and their parameters.
Expand Down Expand Up @@ -2130,6 +2137,13 @@ private static bool ProcessCommandLineSwitches(
}
else if (commandLineSwitches.IsParameterizedSwitchSet(CommandLineSwitches.ParameterizedSwitch.NodeMode))
{
s_isNodeMode = true;

// Worker nodes are not standalone executions. Override the flag that
// Main() set before we knew this was a worker node process, so that
// BuildManager crash/hang telemetry also reports correctly.
KnownTelemetry.PartialBuildTelemetry?.IsStandaloneExecution = false;

StartLocalNode(commandLineSwitches, lowPriority);
}
else
Expand Down
36 changes: 33 additions & 3 deletions src/Shared/Debugging/DebugUtils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@ namespace Microsoft.Build.Shared.Debugging
{
internal static class DebugUtils
{
#pragma warning disable CA1810 // Intentional: static constructor catches exceptions to prevent TypeInitializationException
static DebugUtils()
#pragma warning restore CA1810
{
try
{
Expand Down Expand Up @@ -43,6 +45,22 @@ static DebugUtils()
// Console may not be available.
}
}

// Initialize diagnostic fields inside the static constructor so failures
// are caught here rather than poisoning the type with an unrecoverable
// TypeInitializationException. On .NET Framework, EnvironmentUtilities
// accesses Process.GetCurrentProcess() which can throw Win32Exception
// in restricted environments or when performance counters are corrupted.
try
{
ProcessInfoString = GetProcessInfoString();
ShouldDebugCurrentProcess = CurrentProcessMatchesDebugName();
}
catch
{
ProcessInfoString ??= "Unknown";
ShouldDebugCurrentProcess = false;
}
}

// DebugUtils is initialized early on by the test runner - while preparing data for the DataMemberAttribute of some test,
Expand Down Expand Up @@ -94,10 +112,22 @@ private static bool CurrentProcessMatchesDebugName()
return thisProcessMatchesName;
}

public static readonly string ProcessInfoString =
$"{(ProcessNodeMode.Value?.ToString() ?? "CentralNode")}_{EnvironmentUtilities.ProcessName}_PID={EnvironmentUtilities.CurrentProcessId}_x{(Environment.Is64BitProcess ? "64" : "86")}";
/// <summary>
/// Composes the diagnostic identifier for the current process in the form
/// <c>{node mode}_{process name}_PID={process id}_x{64|86}</c>. The node mode segment
/// falls back to "CentralNode" when <c>ProcessNodeMode.Value</c> is null.
/// </summary>
/// <remarks>
/// Invoke this from the static constructor instead of using it as a field initializer.
/// On .NET Framework, <see cref="EnvironmentUtilities.ProcessName"/> and
/// <see cref="EnvironmentUtilities.CurrentProcessId"/> go through
/// <c>Process.GetCurrentProcess()</c>, which can throw <see cref="System.ComponentModel.Win32Exception"/>
/// in restricted environments or when performance counters are corrupted.
/// A throwing field initializer would surface as an unrecoverable <see cref="TypeInitializationException"/>
/// that poisons the entire <see cref="DebugUtils"/> type, whereas the static constructor's
/// try/catch lets the type finish initializing with a safe fallback value.
/// </remarks>
private static string GetProcessInfoString()
{
    // Evaluate the parts in the same left-to-right order the final string uses them.
    string nodeLabel = ProcessNodeMode.Value?.ToString() ?? "CentralNode";
    var processName = EnvironmentUtilities.ProcessName;
    var processId = EnvironmentUtilities.CurrentProcessId;
    string bitness = Environment.Is64BitProcess ? "64" : "86";

    return $"{nodeLabel}_{processName}_PID={processId}_x{bitness}";
}

public static readonly string ProcessInfoString;

public static readonly bool ShouldDebugCurrentProcess = CurrentProcessMatchesDebugName();
public static readonly bool ShouldDebugCurrentProcess;

public static string DebugPath { get; private set; }

Expand Down
27 changes: 0 additions & 27 deletions src/Shared/ExceptionHandling.cs
Original file line number Diff line number Diff line change
Expand Up @@ -422,33 +422,6 @@ internal static void DumpExceptionToFile(Exception ex)
}
}

/// <summary>
/// Appends a hang diagnostics record to a per-process text file so the information
/// persists on disk and can be retrieved from customer machines later.
/// The file is placed in the same directory as crash dump files (<see cref="DebugDumpPath"/>).
/// </summary>
/// <param name="diagnostics">Diagnostic text to record.</param>
internal static void DumpHangDiagnosticsToFile(string diagnostics)
{
    try
    {
        Directory.CreateDirectory(DebugDumpPath);

        // The process id in the file name keeps each process's records in its own file.
        string hangFileName = $"MSBuild_pid-{EnvironmentUtilities.CurrentProcessId}.hang.txt";
        string hangFilePath = Path.Combine(DebugDumpPath, hangFileName);

        // Each record is a timestamp line, the diagnostics text, and a separator line.
        using StreamWriter hangLog = FileUtilities.OpenWrite(hangFilePath, append: true);
        hangLog.WriteLine(DateTime.Now.ToString("G", CultureInfo.CurrentCulture));
        hangLog.WriteLine(diagnostics);
        hangLog.WriteLine("===================");
    }
    catch
    {
        // Best-effort: diagnostic file writing must never make things worse.
    }
}

/// <summary>
/// Returns the content of any exception dump files modified
/// since the provided time, otherwise returns an empty string.
Expand Down
Loading