Skip to content

Commit 70d6903

Browse files
monojenkinsmarek-safar
authored andcommitted
[2019-08] [merp] Use a separate program as the hang supervisor. (#16900)
* [merp] Use a separate program as the hang supervisor. Fixes #15646 macOS does not like signals being sent from the forked supervisor process, possibly towards anywhere but definitely when sent to the parent process. The following message is currently spewed after the supervisor process attempting to send a SIGSEGV to a hanged Mono process: "The process has forked and you cannot use this CoreFoundation functionality safely. You MUST exec()." We follow that direction and introduce a new binary that, when available in the mono executable's binary directory, is used to abort the parent process for us. * Experiment: remove line possibly causing bad compilation under C++
1 parent 4bff2b6 commit 70d6903

File tree

9 files changed

+92
-23
lines changed

9 files changed

+92
-23
lines changed

configure.ac

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6079,6 +6079,7 @@ AC_CONFIG_FILES([po/mcs/Makefile.in])
60796079
AC_CONFIG_FILES([acceptance-tests/microbench-perf.sh],[chmod +x acceptance-tests/microbench-perf.sh])
60806080
AC_CONFIG_FILES([runtime/mono-wrapper],[chmod +x runtime/mono-wrapper])
60816081
AC_CONFIG_FILES([runtime/monodis-wrapper],[chmod +x runtime/monodis-wrapper])
6082+
AC_CONFIG_FILES([runtime/bin/mono-hang-watchdog],[chmod +x runtime/bin/mono-hang-watchdog])
60826083

60836084
AC_CONFIG_COMMANDS([runtime/etc/mono/1.0/machine.config],
60846085
[ depth=../../../..
@@ -6733,6 +6734,7 @@ tools/Makefile
67336734
tools/locale-builder/Makefile
67346735
tools/sgen/Makefile
67356736
tools/pedump/Makefile
6737+
tools/mono-hang-watchdog/Makefile
67366738
runtime/Makefile
67376739
msvc/Makefile
67386740
po/Makefile

mono/metadata/icall.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6380,7 +6380,7 @@ void
63806380
ves_icall_Mono_Runtime_EnableCrashReportingLog (const char *directory, MonoError *error)
63816381
{
63826382
#ifndef DISABLE_CRASH_REPORTING
6383-
mono_summarize_set_timeline_dir (directory);
6383+
mono_threads_summarize_init (directory);
63846384
#endif
63856385
}
63866386

mono/metadata/threads-types.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -540,6 +540,9 @@ typedef struct {
540540
MonoContext ctx_mem;
541541
} MonoThreadSummary;
542542

543+
void
544+
mono_threads_summarize_init (const char *timeline_dir);
545+
543546
gboolean
544547
mono_threads_summarize (MonoContext *ctx, gchar **out, MonoStackHash *hashes, gboolean silent, gboolean signal_handler_controller, gchar *mem, size_t provided_size);
545548

mono/metadata/threads.c

Lines changed: 18 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@
5858
#include <mono/metadata/exception-internals.h>
5959
#include <mono/utils/mono-state.h>
6060
#include <mono/metadata/w32subset.h>
61+
#include <mono/metadata/mono-config.h>
6162

6263
#ifdef HAVE_SYS_WAIT_H
6364
#include <sys/wait.h>
@@ -6164,7 +6165,6 @@ mono_threads_summarize_one (MonoThreadSummary *out, MonoContext *ctx)
61646165
return success;
61656166
}
61666167

6167-
#define TIMEOUT_CRASH_REPORTER_FATAL 30
61686168
#define MAX_NUM_THREADS 128
61696169
typedef struct {
61706170
gint32 has_owner; // state of this memory
@@ -6180,7 +6180,7 @@ typedef struct {
61806180
gboolean silent; // print to stdout
61816181
} SummarizerGlobalState;
61826182

6183-
#if defined(HAVE_KILL) && !defined(HOST_ANDROID) && defined(HAVE_WAITPID) && ((!defined(HOST_DARWIN) && defined(SYS_fork)) || HAVE_FORK)
6183+
#if defined(HAVE_KILL) && !defined(HOST_ANDROID) && defined(HAVE_WAITPID) && defined(HAVE_EXECVE) && ((!defined(HOST_DARWIN) && defined(SYS_fork)) || HAVE_FORK)
61846184
#define HAVE_MONO_SUMMARIZER_SUPERVISOR 1
61856185
#endif
61866186

@@ -6191,8 +6191,9 @@ typedef struct {
61916191
} SummarizerSupervisorState;
61926192

61936193
#ifndef HAVE_MONO_SUMMARIZER_SUPERVISOR
6194-
static void
6195-
summarizer_supervisor_wait (SummarizerSupervisorState *state)
6194+
6195+
void
6196+
mono_threads_summarize_init (const char *timeline_dir)
61966197
{
61976198
return;
61986199
}
@@ -6211,23 +6212,14 @@ summarizer_supervisor_end (SummarizerSupervisorState *state)
62116212
}
62126213

62136214
#else
6214-
static void
6215-
summarizer_supervisor_wait (SummarizerSupervisorState *state)
6216-
{
6217-
sleep (TIMEOUT_CRASH_REPORTER_FATAL);
6215+
static const char *hang_watchdog_path;
62186216

6219-
// If we haven't been SIGKILL'ed yet, we signal our parent
6220-
// and then exit
6221-
#ifdef HAVE_KILL
6222-
g_async_safe_printf("Crash Reporter has timed out, sending SIGSEGV\n");
6223-
kill (state->pid, SIGSEGV);
6224-
#else
6225-
g_error ("kill () is not supported by this platform");
6226-
#endif
6227-
6228-
exit (1);
6217+
void
6218+
mono_threads_summarize_init (const char *timeline_dir)
6219+
{
6220+
hang_watchdog_path = g_build_filename (mono_get_config_dir (), "..", "bin", "mono-hang-watchdog", NULL);
6221+
mono_summarize_set_timeline_dir (timeline_dir);
62296222
}
6230-
62316223
static pid_t
62326224
summarizer_supervisor_start (SummarizerSupervisorState *state)
62336225
{
@@ -6254,6 +6246,13 @@ summarizer_supervisor_start (SummarizerSupervisorState *state)
62546246

62556247
if (pid != 0)
62566248
state->supervisor_pid = pid;
6249+
else {
6250+
char pid_str[20]; // pid is a uint64_t, 20 digits max in decimal form
6251+
sprintf (pid_str, "%llu", (uint64_t)state->pid);
6252+
const char *const args[] = { hang_watchdog_path, pid_str, NULL };
6253+
execve (args[0], (char * const*)args, NULL); // run 'mono-hang-watchdog [pid]'
6254+
g_assert_not_reached ();
6255+
}
62576256

62586257
return pid;
62596258
}
@@ -6567,8 +6566,6 @@ mono_threads_summarize (MonoContext *ctx, gchar **out, MonoStackHash *hashes, gb
65676566
g_assert (mem);
65686567
success = mono_threads_summarize_execute_internal (ctx, out, hashes, silent, mem, provided_size, TRUE);
65696568
summarizer_supervisor_end (&synch);
6570-
} else {
6571-
summarizer_supervisor_wait (&synch);
65726569
}
65736570

65746571
if (!already_async)

mono/utils/mono-state.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include <mono/utils/mono-state.h>
1616
#include <mono/utils/mono-threads-coop.h>
1717
#include <mono/metadata/object-internals.h>
18+
#include <mono/metadata/mono-config-dirs.h>
1819

1920
#include <sys/param.h>
2021
#include <fcntl.h>

runtime/bin/mono-hang-watchdog.in

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
#! /bin/sh
2+
r='@mono_build_root@'
3+
exec "$r/tools/mono-hang-watchdog/mono-hang-watchdog" "$@"

tools/Makefile.am

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
SUBDIRS = locale-builder sgen pedump
1+
SUBDIRS = locale-builder sgen pedump mono-hang-watchdog
22

33

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
2+
AM_CPPFLAGS = $(SHARED_CFLAGS)
3+
4+
if DISABLE_EXECUTABLES
5+
bin_PROGRAMS =
6+
else
7+
bin_PROGRAMS = mono-hang-watchdog
8+
endif
9+
10+
CFLAGS = $(filter-out @CXX_REMOVE_CFLAGS@, @CFLAGS@)
11+
12+
mono_hang_watchdog_SOURCES = mono-hang-watchdog.c
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
/* Given a external process' id as argument, the program waits for a set timeout then attempts to abort that process */
2+
/* Used by the Mono runtime's crash reporting. */
3+
4+
#include <stdlib.h>
5+
#include <stdio.h>
6+
#include <stdint.h>
7+
#include <errno.h>
8+
#include <unistd.h>
9+
10+
#include "config.h"
11+
12+
#ifdef HAVE_SIGNAL_H
13+
#include <signal.h>
14+
#endif
15+
16+
#define TIMEOUT 30
17+
18+
static char* program_name;
19+
void program_exit (int exit_code, const char* message);
20+
21+
int main (int argc, char* argv[])
22+
{
23+
program_name = argv [0];
24+
if (argc != 2)
25+
program_exit (1, "Please provide one argument (pid)");
26+
errno = 0;
27+
pid_t pid = (pid_t)strtoul (argv [1], NULL, 10);
28+
if (errno)
29+
program_exit (2, "Invalid pid");
30+
31+
sleep (TIMEOUT);
32+
33+
/* if we survived the timeout, we consider the Mono process as hung */
34+
35+
#ifndef HAVE_KILL
36+
/* just inform the user */
37+
printf ("Mono process with pid %llu appears to be hung", (uint64_t)pid);
38+
return 0;
39+
#else
40+
printf ("Mono process hang detected, sending kill signal to pid %llu\n", (uint64_t)pid);
41+
return kill (pid, SIGKILL);
42+
#endif
43+
}
44+
45+
void program_exit (int exit_code, const char* message)
46+
{
47+
if (message)
48+
printf ("%s\n", message);
49+
printf ("Usage: '%s [pid]'\t\t[pid]: The id for the Mono process\n", program_name);
50+
exit (exit_code);
51+
}

0 commit comments

Comments
 (0)