Skip to content

Commit 6bfd47a

Browse files
committed
fix(e2e): clean interrupted docker harness runs
1 parent 08ae0e6 commit 6bfd47a

3 files changed

Lines changed: 376 additions & 6 deletions

File tree

scripts/lib/docker-e2e-logs.sh

Lines changed: 63 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,68 @@ run_logged_print_heartbeat() {
4444
log_file="$(docker_e2e_run_log "$label")"
4545
"$@" >"$log_file" 2>&1 &
4646
local command_pid=$!
47+
local cleanup_done=0
48+
local previous_int_trap
49+
local previous_term_trap
50+
local previous_hup_trap
51+
previous_int_trap="$(trap -p INT || true)"
52+
previous_term_trap="$(trap -p TERM || true)"
53+
previous_hup_trap="$(trap -p HUP || true)"
54+
# Stop the backgrounded heartbeat command: send TERM, poll for exit, then KILL.
# Globals:
#   command_pid (read) - PID of the background command to stop.
#   OPENCLAW_DOCKER_E2E_HEARTBEAT_TERM_GRACE_SECONDS (read) - grace period in
#     whole seconds; any value that is not a positive integer falls back to 30.
# Returns:
#   0 in every case; failures to deliver a signal are ignored on purpose
#   because the process may already be gone.
terminate_heartbeat_command() {
  kill -TERM "$command_pid" 2>/dev/null || true
  local grace_seconds="${OPENCLAW_DOCKER_E2E_HEARTBEAT_TERM_GRACE_SECONDS:-30}"
  if ! [[ "$grace_seconds" =~ ^[0-9]+$ ]] || [ "$grace_seconds" -lt 1 ]; then
    grace_seconds="30"
  else
    # Force base 10 so zero-padded values such as "08" are not parsed as octal.
    grace_seconds="$((10#$grace_seconds))"
  fi
  # Poll every 0.1s, i.e. ten polls per grace second. The counter is a shell
  # arithmetic loop, so this cleanup path does not fork a PATH-resolved `seq`.
  local max_polls="$((grace_seconds * 10))"
  local poll_attempt
  for ((poll_attempt = 0; poll_attempt < max_polls; poll_attempt++)); do
    if ! kill -0 "$command_pid" 2>/dev/null; then
      return 0
    fi
    /bin/sleep 0.1
  done
  # Grace period elapsed and the process is still there: force it down.
  kill -KILL "$command_pid" 2>/dev/null || true
}
71+
# Put the INT, TERM and HUP traps back to what was saved before the heartbeat
# handlers were installed.
# Globals (read):
#   previous_int_trap, previous_term_trap, previous_hup_trap - saved
#   `trap -p` output for each signal. An empty value means nothing was saved,
#   so that signal is reset to its default disposition.
restore_heartbeat_traps() {
  local signal_name
  local saved_trap
  for signal_name in INT TERM HUP; do
    case "$signal_name" in
      INT) saved_trap="$previous_int_trap" ;;
      TERM) saved_trap="$previous_term_trap" ;;
      HUP) saved_trap="$previous_hup_trap" ;;
    esac
    if [ -z "$saved_trap" ]; then
      trap - "$signal_name"
    else
      # The saved text is itself a complete `trap` command; replay it verbatim.
      eval "$saved_trap"
    fi
  done
}
88+
# One-shot teardown for the heartbeat-wrapped background command.
# Arguments:
#   $1 - status to finish with; defaults to the status of the last command.
# Globals:
#   cleanup_done (read/written) - set to 1 on the first run; later calls only
#     return the requested status.
#   command_pid (read) - background command to stop and reap if still alive.
#   log_file (read) - log file to delete.
# Behavior:
#   Clears the INT/TERM/HUP handlers, stops and reaps the command, removes the
#   log file and restores the earlier traps. Statuses >= 128 then exit the
#   shell; anything lower is returned to the caller.
cleanup_heartbeat_command() {
  local final_status="${1:-$?}"
  # Already cleaned up: report the status and do nothing else.
  [ "$cleanup_done" = "1" ] && return "$final_status"
  cleanup_done=1
  trap - INT TERM HUP
  if kill -0 "$command_pid" 2>/dev/null; then
    terminate_heartbeat_command
    # Reap the child; its exit status is irrelevant at this point.
    wait "$command_pid" 2>/dev/null || true
  fi
  rm -f "$log_file"
  restore_heartbeat_traps
  if [ "$final_status" -ge 128 ]; then
    exit "$final_status"
  fi
  return "$final_status"
}
106+
trap 'cleanup_heartbeat_command 130' INT
107+
trap 'cleanup_heartbeat_command 143' TERM
108+
trap 'cleanup_heartbeat_command 129' HUP
47109
local started_at="$SECONDS"
48110
local next_heartbeat=$interval_seconds
49111
local status=0
@@ -65,7 +127,7 @@ run_logged_print_heartbeat() {
65127
status=$?
66128
set -e
67129
docker_e2e_print_log "$log_file"
68-
rm -f "$log_file"
130+
cleanup_heartbeat_command 0
69131
return "$status"
70132
}
71133

scripts/lib/docker-e2e-package.sh

Lines changed: 107 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -224,13 +224,115 @@ docker_e2e_run_with_harness() {
224224
local run_status=0
225225
local cid_dir
226226
local cidfile
227+
local docker_run_pid=""
228+
local harness_stdin_fd=""
229+
local cleanup_done=0
230+
local previous_int_trap
231+
local previous_term_trap
232+
local previous_hup_trap
227233
cid_dir="$(mktemp -d "${TMPDIR:-/tmp}/openclaw-docker-e2e-container.XXXXXX")"
228234
cidfile="$cid_dir/container.cid"
229-
docker_e2e_docker_run_cmd run --rm --cidfile "$cidfile" "${DOCKER_E2E_HARNESS_ARGS[@]}" "$@" ||
230-
run_status="$?"
231-
docker_e2e_cleanup_container_cidfile "$cidfile"
232-
rmdir "$cid_dir" 2>/dev/null || true
233-
docker_e2e_cleanup_package_mount_args
235+
previous_int_trap="$(trap -p INT || true)"
236+
previous_term_trap="$(trap -p TERM || true)"
237+
previous_hup_trap="$(trap -p HUP || true)"
238+
# Put the INT, TERM and HUP traps back to what was saved before the harness
# handlers were installed.
# Globals (read):
#   previous_int_trap, previous_term_trap, previous_hup_trap - saved
#   `trap -p` output for each signal. An empty value means nothing was saved,
#   so that signal is reset to its default disposition.
restore_harness_traps() {
  local signal_name
  local saved_trap
  for signal_name in INT TERM HUP; do
    case "$signal_name" in
      INT) saved_trap="$previous_int_trap" ;;
      TERM) saved_trap="$previous_term_trap" ;;
      HUP) saved_trap="$previous_hup_trap" ;;
    esac
    if [ -z "$saved_trap" ]; then
      trap - "$signal_name"
    else
      # The saved text is itself a complete `trap` command; replay it verbatim.
      eval "$saved_trap"
    fi
  done
}
255+
# Print every descendant PID of a process, one per line.
# Arguments:
#   $1 - PID whose process subtree should be listed.
# Outputs:
#   Descendant PIDs on stdout in depth-first order: the descendants of a child
#   are printed before the child itself. Prints nothing when `pgrep -P` finds
#   no children or fails.
docker_e2e_harness_descendant_pids() {
  local parent_pid="$1"
  local direct_children
  # Snapshot the direct children before recursing into any of them.
  direct_children="$(pgrep -P "$parent_pid" 2>/dev/null || true)"
  local child_pid
  while read -r child_pid; do
    # An empty snapshot still feeds one blank line through the here-string.
    [ -n "$child_pid" ] || continue
    docker_e2e_harness_descendant_pids "$child_pid"
    printf '%s\n' "$child_pid"
  done <<<"$direct_children"
}
263+
# Stop the backgrounded docker run process and its process subtree: TERM
# first, KILL whatever is left once the grace period has elapsed.
# Globals:
#   docker_run_pid (read) - PID of the background docker run; empty means
#     nothing was started, and the function is then a no-op.
#   OPENCLAW_DOCKER_E2E_CONTAINER_TERM_GRACE_SECONDS (read) - grace period in
#     whole seconds; any value that is not a positive integer falls back to 10.
# Returns:
#   0 in every case; failures to deliver a signal are ignored on purpose
#   because the processes may already be gone.
terminate_harness_docker_run() {
  [ -n "$docker_run_pid" ] || return 0
  kill -0 "$docker_run_pid" 2>/dev/null || return 0
  local descendant_pids
  descendant_pids="$(docker_e2e_harness_descendant_pids "$docker_run_pid")"
  if [ -n "$descendant_pids" ]; then
    # Unquoted on purpose: the list holds one PID per word.
    # shellcheck disable=SC2086
    kill -TERM $descendant_pids 2>/dev/null || true
  fi
  kill -TERM "$docker_run_pid" 2>/dev/null || true
  local grace_seconds="${OPENCLAW_DOCKER_E2E_CONTAINER_TERM_GRACE_SECONDS:-10}"
  if ! [[ "$grace_seconds" =~ ^[0-9]+$ ]] || [ "$grace_seconds" -lt 1 ]; then
    grace_seconds="10"
  else
    # Force base 10 so zero-padded values such as "08" are not parsed as octal.
    grace_seconds="$((10#$grace_seconds))"
  fi
  # Poll every 0.1s, i.e. ten polls per grace second. The counter is a shell
  # arithmetic loop, so this cleanup path does not fork a PATH-resolved `seq`.
  local max_polls="$((grace_seconds * 10))"
  local poll_attempt
  for ((poll_attempt = 0; poll_attempt < max_polls; poll_attempt++)); do
    if ! kill -0 "$docker_run_pid" 2>/dev/null; then
      return 0
    fi
    /bin/sleep 0.1
  done
  # Still alive after the grace period: re-scan the subtree and force it down.
  descendant_pids="$(docker_e2e_harness_descendant_pids "$docker_run_pid")"
  if [ -n "$descendant_pids" ]; then
    # shellcheck disable=SC2086
    kill -KILL $descendant_pids 2>/dev/null || true
  fi
  kill -KILL "$docker_run_pid" 2>/dev/null || true
}
291+
# One-shot teardown for a harness docker run.
# Arguments:
#   $1 - status to finish with; defaults to the status of the last command.
#   $2 - "1" to exit the shell with that status instead of returning it
#        (default 0).
# Globals:
#   cleanup_done (read/written) - set to 1 on the first run; later calls skip
#     the teardown and only return/exit.
#   docker_run_pid, cidfile, cid_dir (read).
#   harness_stdin_fd (read/written) - duplicated stdin descriptor; closed and
#     blanked when set.
cleanup_harness_run() {
  local cleanup_status="${1:-$?}"
  local exit_after_cleanup="${2:-0}"
  if [ "$cleanup_done" != "1" ]; then
    cleanup_done=1
    # Drop our handlers first so a second signal cannot re-enter the teardown.
    trap - INT TERM HUP
    terminate_harness_docker_run
    # Reap the background job; its status is not used here.
    wait "$docker_run_pid" 2>/dev/null || true
    docker_e2e_cleanup_container_cidfile "$cidfile"
    rmdir "$cid_dir" 2>/dev/null || true
    docker_e2e_cleanup_package_mount_args
    if [ -n "$harness_stdin_fd" ]; then
      exec {harness_stdin_fd}<&-
      harness_stdin_fd=""
    fi
    restore_harness_traps
  fi
  # Shared tail for both the first run and repeated calls.
  if [ "$exit_after_cleanup" = "1" ]; then
    exit "$cleanup_status"
  fi
  return "$cleanup_status"
}
317+
trap 'cleanup_harness_run 130 1' INT
318+
trap 'cleanup_harness_run 143 1' TERM
319+
trap 'cleanup_harness_run 129 1' HUP
320+
exec {harness_stdin_fd}<&0
321+
docker_e2e_docker_run_cmd run --rm --cidfile "$cidfile" "${DOCKER_E2E_HARNESS_ARGS[@]}" "$@" <&$harness_stdin_fd &
322+
docker_run_pid="$!"
323+
local had_errexit=0
324+
case "$-" in
325+
*e*)
326+
had_errexit=1
327+
;;
328+
esac
329+
set +e
330+
wait "$docker_run_pid"
331+
run_status="$?"
332+
if [ "$had_errexit" = "1" ]; then
333+
set -e
334+
fi
335+
cleanup_harness_run 0
234336
return "$run_status"
235337
}
236338

0 commit comments

Comments
 (0)