Skip to content

Commit e2c2e41

Browse files
committed
fix(egress): v4 round — bridge bind on Linux, listener-role split, fallback gate, audit.log truth
Addresses GodsBoy's May 30 follow-up review on PR #30179. P0 — Linux sandboxes could not reach the proxy: - _default_http_listen now binds the docker bridge gateway on Linux (host.docker.internal resolves to the bridge gateway there; the old loopback-only bind was unreachable from containers). Loopback stays the default on Docker Desktop platforms; bridge-less Linux falls back to loopback with a warning. - Live-testing the fix against the real v0.39 binary surfaced a second latent bug the host-side E2E had masked: v0.39's http_listen does NOT terminate CONNECT — tunnel_listen does. HTTPS_PROXY traffic through http_listen got 400s from upstream. build_proxy_config now binds tunnel_listen (CONNECT/MITM) on tunnel_port and http_listen (plain-HTTP forwards) on tunnel_port+1; docker.py points HTTPS_PROXY at tunnel_port and HTTP_PROXY at tunnel_port+1. - Liveness probes (start_proxy poll loop, get_status) now probe the CONFIGURED bind host via _read_http_listen_from_config() instead of hardcoded loopback, which would have killed a healthy bridge-bound daemon as 'never came up'. P2 — allow_env_fallback dead on the partial-secret path: - The missing-secret branch in _build_proxy_subprocess_env now honors proxy.allow_env_fallback exactly as its own error message promises. - cmd_start refuses (or warns, with the fallback flag) when credential_source=bitwarden but secrets.bitwarden is disabled/missing — closing the silent-degrade-to-host-env hole. P2 — audit.log decoy on v0.39: - Wizard pre-create failure downgraded to a warning (file is non-load-bearing until the version bump); success line qualified as 'reserved'; user + dev docs stop telling operators to wire monitoring to the path today. P3 — metrics comment no longer claims 'hermes egress status' surfaces the ephemeral metrics port (it can't; :0 is random and unrecorded). Validation: 180 unit tests pass (9 new), gated E2E passes, and a live end-to-end run against the real binary verified CONNECT-MITM with Authorization swap on tunnel_listen, plain-HTTP swap on tunnel_port+1, 403 on non-allowlisted hosts, and bind-host-aware status probing. Docs gain a Linux firewall troubleshooting section (container→docker0 INPUT drops, e.g. ufw default-deny).
1 parent 42139c3 commit e2c2e41

8 files changed

Lines changed: 502 additions & 130 deletions

File tree

agent/proxy_sources/iron_proxy.py

Lines changed: 166 additions & 77 deletions
Large diffs are not rendered by default.

hermes_cli/proxy_cli.py

Lines changed: 42 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -351,17 +351,19 @@ def cmd_setup(args: argparse.Namespace) -> int:
351351
]
352352

353353
audit_log_path = ip._proxy_state_dir() / "audit.log"
354-
# Pre-create the audit log with 0o600 so iron-proxy inherits private
355-
# perms instead of letting the daemon create it under the default
356-
# umask (potentially world-readable). Raises on failure (planted
357-
# symlink, immutable parent, full disk) — the wizard must surface
358-
# that rather than print "✓" for a file the daemon will create
359-
# under a slacker umask.
354+
# Pre-create the audit log with 0o600. On the pinned v0.39 the
355+
# daemon does NOT write to this file (no ``log.audit_path`` field in
356+
# its config schema) — it's reserved for the v0.40+ upgrade where
357+
# per-request records start flowing. Because the file is
358+
# non-load-bearing today, a pre-create failure (immutable parent,
359+
# pre-existing foreign-owned file, full disk) is a WARNING, not a
360+
# setup abort.
361+
audit_log_ok = True
360362
try:
361363
ip.ensure_audit_log(audit_log_path)
362364
except RuntimeError as exc:
363-
console.print(f" [red]✗ {exc}[/red]")
364-
return 1
365+
audit_log_ok = False
366+
console.print(f" [yellow]⚠ {exc}[/yellow]")
365367

366368
# Allow operator override of the deny list via
367369
# ``proxy.upstream_deny_cidrs`` — but the default (None) gives a safe
@@ -381,7 +383,12 @@ def cmd_setup(args: argparse.Namespace) -> int:
381383
mappings_path = ip.write_mappings(mappings)
382384
console.print(f" [green]✓[/green] config: {cfg_path}")
383385
console.print(f" [green]✓[/green] mappings: {mappings_path}")
384-
console.print(f" [green]✓[/green] audit log: {audit_log_path}")
386+
if audit_log_ok:
387+
console.print(
388+
f" [green]✓[/green] audit log: {audit_log_path} "
389+
f"[dim](reserved — not written by iron-proxy v0.39; "
390+
f"per-request records land in iron-proxy.log)[/dim]"
391+
)
385392

386393
# ------------------------------------------------------------------ enable
387394
proxy_cfg["enabled"] = True
@@ -451,6 +458,32 @@ def cmd_start(args: argparse.Namespace) -> int:
451458
and bw_cfg is not None
452459
and bool(bw_cfg.get("enabled"))
453460
)
461+
# Silent-degrade guard: the operator explicitly chose
462+
# ``credential_source: bitwarden``, but secrets.bitwarden has since
463+
# been disabled or removed. Proceeding would quietly start on host
464+
# env — exactly the bug class the BW mode is meant to defeat. Refuse
465+
# unless the documented escape hatch is set.
466+
if credential_source == "bitwarden" and not refresh_bw:
467+
if bool(proxy_cfg.get("allow_env_fallback", False)):
468+
console.print(
469+
"[yellow]⚠ credential_source=bitwarden but "
470+
"secrets.bitwarden is disabled or missing — falling back "
471+
"to host-env secrets (allow_env_fallback=true). Rotated "
472+
"Bitwarden keys will NOT propagate.[/yellow]"
473+
)
474+
else:
475+
console.print(
476+
"[red]✗ Refusing to start: proxy.credential_source is "
477+
"'bitwarden' but secrets.bitwarden is disabled or "
478+
"missing.[/red]"
479+
)
480+
console.print(
481+
" Re-enable it (`secrets.bitwarden.enabled: true`), switch "
482+
"back to env credentials with `hermes egress setup "
483+
"--no-bitwarden`, or set `proxy.allow_env_fallback: true` "
484+
"to opt into the host-env fallback."
485+
)
486+
return 1
454487
# Pass the proxy-side allow_env_fallback opt-in through to
455488
# start_proxy. This is a deliberate, documented escape hatch: when
456489
# set, the daemon silently falls back to host env if BWS is

tests/test_iron_proxy.py

Lines changed: 156 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -212,8 +212,8 @@ def test_wizard_rendered_yaml_contains_deny_list(hermes_home, tmp_path):
212212

213213

214214
def test_default_bind_is_loopback_not_zero_zero(tmp_path):
215-
"""``http_listen`` must NOT be ``0.0.0.0:PORT`` or ``:PORT`` (latter is
216-
INADDR_ANY). Loopback only by default."""
215+
"""The sandbox-facing listeners must NOT be ``0.0.0.0:PORT`` or
216+
``:PORT`` (latter is INADDR_ANY)."""
217217

218218
cfg = ip.build_proxy_config(
219219
mappings=[_sample_mapping()],
@@ -222,12 +222,16 @@ def test_default_bind_is_loopback_not_zero_zero(tmp_path):
222222
tunnel_port=12345,
223223
http_listen=["127.0.0.1:12345"], # explicit so test is deterministic
224224
)
225-
primary = cfg["proxy"]["http_listen"]
226-
assert primary == "127.0.0.1:12345"
227-
# Sentinel: confirm we didn't accidentally serialize a bare-port form
228-
# like ":12345" (that's INADDR_ANY).
229-
assert not primary.startswith(":")
230-
assert "0.0.0.0" not in primary
225+
# tunnel_listen is the CONNECT/MITM listener sandboxes hit via
226+
# HTTPS_PROXY; http_listen is the plain-HTTP forward on port+1.
227+
assert cfg["proxy"]["tunnel_listen"] == "127.0.0.1:12345"
228+
assert cfg["proxy"]["http_listen"] == "127.0.0.1:12346"
229+
for key in ("tunnel_listen", "http_listen", "https_listen"):
230+
val = cfg["proxy"][key]
231+
# Sentinel: confirm we didn't accidentally serialize a bare-port
232+
# form like ":12345" (that's INADDR_ANY).
233+
assert not val.startswith(":")
234+
assert "0.0.0.0" not in val
231235
# iron-proxy v0.39 doesn't support http_listens (plural). We
232236
# deliberately do NOT emit that key — re-emitting it would cause
233237
# the daemon to fail YAML unmarshal at start time.
@@ -237,14 +241,14 @@ def test_default_bind_is_loopback_not_zero_zero(tmp_path):
237241
)
238242

239243

240-
def test_default_bind_uses_loopback_on_linux(tmp_path, monkeypatch):
244+
def test_default_bind_uses_docker_bridge_on_linux(tmp_path, monkeypatch):
241245
"""When http_listen isn't passed AND we're on Linux, the singular
242-
http_listen field is the loopback bind. iron-proxy v0.39 only
243-
supports one bind per daemon process — earlier versions of this
244-
code emitted a plural http_listens list with the docker bridge
245-
appended, but v0.39 rejects that key so we keep loopback only.
246-
Sandboxes still reach the daemon via host.docker.internal which
247-
Docker maps to the host gateway, so loopback-only is functional."""
246+
http_listen field is the DOCKER BRIDGE bind — not loopback.
247+
iron-proxy v0.39 only supports one bind per daemon process, and on
248+
Linux ``host.docker.internal:host-gateway`` resolves to the bridge
249+
gateway (172.17.0.1 by default), which a loopback-only daemon never
250+
answers. Sandboxes must be able to reach the proxy from the
251+
container's vantage point."""
248252

249253
monkeypatch.setattr(ip.platform, "system", lambda: "Linux")
250254
monkeypatch.setattr(ip, "_detect_docker_bridge_ip", lambda: "172.17.0.1")
@@ -254,12 +258,48 @@ def test_default_bind_uses_loopback_on_linux(tmp_path, monkeypatch):
254258
ca_key=tmp_path / "ca.key",
255259
tunnel_port=9090,
256260
)
257-
# Primary http_listen is loopback.
258-
assert cfg["proxy"]["http_listen"] == "127.0.0.1:9090"
261+
# The sandbox-facing CONNECT/MITM listener binds the bridge gateway —
262+
# reachable from containers via host.docker.internal. Plain-HTTP
263+
# forward listener rides on port+1, same host.
264+
assert cfg["proxy"]["tunnel_listen"] == "172.17.0.1:9090"
265+
assert cfg["proxy"]["http_listen"] == "172.17.0.1:9091"
259266
# No http_listens (plural) — v0.39 rejects that key.
260267
assert "http_listens" not in cfg["proxy"]
261268

262269

270+
def test_default_bind_falls_back_to_loopback_without_bridge(tmp_path, monkeypatch):
271+
"""On Linux without a detectable docker0 bridge (docker not
272+
installed / not running), fall back to loopback rather than
273+
refusing to bind."""
274+
275+
monkeypatch.setattr(ip.platform, "system", lambda: "Linux")
276+
monkeypatch.setattr(ip, "_detect_docker_bridge_ip", lambda: None)
277+
cfg = ip.build_proxy_config(
278+
mappings=[_sample_mapping()],
279+
ca_cert=tmp_path / "ca.crt",
280+
ca_key=tmp_path / "ca.key",
281+
tunnel_port=9090,
282+
)
283+
assert cfg["proxy"]["tunnel_listen"] == "127.0.0.1:9090"
284+
assert cfg["proxy"]["http_listen"] == "127.0.0.1:9091"
285+
286+
287+
def test_default_bind_is_loopback_on_macos(tmp_path, monkeypatch):
288+
"""On Docker Desktop platforms host.docker.internal routes to the
289+
host via VPNkit, so loopback is reachable from containers and is
290+
the least-exposed bind."""
291+
292+
monkeypatch.setattr(ip.platform, "system", lambda: "Darwin")
293+
cfg = ip.build_proxy_config(
294+
mappings=[_sample_mapping()],
295+
ca_cert=tmp_path / "ca.crt",
296+
ca_key=tmp_path / "ca.key",
297+
tunnel_port=9090,
298+
)
299+
assert cfg["proxy"]["tunnel_listen"] == "127.0.0.1:9090"
300+
assert cfg["proxy"]["http_listen"] == "127.0.0.1:9091"
301+
302+
263303
def test_metrics_listener_pinned_to_loopback_ephemeral(tmp_path):
264304
"""iron-proxy v0.39's default metrics_listen is ``:9090``, which
265305
collides with our default tunnel_port=9090. build_proxy_config MUST
@@ -1455,3 +1495,102 @@ def test_persisted_nonce_roundtrip(hermes_home, monkeypatch):
14551495
def test_persisted_nonce_returns_none_when_missing(hermes_home):
14561496
"""No nonce file → None, callers fall back to argv0 basename."""
14571497
assert ip._read_persisted_nonce() is None
1498+
1499+
1500+
# ---------------------------------------------------------------------------
1501+
# v4 round (GodsBoy follow-up): bind-host-aware liveness probes +
1502+
# allow_env_fallback on the partial-secret path
1503+
# ---------------------------------------------------------------------------
1504+
1505+
1506+
def test_read_http_listen_from_config_returns_host_and_port(hermes_home):
1507+
"""_read_http_listen_from_config must surface the BIND HOST, not just
1508+
the port — on Linux the daemon binds the docker bridge gateway and a
1509+
hardcoded loopback probe would report a healthy daemon as dead."""
1510+
1511+
state = ip._proxy_state_dir()
1512+
(state / "proxy.yaml").write_text(
1513+
"proxy:\n http_listen: 172.17.0.1:9090\n", encoding="utf-8"
1514+
)
1515+
assert ip._read_http_listen_from_config() == ("172.17.0.1", 9090)
1516+
assert ip._read_tunnel_port_from_config() == 9090
1517+
1518+
1519+
def test_read_http_listen_from_config_missing_file(hermes_home):
1520+
assert ip._read_http_listen_from_config() is None
1521+
1522+
1523+
def test_get_status_probes_configured_bind_host(hermes_home, monkeypatch):
1524+
"""get_status must probe the configured bind host (e.g. the docker
1525+
bridge IP), not loopback unconditionally."""
1526+
1527+
state = ip._proxy_state_dir()
1528+
(state / "proxy.yaml").write_text(
1529+
"proxy:\n http_listen: 172.17.0.1:9123\n", encoding="utf-8"
1530+
)
1531+
(state / "ca.crt").write_text("cert")
1532+
ip._write_pidfile_safely(ip._pidfile(), 99999)
1533+
monkeypatch.setattr(ip, "_pid_alive", lambda pid: True)
1534+
monkeypatch.setattr(ip, "find_iron_proxy", lambda **kw: None)
1535+
1536+
probed = {}
1537+
1538+
def fake_probe(host, port):
1539+
probed["host"] = host
1540+
probed["port"] = port
1541+
return True
1542+
1543+
monkeypatch.setattr(ip, "_port_listening", fake_probe)
1544+
status = ip.get_status()
1545+
assert probed == {"host": "172.17.0.1", "port": 9123}
1546+
assert status.listening is True
1547+
assert status.tunnel_port == 9123
1548+
1549+
1550+
def test_partial_bitwarden_secrets_honor_allow_env_fallback(
1551+
hermes_home, monkeypatch,
1552+
):
1553+
"""The missing-secret branch's own error message tells operators to
1554+
set proxy.allow_env_fallback — so the flag must actually work there
1555+
(previously only the empty-token branch honored it)."""
1556+
1557+
ip.write_mappings([_sample_mapping("OPENROUTER_API_KEY")])
1558+
monkeypatch.setenv("OPENROUTER_API_KEY", "sk-host-fallback")
1559+
1560+
import agent.secret_sources.bitwarden as bw
1561+
monkeypatch.setattr(
1562+
bw, "fetch_bitwarden_secrets", lambda **kw: ({}, []),
1563+
)
1564+
monkeypatch.setenv("BWS_ACCESS_TOKEN", "tok")
1565+
bw_cfg = {
1566+
"project_id": "proj",
1567+
"access_token_env": "BWS_ACCESS_TOKEN",
1568+
"allow_env_fallback": True,
1569+
}
1570+
1571+
env = ip._build_proxy_subprocess_env(
1572+
refresh_from_bitwarden=True, bitwarden_config=bw_cfg,
1573+
)
1574+
# Falls back to the host env value instead of raising.
1575+
assert env.get("OPENROUTER_API_KEY") == "sk-host-fallback"
1576+
1577+
1578+
def test_partial_bitwarden_secrets_raise_without_fallback(
1579+
hermes_home, monkeypatch,
1580+
):
1581+
"""Strict default: missing BWS secrets raise."""
1582+
1583+
ip.write_mappings([_sample_mapping("OPENROUTER_API_KEY")])
1584+
monkeypatch.setenv("OPENROUTER_API_KEY", "sk-host")
1585+
1586+
import agent.secret_sources.bitwarden as bw
1587+
monkeypatch.setattr(
1588+
bw, "fetch_bitwarden_secrets", lambda **kw: ({}, []),
1589+
)
1590+
monkeypatch.setenv("BWS_ACCESS_TOKEN", "tok")
1591+
bw_cfg = {"project_id": "proj", "access_token_env": "BWS_ACCESS_TOKEN"}
1592+
1593+
with pytest.raises(RuntimeError, match="did not return secrets"):
1594+
ip._build_proxy_subprocess_env(
1595+
refresh_from_bitwarden=True, bitwarden_config=bw_cfg,
1596+
)

tests/test_iron_proxy_cli.py

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -425,3 +425,88 @@ def test_setup_has_rotate_tokens_flag():
425425
assert args.rotate_tokens is False
426426
args = parser.parse_args(["setup", "--rotate-tokens"])
427427
assert args.rotate_tokens is True
428+
429+
430+
# ---------------------------------------------------------------------------
431+
# v4 round: credential_source=bitwarden with secrets.bitwarden disabled
432+
# must NOT silently degrade to host-env secrets
433+
# ---------------------------------------------------------------------------
434+
435+
436+
def test_cmd_start_refuses_when_bitwarden_mode_but_disabled(hermes_home, monkeypatch):
437+
"""config keeps credential_source: bitwarden but secrets.bitwarden.enabled
438+
later flips to false — cmd_start must refuse, not silently start on
439+
host env (the silent-degrade class strict mode is meant to close)."""
440+
441+
from hermes_cli.config import load_config, save_config
442+
cfg = load_config()
443+
cfg.setdefault("proxy", {})["enabled"] = True
444+
cfg["proxy"]["credential_source"] = "bitwarden"
445+
cfg["proxy"]["fail_on_uncovered_providers"] = False
446+
cfg.setdefault("secrets", {})["bitwarden"] = {"enabled": False}
447+
save_config(cfg)
448+
449+
def must_not_call(**kw):
450+
pytest.fail("start_proxy must not run when bitwarden mode is broken")
451+
monkeypatch.setattr(ip, "start_proxy", must_not_call)
452+
monkeypatch.setattr(ip, "discover_uncovered_providers", lambda **kw: [])
453+
monkeypatch.setattr(ip, "discover_blocked_providers", lambda **kw: [])
454+
455+
rc = proxy_cli.cmd_start(_args())
456+
assert rc == 1
457+
458+
459+
def test_cmd_start_bitwarden_disabled_proceeds_with_env_fallback(
460+
hermes_home, monkeypatch,
461+
):
462+
"""Same scenario but proxy.allow_env_fallback=true is the documented
463+
escape hatch — start proceeds (with a warning)."""
464+
465+
from hermes_cli.config import load_config, save_config
466+
cfg = load_config()
467+
cfg.setdefault("proxy", {})["enabled"] = True
468+
cfg["proxy"]["credential_source"] = "bitwarden"
469+
cfg["proxy"]["allow_env_fallback"] = True
470+
cfg["proxy"]["fail_on_uncovered_providers"] = False
471+
cfg.setdefault("secrets", {})["bitwarden"] = {"enabled": False}
472+
save_config(cfg)
473+
474+
captured: dict = {}
475+
476+
def fake_start_proxy(**kw):
477+
captured.update(kw)
478+
s = ip.ProxyStatus()
479+
s.pid = 4242
480+
s.listening = True
481+
return s
482+
483+
monkeypatch.setattr(ip, "start_proxy", fake_start_proxy)
484+
monkeypatch.setattr(ip, "discover_uncovered_providers", lambda **kw: [])
485+
monkeypatch.setattr(ip, "discover_blocked_providers", lambda **kw: [])
486+
487+
rc = proxy_cli.cmd_start(_args())
488+
assert rc == 0
489+
# BW refresh is off (bitwarden disabled), running on env secrets.
490+
assert captured.get("refresh_secrets_from_bitwarden") is False
491+
492+
493+
def test_cmd_setup_audit_log_failure_is_warning_not_abort(hermes_home, monkeypatch):
494+
"""On the pinned v0.39 the daemon never writes audit.log, so a
495+
pre-create failure must not abort the wizard."""
496+
497+
monkeypatch.setattr(ip, "find_iron_proxy", lambda **kw: hermes_home / "iron-proxy")
498+
monkeypatch.setattr(ip, "discover_provider_mappings", lambda **kw: [
499+
ip.TokenMapping(
500+
proxy_token="hermes-proxy-deadbeef",
501+
real_env_name="OPENROUTER_API_KEY",
502+
upstream_hosts=("openrouter.ai",),
503+
),
504+
])
505+
monkeypatch.setattr(ip, "discover_uncovered_providers", lambda **kw: [])
506+
507+
def boom(path):
508+
raise RuntimeError("could not pre-create audit log (synthetic)")
509+
monkeypatch.setattr(ip, "ensure_audit_log", boom)
510+
511+
rc = proxy_cli.cmd_setup(_args())
512+
assert rc == 0

0 commit comments

Comments
 (0)