Skip to content

Commit 32a41b1

Browse files
committed
feat(cli): mempalace cypher — read-only Cypher query CLI (#191)
Wraps the daemon's ``POST /cypher`` endpoint for arbitrary AGE Cypher queries. Composes with ``mempalace graph`` (snapshot) — this is the escape hatch when the pre-aggregated snapshot isn't enough. Read-only is enforced server-side: the daemon runs each query inside a ``READ ONLY`` postgres transaction, so write verbs (CREATE/MERGE/SET/DELETE/REMOVE) fail with SQLSTATE 25006 → HTTP 403. We trust the server enforcement instead of client-side blocklists, which would inevitably drift from the daemon's policy. The 403 is surfaced as a friendly hint with exit 2: "this endpoint is read-only; rewrite as MATCH / RETURN, or use the mempalace_kg_* MCP tools to mutate" Other failure modes match cmd_graph / cmd_list exactly: - DaemonError (network) → exit 1, stderr "palace daemon unreachable" - 401 / 404 / 503 → exit 1, same shape - inner-error envelope → exit 2 - empty positional QUERY → exit 2 without contacting daemon Output formats: ``table`` (aligned columns, default), ``json`` (pass-through daemon envelope), ``csv`` (pipe-friendly, no decoration). ``--graph`` defaults to ``mempalace_kg``; ``--limit`` is advisory — the daemon's statement_timeout (PR #228) is the real ceiling. Tests: 21 cases across 4 classes (flag propagation, formats, empty rows, daemon-down matrix). Mocks _post_cypher to keep CI hermetic — no daemon required. Slice of #191. Composes with cli-list-drawer-browser and the just-shipped cli-graph-kg-snapshot.
1 parent 48baeed commit 32a41b1

2 files changed

Lines changed: 670 additions & 0 deletions

File tree

mempalace/cli.py

Lines changed: 291 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2062,6 +2062,262 @@ def cmd_graph(args):
20622062
_print_graph_table(data)
20632063

20642064

2065+
# ── mempalace cypher (issue #191) ─────────────────────────────────────
2066+
#
2067+
# Read-only Cypher query CLI: wraps the daemon's ``POST /cypher``
2068+
# endpoint, which executes arbitrary Cypher against the AGE knowledge
2069+
# graph inside a ``READ ONLY`` postgres transaction. Write verbs
2070+
# (CREATE/MERGE/SET/DELETE/REMOVE) fail server-side with SQLSTATE 25006
2071+
# → HTTP 403. We trust the server enforcement instead of blocklisting
2072+
# client-side: simpler, can't drift from the daemon's policy, and the
2073+
# spec is explicit (see PR #228 for the statement_timeout side of the
2074+
# safety story).
2075+
#
2076+
# Composes with ``mempalace graph`` (pre-aggregated snapshot) — cypher
2077+
# is the arbitrary-walk escape hatch when the snapshot isn't enough.
2078+
2079+
2080+
# AGE graph name used by ``cmd_cypher`` and the ``--graph`` argparse default
# when the caller does not name a graph explicitly.
_CYPHER_DEFAULT_GRAPH = "mempalace_kg"
2081+
2082+
2083+
def _post_cypher(body: dict) -> tuple[dict | None, int | None]:
    """POST ``body`` as JSON to the daemon's ``/cypher`` endpoint.

    Returns ``(data, status_code)``:

    * success: ``(decoded_json, None)``
    * non-2xx response: ``(None, http_status)``

    The HTTP status is returned instead of raised so the caller can tell
    403 (read-only enforcement) apart from 401/404 and print a targeted
    hint on write attempts.

    Raises:
        DaemonError: on network-level failure, or when a successful
            response carries a body that is not valid JSON.
    """
    import urllib.error
    import urllib.request

    base_url = _daemon_url()
    headers = {"content-type": "application/json"}
    # Only send the auth header when a non-blank key is configured.
    api_key = os.environ.get("PALACE_API_KEY", "").strip()
    if api_key:
        headers["x-api-key"] = api_key
    req = urllib.request.Request(
        f"{base_url}/cypher",
        data=json.dumps(body).encode("utf-8"),
        headers=headers,
        method="POST",
    )
    try:
        with urllib.request.urlopen(req, timeout=_daemon_timeout()) as resp:
            raw = resp.read()
    except urllib.error.HTTPError as e:
        # HTTPError subclasses URLError, so it must be handled first.
        return None, e.code
    except (urllib.error.URLError, ConnectionError, OSError) as e:
        raise DaemonError(f"daemon unreachable at {base_url}: {e}") from e

    try:
        return json.loads(raw.decode("utf-8", errors="replace")), None
    except ValueError as e:
        # json.JSONDecodeError is a ValueError. Without this, a proxy error
        # page or truncated body served with a 2xx status would escape as a
        # raw traceback instead of the CLI's structured daemon-error path.
        raise DaemonError(
            f"daemon at {base_url} returned invalid JSON from /cypher: {e}"
        ) from e
2114+
2115+
2116+
def _resolve_cypher_format(args) -> str:
    """Pick the output format for ``mempalace cypher``.

    Precedence: a truthy ``args.format`` is returned as-is; otherwise a
    truthy ``args.json`` selects ``"json"``; otherwise ``"table"``.
    Missing attributes on ``args`` are treated as unset.
    """
    explicit = getattr(args, "format", None)
    if explicit:
        return explicit
    return "json" if getattr(args, "json", False) else "table"
2127+
2128+
2129+
def _extract_cypher_rows(data: dict) -> list[dict]:
    """Pull the result rows out of the daemon's ``/cypher`` envelope.

    Looks for a top-level ``rows`` key first and falls back to ``data``
    when ``rows`` is absent or ``None``. Anything that is not a list
    (including an envelope that is not a dict at all) yields ``[]`` so
    callers can always iterate the result.
    """
    # A non-object envelope (JSON array, string, null, ...) has no ``.get``;
    # treat it as "no rows" rather than crashing with AttributeError.
    if not isinstance(data, dict):
        return []
    rows = data.get("rows")
    if rows is None:
        rows = data.get("data")
    return rows if isinstance(rows, list) else []
2140+
2141+
2142+
def _print_cypher_table(rows: list[dict]) -> None:
    """Print Cypher result rows to stdout as a column-aligned text table.

    Columns are the union of keys across all rows in first-seen order.
    ``None`` renders as an empty cell, dicts and lists render as JSON,
    everything else goes through ``str()``. An empty result prints a
    "No rows." notice instead of a table.
    """
    if not rows:
        print("\n No rows.\n")
        return

    def _render(value) -> str:
        if value is None:
            return ""
        if isinstance(value, (dict, list)):
            return json.dumps(value, ensure_ascii=False)
        return str(value)

    # dict.fromkeys de-duplicates while keeping first-seen order.
    headings = list(dict.fromkeys(key for record in rows for key in record.keys()))
    rendered = [[_render(record.get(h)) for h in headings] for record in rows]
    # Each column is as wide as its heading or its longest cell.
    col_widths = [
        max(len(h), *(len(cells[i]) for cells in rendered))
        for i, h in enumerate(headings)
    ]

    def _line(cells) -> str:
        padded = (text.ljust(width) for text, width in zip(cells, col_widths))
        return " " + " ".join(padded)

    print()
    print(_line(headings))
    print(_line("─" * width for width in col_widths))
    for cells in rendered:
        print(_line(cells))
    plural = "" if len(rows) == 1 else "s"
    print(f"\n {len(rows)} row{plural}.\n")
2180+
2181+
2182+
def _print_cypher_csv(rows: list[dict]) -> None:
    """Write Cypher result rows to stdout as CSV with a header row.

    Columns are the union of keys across all rows in first-seen order.
    Dict and list values are serialised as JSON, ``None`` becomes an
    empty field. An empty result writes nothing at all (no header),
    because there are no rows to derive column names from.
    """
    import csv

    if not rows:
        return

    # dict.fromkeys de-duplicates while keeping first-seen order.
    fieldnames = list(dict.fromkeys(key for record in rows for key in record.keys()))

    def _flatten(value):
        if isinstance(value, (dict, list)):
            return json.dumps(value, ensure_ascii=False)
        return "" if value is None else value

    out = csv.DictWriter(sys.stdout, fieldnames=fieldnames, extrasaction="ignore")
    out.writeheader()
    for record in rows:
        out.writerow({name: _flatten(record.get(name)) for name in fieldnames})
2208+
2209+
2210+
def cmd_cypher(args):
    """Run a read-only Cypher query against the AGE knowledge graph (issue #191).

    Wraps the daemon's ``POST /cypher``. Write verbs are rejected
    server-side (HTTP 403); this command does no client-side filtering
    of the query text.

    Reads from ``args``: ``query`` (required), ``graph`` (defaults to
    ``_CYPHER_DEFAULT_GRAPH``), ``limit`` (optional non-negative int),
    plus whatever ``_resolve_cypher_format`` consults.

    ``--limit`` caps the number of rows shown. It is applied client-side
    after the response arrives, so the query still runs in full on the
    daemon; put ``LIMIT`` in the Cypher itself to bound server work.

    Output formats: ``table`` (aligned columns), ``json`` (daemon
    envelope with ``rows`` / ``count`` / ``graph`` normalised), ``csv``.

    Exit codes:
        1: daemon unreachable, any non-2xx status other than 403, or a
           response that is not a JSON object.
        2: empty QUERY (daemon is not contacted), HTTP 403 (read-only
           hint), or an inner ``error`` envelope from the daemon.
    """
    fmt = _resolve_cypher_format(args)
    want_json = fmt == "json"

    query = getattr(args, "query", "")
    if not query or not str(query).strip():
        if want_json:
            _emit_json({"error": "missing required positional QUERY", "source": "cli"})
        else:
            print("error: missing required positional QUERY", file=sys.stderr)
        sys.exit(2)

    graph = getattr(args, "graph", None) or _CYPHER_DEFAULT_GRAPH
    body: dict = {"cypher": str(query), "graph": str(graph)}

    try:
        data, status = _post_cypher(body)
    except DaemonError as e:
        # JSON callers get a structured error on stdout; humans get the
        # "daemon unreachable" line on stderr.
        if want_json:
            _emit_json({"error": str(e), "source": "daemon"})
        else:
            print(
                f"palace daemon unreachable at {_daemon_url()} — "
                f"see mempalace status for diagnostics ({e})",
                file=sys.stderr,
            )
        sys.exit(1)

    if status == 403:
        # Server-enforced read-only surfaces as HTTP 403. Give the operator
        # a one-line hint that maps to the next action.
        hint = (
            "daemon /cypher returned 403 — this endpoint is read-only; "
            "rewrite as MATCH / RETURN, or use the mempalace_kg_* MCP tools to mutate"
        )
        if want_json:
            _emit_json({"error": hint, "source": "daemon", "status": 403})
        else:
            print(hint, file=sys.stderr)
        sys.exit(2)

    if status is not None:
        # 401/404/503 etc. Treated the same as unreachable so scripts get
        # one failure shape.
        if want_json:
            _emit_json(
                {
                    "error": f"daemon /cypher returned {status}",
                    "source": "daemon",
                    "status": status,
                }
            )
        else:
            print(
                f"palace daemon unreachable at {_daemon_url()} — "
                f"/cypher returned {status} (see mempalace status for diagnostics)",
                file=sys.stderr,
            )
        sys.exit(1)

    # A 2xx body that decodes to something other than a JSON object (array,
    # string, number) cannot be an envelope. Report it as a daemon failure
    # instead of crashing on ``.get`` further down.
    if data is not None and not isinstance(data, dict):
        shape_error = (
            "daemon /cypher returned an unexpected response "
            f"(expected a JSON object, got {type(data).__name__})"
        )
        if want_json:
            _emit_json({"error": shape_error, "source": "daemon"})
        else:
            print(shape_error, file=sys.stderr)
        sys.exit(1)

    # Inner error envelope: an ``error`` key with no row payload alongside it.
    if data is not None and "error" in data and "rows" not in data and "data" not in data:
        if want_json:
            _emit_json(data)
        else:
            print(f"\n {data['error']}", file=sys.stderr)
        sys.exit(2)

    rows = _extract_cypher_rows(data or {})

    # Honour --limit. It was previously parsed but never read, so the flag
    # had no effect. The isinstance guard keeps non-int stand-ins (e.g. a
    # partially populated args object) from reaching the slice.
    limit = getattr(args, "limit", None)
    if isinstance(limit, int) and not isinstance(limit, bool) and limit >= 0:
        rows = rows[:limit]

    if want_json:
        # Stable top-level shape so scripts can rely on these three keys.
        out = {
            "rows": rows,
            "count": len(rows),
            "graph": graph,
        }
        # Pass through any extra metadata the daemon adds (everything except
        # the keys normalised above).
        if isinstance(data, dict):
            for k, v in data.items():
                if k not in ("rows", "data", "count", "graph"):
                    out[k] = v
        _emit_json(out)
        return

    if fmt == "csv":
        _print_cypher_csv(rows)
        return

    _print_cypher_table(rows)
2319+
2320+
20652321
def cmd_wakeup(args):
20662322
"""Show L0 (identity) + L1 (essential story) — the wake-up context."""
20672323
from .layers import MemoryStack
@@ -3937,6 +4193,40 @@ def main():
39374193
),
39384194
)
39394195

4196+
# cypher — read-only Cypher query against the AGE knowledge graph
4197+
p_cypher = sub.add_parser(
4198+
"cypher",
4199+
help="Run a read-only Cypher query against the AGE knowledge graph",
4200+
)
4201+
p_cypher.add_argument(
4202+
"query",
4203+
help="Cypher query string (MATCH / RETURN; write verbs are server-rejected)",
4204+
)
4205+
p_cypher.add_argument(
4206+
"--graph",
4207+
default=_CYPHER_DEFAULT_GRAPH,
4208+
help=f"AGE graph name (default: {_CYPHER_DEFAULT_GRAPH})",
4209+
)
4210+
p_cypher.add_argument(
4211+
"--format",
4212+
choices=("table", "json", "csv"),
4213+
default=None,
4214+
help=(
4215+
"Output format: table (default, aligned columns), "
4216+
"json (pass-through daemon envelope; same as --json), "
4217+
"csv (pipe-friendly, no decoration)"
4218+
),
4219+
)
4220+
p_cypher.add_argument(
4221+
"--limit",
4222+
type=int,
4223+
default=None,
4224+
help=(
4225+
"Advisory cap. The daemon's statement_timeout (PR #228) is the real "
4226+
"ceiling — pass LIMIT in the query itself for a hard cutoff."
4227+
),
4228+
)
4229+
39404230
# compress
39414231
p_compress = sub.add_parser(
39424232
"compress", help="Compress drawers using AAAK Dialect (~30x reduction)"
@@ -4338,6 +4628,7 @@ def _nonneg_int(value: str) -> int:
43384628
"search": cmd_search,
43394629
"list": cmd_list,
43404630
"graph": cmd_graph,
4631+
"cypher": cmd_cypher,
43414632
"export": cmd_export,
43424633
"sweep": cmd_sweep,
43434634
"sync": cmd_sync,

0 commit comments

Comments
 (0)