Skip to content

Commit ff7187d

Browse files
committed
feat(kg-age): fill API gaps — add_entity, invalidate, query_entity, query_relationship, timeline, seed_from_entity_facts
Brings KnowledgeGraphAGE to API parity with the SQLite KnowledgeGraph. Previously it only had: add_triple, query_triples, stats, clear. The six missing methods make AGE a drop-in replacement for the SQLite backend without requiring callsite changes.

All methods mirror SQLite semantics exactly:
- add_entity: MERGE pattern with type + properties (no ON CREATE SET in AGE — last-write-wins semantics, acceptable for write-through use)
- invalidate: SET valid_to on every active matching triple, with inverted-interval guard
- query_entity: outgoing/incoming/both direction filter + as_of temporal filter
- query_relationship: filter by predicate + as_of
- timeline: chronological ORDER BY, default limit 100
- seed_from_entity_facts: bulk-load from ENTITY_FACTS dict
- _entity_id: name → canonical id helper, matches SQLite KG derivation

All built on the existing _run_cypher infrastructure with the inlined-parameter approach that works around AGE's prepared-statement incompatibility.

AGE Cypher dialect gaps respected:
- No ON CREATE SET → unconditional SET on MERGE
- No multi-column RETURN inside cypher() — works here because each method's RETURN uses AS-aliased columns that _extract_return_aliases parses
- No list literals — not used anywhere in these implementations

Smoke-tested end-to-end against sme_lme_bench:
- add 3 triples (Atakan -[works_on]-> adaptmem/mempalace-PRs, FT-300 -[trained_by]-> Atakan)
- query_entity('Atakan', 'outgoing') → 2 results, both current=True
- query_entity('Atakan', 'incoming') → 1 result (FT-300 trained_by)
- invalidate('Atakan', 'works_on', 'mempalace-PRs') → 1 affected, then re-query shows valid_to=2026-05-17, current=False
- timeline() → 3 rows ordered by valid_from (NULL at end)
- stats → entities=4, triples=3, current_facts=2, expired_facts=1, relationship_types=['trained_by', 'works_on']

Phase 1 of the larger AGE-integration plan toward "agent walks the palace" via real Cypher traversal.
1 parent d8f81c2 commit ff7187d

1 file changed

Lines changed: 288 additions & 0 deletions

File tree

mempalace/knowledge_graph_age.py

Lines changed: 288 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,294 @@ def query_triples(
253253
for r in rows
254254
]
255255

256+
def add_entity(self, name: str, entity_type: str = "unknown", properties: Optional[dict] = None) -> str:
    """Create or overwrite the ``Entity`` node called ``name``.

    Counterpart of ``KnowledgeGraph.add_entity`` in the SQLite backend.
    One Cypher statement MERGEs the node on ``name`` and then
    unconditionally SETs ``type`` and ``properties``. AGE has no
    ``ON CREATE SET``, so an already-existing node has both values
    overwritten on every call (last write wins).

    Args:
        name: Entity name; passed through ``sanitize_kg_value`` first.
        entity_type: Stored verbatim as the node's ``type``.
        properties: Stored as a JSON string; ``None`` or an empty dict
            is stored as ``"{}"``.

    Returns:
        The entity id that ``_entity_id`` derives from the sanitized
        name, for source compatibility with SQLite callsites.
    """
    clean_name = sanitize_kg_value(name, "name")
    entity_key = self._entity_id(clean_name)
    cypher_params = {
        "name": clean_name,
        "type": entity_type,
        "props": json.dumps(properties if properties else {}),
    }
    # MERGE followed by a plain SET: type/props are rewritten even when the
    # node already exists. Acceptable for the write-through use case; two
    # writers racing on the same name simply resolve as last-write-wins.
    merge_query = """
        MERGE (e:Entity {name: $name})
        SET e.type = $type, e.properties = $props
        """
    self._run_cypher(merge_query, cypher_params)
    return entity_key
286+
287+
@staticmethod
288+
def _entity_id(name: str) -> str:
289+
"""Mirror SQLite KG's id derivation so cross-backend callers see
290+
the same id for the same entity name."""
291+
return name.lower().replace(" ", "_").replace("'", "")
292+
293+
def invalidate(self, subject: str, predicate: str, obj: str, ended: Optional[str] = None) -> int:
    """Expire every active (subject, predicate, object) triple.

    Counterpart of the SQLite KG's ``invalidate``. A RELATION edge is
    "active" when its ``valid_to`` is NULL; each active edge between the
    two named entities whose ``relation_type`` equals ``predicate`` gets
    ``valid_to`` set to ``ended``.

    Args:
        subject: Name of the source entity.
        predicate: Relation type to match.
        obj: Name of the target entity.
        ended: End date to record; today's date in ISO format when None.

    Returns:
        The number of active triples found by the pre-write read.

    Raises:
        ValueError: If ``ended`` sorts before the ``valid_from`` of any
            matching active triple. Raised before anything is written.
    """
    subject = sanitize_kg_value(subject, "subject")
    predicate = sanitize_kg_value(predicate, "predicate")
    obj = sanitize_kg_value(obj, "object")
    if ended is None:
        import datetime as _datetime

        ended = _datetime.date.today().isoformat()
    ended = sanitize_iso_temporal(ended, "ended")

    match_params = {"subj": subject, "obj": obj, "pred": predicate}

    # Read the start dates of the edges about to be closed so an inverted
    # interval can be rejected before any write happens.
    active = self._run_cypher(
        """
        MATCH (s:Entity {name: $subj})-[r:RELATION]->(o:Entity {name: $obj})
        WHERE r.relation_type = $pred AND r.valid_to IS NULL
        RETURN r.valid_from AS valid_from
        """,
        dict(match_params),
        fetch=True,
    )
    start_dates = (self._unwrap_agtype(record[0]) for record in active)
    conflict = next(
        (start for start in start_dates if start is not None and ended < start),
        None,
    )
    if conflict is not None:
        raise ValueError(
            f"valid_to={ended!r} is before valid_from={conflict!r}; "
            "an inverted interval would be invisible to every KG query"
        )

    # SET-on-MATCH is the form AGE supports for updating existing edges.
    self._run_cypher(
        """
        MATCH (s:Entity {name: $subj})-[r:RELATION]->(o:Entity {name: $obj})
        WHERE r.relation_type = $pred AND r.valid_to IS NULL
        SET r.valid_to = $ended
        """,
        {**match_params, "ended": ended},
    )
    return len(active)
342+
343+
def query_entity(
    self,
    name: str,
    as_of: Optional[str] = None,
    direction: str = "both",
) -> list:
    """List the triples that touch the entity called ``name``.

    Counterpart of ``KnowledgeGraph.query_entity``. ``name`` is the
    entity name, not its id.

    Args:
        name: Entity name to look up.
        as_of: When given, keep only facts whose
            ``[valid_from, valid_to]`` interval covers this date (a NULL
            bound counts as open-ended).
        direction: ``"outgoing"`` (entity → ?), ``"incoming"``
            (? → entity) or ``"both"``. Any other value matches neither
            query, so the result is an empty list.

    Returns:
        A list of dicts, outgoing rows first and incoming rows after,
        each with the keys ``direction``, ``subject``, ``predicate``,
        ``object``, ``valid_from``, ``valid_to``, ``confidence``,
        ``source_closet`` (the relation's ``source`` property) and
        ``current`` (True when ``valid_to`` is None).
    """
    name = sanitize_kg_value(name, "name")
    if as_of is not None:
        as_of = sanitize_iso_temporal(as_of, "as_of")

    # Optional temporal filter shared by both direction queries.
    base_params = {"name": name}
    temporal_clause = ""
    if as_of:
        temporal_clause = (
            " AND (r.valid_from IS NULL OR r.valid_from <= $as_of)"
            " AND (r.valid_to IS NULL OR r.valid_to >= $as_of)"
        )
        base_params["as_of"] = as_of

    unwrap = self._unwrap_agtype
    found = []
    # ``anchor`` is the pattern variable pinned to ``name``: the source node
    # for outgoing edges, the target node for incoming ones.
    for label, anchor in (("outgoing", "s"), ("incoming", "o")):
        if direction not in (label, "both"):
            continue
        records = self._run_cypher(
            f"""
        MATCH (s:Entity)-[r:RELATION]->(o:Entity)
        WHERE {anchor}.name = $name {temporal_clause}
        RETURN s.name AS subject, r.relation_type AS predicate,
        o.name AS object,
        r.valid_from AS valid_from, r.valid_to AS valid_to,
        r.confidence AS confidence, r.source AS source
        """,
            dict(base_params),
            fetch=True,
        )
        for record in records:
            end = unwrap(record[4])
            found.append(
                {
                    "direction": label,
                    "subject": unwrap(record[0]),
                    "predicate": unwrap(record[1]),
                    "object": unwrap(record[2]),
                    "valid_from": unwrap(record[3]),
                    "valid_to": end,
                    "confidence": unwrap(record[5]),
                    "source_closet": unwrap(record[6]),
                    "current": end is None,
                }
            )

    return found
430+
431+
def query_relationship(self, predicate: str, as_of: Optional[str] = None) -> list:
    """List every triple whose relation type equals ``predicate``.

    Counterpart of the SQLite ``KnowledgeGraph.query_relationship``.

    Args:
        predicate: Relation type to match.
        as_of: When given, keep only facts whose
            ``[valid_from, valid_to]`` interval covers this date (a NULL
            bound counts as open-ended).

    Returns:
        A list of dicts with the keys ``subject``, ``predicate``,
        ``object``, ``valid_from``, ``valid_to`` and ``current`` (True
        when ``valid_to`` is None).
    """
    predicate = sanitize_kg_value(predicate, "predicate")
    if as_of is not None:
        as_of = sanitize_iso_temporal(as_of, "as_of")

    query_params = {"pred": predicate}
    temporal_clause = ""
    if as_of:
        temporal_clause = (
            " AND (r.valid_from IS NULL OR r.valid_from <= $as_of)"
            " AND (r.valid_to IS NULL OR r.valid_to >= $as_of)"
        )
        query_params["as_of"] = as_of

    records = self._run_cypher(
        f"""
        MATCH (s:Entity)-[r:RELATION]->(o:Entity)
        WHERE r.relation_type = $pred {temporal_clause}
        RETURN s.name AS subject, r.relation_type AS predicate,
        o.name AS object,
        r.valid_from AS valid_from, r.valid_to AS valid_to
        """,
        query_params,
        fetch=True,
    )

    unwrap = self._unwrap_agtype
    triples = []
    for record in records:
        end = unwrap(record[4])
        triples.append(
            {
                "subject": unwrap(record[0]),
                "predicate": unwrap(record[1]),
                "object": unwrap(record[2]),
                "valid_from": unwrap(record[3]),
                "valid_to": end,
                "current": end is None,
            }
        )
    return triples
471+
472+
def timeline(self, entity_name: Optional[str] = None, limit: int = 100) -> list:
    """List triples ordered by ``valid_from``, optionally for one entity.

    Counterpart of the SQLite ``KnowledgeGraph.timeline``; the default
    limit of 100 is kept for parity. ``ORDER BY ... LIMIT`` runs inside
    the Cypher query itself.

    Args:
        entity_name: When given, only triples where this entity is the
            subject or the object are returned.
        limit: Maximum number of rows requested from the query.

    Returns:
        A list of dicts with the keys ``subject``, ``predicate``,
        ``object``, ``valid_from``, ``valid_to`` and ``current`` (True
        when ``valid_to`` is None).
    """
    if entity_name is None:
        # Whole-graph timeline: no name filter.
        cypher = """
        MATCH (s:Entity)-[r:RELATION]->(o:Entity)
        RETURN s.name AS subject, r.relation_type AS predicate,
        o.name AS object,
        r.valid_from AS valid_from, r.valid_to AS valid_to
        ORDER BY r.valid_from
        LIMIT $limit
        """
        cypher_params = {"limit": limit}
    else:
        entity_name = sanitize_kg_value(entity_name, "entity_name")
        # The entity may sit on either end of the edge.
        cypher = """
        MATCH (s:Entity)-[r:RELATION]->(o:Entity)
        WHERE s.name = $name OR o.name = $name
        RETURN s.name AS subject, r.relation_type AS predicate,
        o.name AS object,
        r.valid_from AS valid_from, r.valid_to AS valid_to
        ORDER BY r.valid_from
        LIMIT $limit
        """
        cypher_params = {"name": entity_name, "limit": limit}

    records = self._run_cypher(cypher, cypher_params, fetch=True)

    unwrap = self._unwrap_agtype
    events = []
    for record in records:
        end = unwrap(record[4])
        events.append(
            {
                "subject": unwrap(record[0]),
                "predicate": unwrap(record[1]),
                "object": unwrap(record[2]),
                "valid_from": unwrap(record[3]),
                "valid_to": end,
                "current": end is None,
            }
        )
    return events
518+
519+
def seed_from_entity_facts(self, entity_facts: dict) -> int:
    """Bulk-load triples from an ENTITY_FACTS-style mapping.

    Counterpart of the SQLite ``KnowledgeGraph.seed_from_entity_facts``.
    ``entity_facts`` maps ``entity_name`` to ``{fact_label: value, ...}``.
    Each value that is neither ``None`` nor ``""`` is written through
    ``add_triple`` as ``(entity_name, fact_label, str(value))``, with no
    temporal arguments supplied. Entities whose facts are not a dict are
    skipped; a ``None`` or empty mapping writes nothing.

    Returns:
        The number of ``add_triple`` calls made.
    """
    written = 0
    for subject_name, fact_map in (entity_facts or {}).items():
        if isinstance(fact_map, dict):
            for fact_label, fact_value in fact_map.items():
                has_value = fact_value is not None and not (fact_value == "")
                if has_value:
                    self.add_triple(
                        subject=subject_name,
                        relation_type=fact_label,
                        object_=str(fact_value),
                    )
                    written += 1
    return written
543+
256544
def stats(self) -> dict:
257545
"""Return aggregate counts mirroring the SQLite KG's ``stats()`` shape.
258546

0 commit comments

Comments
 (0)