Skip to content

Commit 9a1facf

Browse files
jpheinclaude
andcommitted
feat: filesystem + conversation source adapters (RFC 002 §9, #63)
Create FilesystemSourceAdapter wrapping miner.scan_project()/chunk_text()/ detect_room() and ConversationSourceAdapter wrapping convo_miner.scan_convos()/ chunk_exchanges()/detect_convo_room(). Both registered as entry points. Existing miner.mine() and convo_miner.mine_convos() remain callable — adapters provide an alternative entry point via the plugin contract. 25 new tests across test_sources_filesystem.py and test_sources_conversations.py. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent eb77c8c commit 9a1facf

5 files changed

Lines changed: 692 additions & 0 deletions

File tree

mempalace/sources/conversations.py

Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
"""Conversation source adapter (RFC 002 §9).
2+
3+
Thin wrapper around ``mempalace.convo_miner`` — the existing conversation
4+
mining pipeline. Delegates session scanning, exchange chunking, and room
5+
detection to convo_miner internals so adapter users get the same behavior
6+
as ``mempalace mine --mode convos`` without coupling to function signatures.
7+
8+
This adapter does NOT replace ``convo_miner.mine_convos()``; it provides
9+
an alternative entry point via the adapter plugin contract.
10+
"""
11+
12+
from __future__ import annotations
13+
14+
import logging
15+
import os
16+
from pathlib import Path
17+
from typing import Iterator, Optional
18+
19+
from .base import (
20+
AdapterSchema,
21+
BaseSourceAdapter,
22+
DrawerRecord,
23+
FieldSpec,
24+
RouteHint,
25+
SourceItemMetadata,
26+
SourceNotFoundError,
27+
SourceRef,
28+
SourceSummary,
29+
)
30+
from .context import PalaceContext
31+
32+
logger = logging.getLogger(__name__)
33+
34+
35+
class ConversationSourceAdapter(BaseSourceAdapter):
36+
"""Ingest AI conversation transcripts from a local directory.
37+
38+
Wraps ``convo_miner.scan_convos()``, ``convo_miner.chunk_exchanges()``,
39+
and ``convo_miner.detect_convo_room()`` so the full conversation mining
40+
pipeline is available via the adapter contract.
41+
"""
42+
43+
name = "conversations"
44+
adapter_version = "1.0.0"
45+
capabilities = frozenset({"supports_incremental"})
46+
supported_modes = frozenset({"chunked_content"})
47+
declared_transformations = frozenset({"chunk_exchanges"})
48+
49+
def ingest(
50+
self,
51+
*,
52+
source: SourceRef,
53+
palace: PalaceContext,
54+
) -> Iterator[DrawerRecord]:
55+
from ..convo_miner import chunk_exchanges, detect_convo_room, scan_convos
56+
57+
convo_dir = source.local_path
58+
if not convo_dir:
59+
raise SourceNotFoundError("ConversationSourceAdapter requires source.local_path")
60+
61+
convo_path = Path(convo_dir).expanduser().resolve()
62+
if not convo_path.is_dir():
63+
raise SourceNotFoundError(f"Not a directory: {convo_path}")
64+
65+
wing = source.options.get("wing", "conversations")
66+
agent = source.options.get("agent", "mempalace")
67+
68+
session_files = scan_convos(str(convo_path))
69+
70+
limit = source.options.get("limit", 0)
71+
if limit > 0:
72+
session_files = session_files[:limit]
73+
74+
for session_file in session_files:
75+
source_file = str(session_file)
76+
77+
try:
78+
stat = Path(session_file).stat()
79+
version = f"mtime:{stat.st_mtime:.0f}:size:{stat.st_size}"
80+
except OSError:
81+
continue
82+
83+
yield SourceItemMetadata(
84+
source_file=source_file,
85+
version=version,
86+
)
87+
88+
try:
89+
content = Path(session_file).read_text(encoding="utf-8", errors="replace")
90+
except (OSError, UnicodeDecodeError):
91+
logger.debug("Skipping unreadable session: %s", session_file)
92+
continue
93+
94+
if not content.strip():
95+
continue
96+
97+
room = detect_convo_room(content)
98+
chunks = chunk_exchanges(content)
99+
100+
for i, chunk in enumerate(chunks):
101+
chunk_content = chunk if isinstance(chunk, str) else chunk.get("content", str(chunk))
102+
yield DrawerRecord(
103+
content=chunk_content,
104+
source_file=source_file,
105+
chunk_index=i,
106+
route_hint=RouteHint(wing=wing, room=room),
107+
)
108+
109+
def is_current(
110+
self,
111+
*,
112+
item: SourceItemMetadata,
113+
existing_metadata: Optional[dict],
114+
) -> bool:
115+
if existing_metadata is None:
116+
return False
117+
try:
118+
current_mtime = os.path.getmtime(item.source_file)
119+
except OSError:
120+
return False
121+
existing_mtime = existing_metadata.get("source_mtime")
122+
if existing_mtime is None:
123+
return False
124+
return abs(current_mtime - existing_mtime) < 0.01
125+
126+
def describe_schema(self) -> AdapterSchema:
127+
return AdapterSchema(
128+
fields={
129+
"source_file": FieldSpec(
130+
type="string",
131+
required=True,
132+
description="Path to the conversation session file",
133+
indexed=True,
134+
),
135+
"extract_mode": FieldSpec(
136+
type="string",
137+
required=False,
138+
description="Extraction mode used for this session",
139+
),
140+
},
141+
version=self.adapter_version,
142+
)
143+
144+
def source_summary(self, *, source: SourceRef) -> SourceSummary:
145+
from ..convo_miner import scan_convos
146+
147+
if not source.local_path:
148+
return SourceSummary(description="conversations (no path)")
149+
try:
150+
sessions = scan_convos(source.local_path)
151+
return SourceSummary(
152+
description=f"conversations: {source.local_path}",
153+
item_count=len(sessions),
154+
)
155+
except Exception:
156+
return SourceSummary(description=f"conversations: {source.local_path}")

mempalace/sources/filesystem.py

Lines changed: 197 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,197 @@
1+
"""Filesystem source adapter (RFC 002 §9).
2+
3+
Thin wrapper around ``mempalace.miner`` — the existing filesystem mining
4+
pipeline. Delegates scanning, chunking, room detection, and metadata
5+
construction to miner internals so adapter users get the same behavior as
6+
``mempalace mine`` without coupling to the miner's function signatures.
7+
8+
This adapter does NOT replace ``miner.mine()``; it provides an alternative
9+
entry point via the adapter plugin contract. Both paths coexist.
10+
"""
11+
12+
from __future__ import annotations
13+
14+
import logging
15+
import os
16+
from pathlib import Path
17+
from typing import Iterator, Optional
18+
19+
from .base import (
20+
AdapterSchema,
21+
BaseSourceAdapter,
22+
DrawerRecord,
23+
FieldSpec,
24+
RouteHint,
25+
SourceItemMetadata,
26+
SourceNotFoundError,
27+
SourceRef,
28+
SourceSummary,
29+
)
30+
from .context import PalaceContext
31+
32+
logger = logging.getLogger(__name__)
33+
34+
35+
class FilesystemSourceAdapter(BaseSourceAdapter):
36+
"""Ingest project files from a local directory tree.
37+
38+
Wraps ``miner.scan_project()``, ``miner.chunk_text()``, and
39+
``miner.detect_room()`` so the full filesystem mining pipeline is
40+
available via the adapter contract.
41+
"""
42+
43+
name = "filesystem"
44+
adapter_version = "1.0.0"
45+
capabilities = frozenset({"supports_incremental"})
46+
supported_modes = frozenset({"chunked_content"})
47+
declared_transformations = frozenset({"chunk_text"})
48+
49+
def ingest(
50+
self,
51+
*,
52+
source: SourceRef,
53+
palace: PalaceContext,
54+
) -> Iterator[DrawerRecord]:
55+
from ..config import MempalaceConfig
56+
from ..miner import (
57+
_build_drawer_metadata,
58+
_extract_content_date,
59+
chunk_text,
60+
detect_room,
61+
load_config,
62+
scan_project,
63+
)
64+
65+
project_dir = source.local_path
66+
if not project_dir:
67+
raise SourceNotFoundError("FilesystemSourceAdapter requires source.local_path")
68+
69+
project_path = Path(project_dir).expanduser().resolve()
70+
if not project_path.is_dir():
71+
raise SourceNotFoundError(f"Not a directory: {project_path}")
72+
73+
config = load_config(str(project_path))
74+
palace_config = MempalaceConfig()
75+
76+
wing = source.options.get("wing") or config.get("wing", project_path.name)
77+
rooms = config.get("rooms", [{"name": "general", "description": "All project files"}])
78+
agent = source.options.get("agent", "mempalace")
79+
respect_gitignore = source.options.get("respect_gitignore", True)
80+
include_ignored = source.options.get("include_ignored")
81+
82+
files = scan_project(
83+
str(project_path),
84+
respect_gitignore=respect_gitignore,
85+
include_ignored=include_ignored,
86+
)
87+
88+
limit = source.options.get("limit", 0)
89+
if limit > 0:
90+
files = files[:limit]
91+
92+
cfg_chunk_size = palace_config.chunk_size
93+
cfg_chunk_overlap = palace_config.chunk_overlap
94+
cfg_min_chunk_size = palace_config.min_chunk_size
95+
96+
for filepath in files:
97+
source_file = str(filepath)
98+
rel_path = filepath.relative_to(project_path).as_posix()
99+
100+
try:
101+
stat = filepath.stat()
102+
version = f"mtime:{stat.st_mtime:.0f}:size:{stat.st_size}"
103+
except OSError:
104+
continue
105+
106+
yield SourceItemMetadata(
107+
source_file=source_file,
108+
version=version,
109+
)
110+
111+
try:
112+
content = filepath.read_text(encoding="utf-8", errors="replace")
113+
except (OSError, UnicodeDecodeError):
114+
logger.debug("Skipping unreadable file: %s", rel_path)
115+
continue
116+
117+
if not content.strip():
118+
continue
119+
120+
room = detect_room(filepath, content, rooms, project_path)
121+
room_resolver = palace_config.resolve_room if palace_config.room_aliases else None
122+
if room_resolver:
123+
room = room_resolver(room)
124+
125+
chunks = chunk_text(
126+
content,
127+
source_file,
128+
chunk_size=cfg_chunk_size,
129+
chunk_overlap=cfg_chunk_overlap,
130+
min_chunk_size=cfg_min_chunk_size,
131+
)
132+
133+
for chunk in chunks:
134+
yield DrawerRecord(
135+
content=chunk["content"],
136+
source_file=source_file,
137+
chunk_index=chunk["chunk_index"],
138+
metadata={
139+
"line_start": chunk.get("line_start"),
140+
"line_end": chunk.get("line_end"),
141+
},
142+
route_hint=RouteHint(wing=wing, room=room),
143+
)
144+
145+
def is_current(
146+
self,
147+
*,
148+
item: SourceItemMetadata,
149+
existing_metadata: Optional[dict],
150+
) -> bool:
151+
if existing_metadata is None:
152+
return False
153+
try:
154+
current_mtime = os.path.getmtime(item.source_file)
155+
except OSError:
156+
return False
157+
existing_mtime = existing_metadata.get("source_mtime")
158+
if existing_mtime is None:
159+
return False
160+
return abs(current_mtime - existing_mtime) < 0.01
161+
162+
def describe_schema(self) -> AdapterSchema:
163+
return AdapterSchema(
164+
fields={
165+
"source_file": FieldSpec(
166+
type="string",
167+
required=True,
168+
description="Absolute path to the source file",
169+
indexed=True,
170+
),
171+
"line_start": FieldSpec(
172+
type="int",
173+
required=False,
174+
description="1-indexed start line in source (Tier 6a)",
175+
),
176+
"line_end": FieldSpec(
177+
type="int",
178+
required=False,
179+
description="1-indexed end line in source (Tier 6a)",
180+
),
181+
},
182+
version=self.adapter_version,
183+
)
184+
185+
def source_summary(self, *, source: SourceRef) -> SourceSummary:
186+
from ..miner import scan_project
187+
188+
if not source.local_path:
189+
return SourceSummary(description="filesystem (no path)")
190+
try:
191+
files = scan_project(source.local_path)
192+
return SourceSummary(
193+
description=f"filesystem: {source.local_path}",
194+
item_count=len(files),
195+
)
196+
except Exception:
197+
return SourceSummary(description=f"filesystem: {source.local_path}")

pyproject.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,8 @@ postgres = "mempalace.backends.postgres:PostgresBackend"
5757
# (``mempalace-source-cursor``, ``mempalace-source-git``, …) also register
5858
# under this group.
5959
[project.entry-points."mempalace.sources"]
60+
filesystem = "mempalace.sources.filesystem:FilesystemSourceAdapter"
61+
conversations = "mempalace.sources.conversations:ConversationSourceAdapter"
6062
opencode = "mempalace.sources.opencode:OpenCodeSourceAdapter"
6163

6264
[project.optional-dependencies]

0 commit comments

Comments
 (0)