
Commit a4cbc4e

feat: auto-select embedding model + fine-tuning pipeline wiring (#999)
## Summary

Implements embedding model auto-selection from LMEB rankings (#965) and wires the fine-tuning checkpoint lookup into the Mem0 adapter (#966).

### #965: Auto-Select Embedding Model

- **LMEB ranking data** (`memory/embedding/rankings.py`): 6 models with per-memory-type NDCG@10 scores, deployment tier classification, and output dimensions
- **Model selector** (`memory/embedding/selector.py`): `select_embedding_model()` intersects available models with LMEB rankings (substring + case-insensitive match for Ollama tags); `infer_deployment_tier()` maps provider presets to GPU_FULL/GPU_CONSUMER/CPU
- **Embedder config resolution** (`memory/embedding/resolve.py`): `resolve_embedder_config()` priority chain -- settings DB > YAML config override > auto-selection with tier-filtered fallback
- **Config integration**: `EmbedderOverrideConfig` on `CompanyMemoryConfig` (model requires dims validator); 3 new ADVANCED-level settings in memory namespace; `memory` dict field on `CompanyTemplate`
- **Setup wizard**: `auto_select_embedder()` wired into `complete_setup()` -- best-effort, does not block setup on failure

### #966: Fine-Tuning Pipeline Wiring

- **Checkpoint lookup**: Removed `_reject_unimplemented_fine_tune` validator; added `_resolve_effective_model()` to `build_mem0_config_dict()` -- when `fine_tune.enabled=True`, uses checkpoint path as model identifier
- **Pipeline stubs** (`memory/embedding/fine_tune.py`): 4-stage async functions (generate data, mine negatives, contrastive train, deploy) with input validation and `NotImplementedError` -- actual ML logic deferred to when `synthorg[fine-tune]` extra is installed
- **Admin API** (`api/controllers/memory.py`): `MemoryAdminController` at `/admin/memory/` with `POST /fine-tune`, `GET /fine-tune/status`, `GET /embedder` endpoints (CEO/SYSTEM role guard)
- **Optional deps**: `[project.optional-dependencies]` fine-tune group (torch, sentence-transformers)
- **Docs updated**: Removed "not yet implemented" language from design spec and reference docs

## Test Plan

- 109 new tests across 8 test files covering: LMEB data integrity, selector logic, resolution priority chain, fine-tune stage validation, checkpoint lookup (enabled/disabled/none), setup auto-selection, controller model validation
- Full suite: 12337 passed, 0 failed
- Pre-reviewed by 4 agents (code-reviewer, conventions-audit, test-analyzer, docs-silence-audit), 15 findings addressed

## Review Coverage

| Agent | Findings |
|-------|----------|
| code-reviewer | 5 (2 critical, 3 major) |
| conventions-audit | 4 (2 critical, 2 major) |
| test-analyzer | 9 (3 critical, 3 major, 3 medium) |
| docs-silence-audit | 6 (1 critical, 5 major) |

All 15 valid findings implemented. Key fixes: event constants for log calls, MemoryError/RecursionError re-raise guards, explicit `is not None` merge logic, missing logger in selector, test coverage for controller/models/edge cases.

Closes #965
Closes #966
1 parent 5cb232d commit a4cbc4e

34 files changed

Lines changed: 2651 additions & 58 deletions

.github/workflows/dependency-review.yml

Lines changed: 27 additions & 1 deletion
```diff
@@ -90,6 +90,12 @@ jobs:
   # only invoked during development and CI linting. GPL copyleft does
   # not apply to the project output. The action cannot distinguish
   # tool deps from runtime deps, so they need per-package exemptions.
+  #
+  # Fine-tune optional dep group (torch, sentence-transformers) and
+  # transitive CUDA/NVIDIA deps. Optional -- only installed via
+  # synthorg[fine-tune]. License metadata missing from PyPI for NVIDIA
+  # CUDA packages (proprietary, freely redistributable). torch is
+  # BSD-style. scikit-learn has compound BSD-3-Clause AND scancode tag.
   allow-dependencies-licenses: >-
     pkg:pypi/mem0ai@1.0.9,
     pkg:pypi/numpy@2.4.4,
@@ -113,5 +119,25 @@ jobs:
     pkg:golang/github.com/alfatraining/structtag@1.0.0,
     pkg:golang/github.com/fatih/structtag@1.2.0,
     pkg:npm/json-schema-typed@8.0.2,
-    pkg:npm/victory-vendor@37.3.6
+    pkg:npm/victory-vendor@37.3.6,
+    pkg:pypi/scikit-learn@1.8.0,
+    pkg:pypi/torch@2.11.0,
+    pkg:pypi/cuda-bindings@13.2.0,
+    pkg:pypi/cuda-pathfinder@1.5.0,
+    pkg:pypi/cuda-toolkit@13.0.2,
+    pkg:pypi/nvidia-cublas@13.1.0.3,
+    pkg:pypi/nvidia-cuda-cupti@13.0.85,
+    pkg:pypi/nvidia-cuda-nvrtc@13.0.88,
+    pkg:pypi/nvidia-cuda-runtime@13.0.96,
+    pkg:pypi/nvidia-cudnn-cu13@9.19.0.56,
+    pkg:pypi/nvidia-cufft@12.0.0.61,
+    pkg:pypi/nvidia-cufile@1.15.1.6,
+    pkg:pypi/nvidia-curand@10.4.0.35,
+    pkg:pypi/nvidia-cusolver@12.0.4.66,
+    pkg:pypi/nvidia-cusparse@12.6.3.3,
+    pkg:pypi/nvidia-cusparselt-cu13@0.8.0,
+    pkg:pypi/nvidia-nccl-cu13@2.28.9,
+    pkg:pypi/nvidia-nvjitlink@13.0.88,
+    pkg:pypi/nvidia-nvshmem-cu13@3.4.5,
+    pkg:pypi/nvidia-nvtx@13.0.85
   comment-summary-in-pr: always
```

CLAUDE.md

Lines changed: 2 additions & 2 deletions
````diff
@@ -88,7 +88,7 @@ curl http://localhost:3000/api/v1/health # backend (via web proxy)
 
 ```text
 src/synthorg/
-  api/           # Litestar REST + WebSocket API, RFC 9457 errors, setup wizard, personality presets, auth/, guards (role-based access control), user management, auto-wiring, lifecycle, bootstrap (agent registry init from config), template packs (list + live-apply)
+  api/           # Litestar REST + WebSocket API, RFC 9457 errors, setup wizard, personality presets, auth/, guards (role-based access control), user management, auto-wiring, lifecycle, bootstrap (agent registry init from config), template packs (list + live-apply), memory admin (fine-tuning pipeline, embedder queries)
   backup/        # Backup/restore orchestrator, scheduler, retention, handlers/
   budget/        # Cost tracking, budget enforcement, quota degradation (including synchronous peek for routing-time selector hints), CFO optimization, trend analysis, budget forecasting, configurable currency formatting
   cli/           # Python CLI module (superseded by top-level cli/ Go binary)
@@ -97,7 +97,7 @@ src/synthorg/
   core/          # Shared domain models, base classes, resilience config
   engine/        # Orchestration, execution loops, task engine, coordination, checkpoint recovery, approval/review gates, stagnation detection, context budget, compaction, hybrid loop, workspace/ (git worktree isolation, merge orchestration, semantic conflict detection), workflow/ (Kanban board, Agile sprints, WIP limits, sprint lifecycle, velocity tracking, ceremony scheduling, strategies/ (pluggable scheduling strategies), velocity_calculators/ (pluggable velocity calculators))
   hr/            # Hiring, firing, onboarding, agent registry, performance tracking, activity timeline, activity event types, cost event redaction, career history, promotion/demotion
-  memory/        # Pluggable MemoryBackend, retrieval pipeline, org memory, consolidation
+  memory/        # Pluggable MemoryBackend, retrieval pipeline, org memory, consolidation, embedding/ (LMEB-ranked model selection, embedder config resolution, fine-tuning pipeline)
   persistence/   # Pluggable PersistenceBackend, SQLite, settings + user + artifact + project + preset repositories, artifact content storage (pluggable ArtifactStorageBackend, filesystem impl)
   observability/ # Structured logging, correlation tracking, redaction, third-party logger taming, log shipping (syslog, HTTP), compressed archival, events/
   providers/     # LLM provider abstraction, presets, model auto-discovery, capabilities, runtime CRUD (management/), provider families, discovery SSRF allowlist, health tracking, active health probing, routing/ (strategy-based model routing, multi-provider resolution with ModelCandidateSelector protocol, QuotaAwareSelector, CheapestSelector)
````

docs/architecture/decisions.md

Lines changed: 1 addition & 1 deletion
```diff
@@ -100,7 +100,7 @@ All significant design and architecture decisions, organized by domain. Each ent
 | MTEB | General passage retrieval | MTEB performance does not transfer to memory retrieval (Pearson: -0.115). Optimizing for MTEB may actively harm memory retrieval quality |
 | Manual evaluation | Custom retrieval benchmarks | Too expensive to maintain. LMEB provides a standardized, reproducible alternative |
 
-**Model selection:** Three deployment tiers recommended based on LMEB scores. See [Embedding Evaluation](../reference/embedding-evaluation.md) for the full analysis. Domain-specific fine-tuning (+10-27% improvement) documented as a planned configuration stub via `EmbeddingFineTuneConfig`; the Mem0 adapter does not yet consume this config at initialization.
+**Model selection:** Three deployment tiers recommended based on LMEB scores. See [Embedding Evaluation](../reference/embedding-evaluation.md) for the full analysis. Domain-specific fine-tuning (+10-27% improvement) configured via `EmbeddingFineTuneConfig`; when enabled, the Mem0 adapter uses the checkpoint path as the model identifier. The fine-tuning pipeline stages themselves raise `NotImplementedError` -- only the checkpoint lookup is wired (see #1001).
 
 ## Overarching Pattern
```
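The tier-based recommendation behind `infer_deployment_tier()` (mentioned in the commit summary as mapping provider presets to GPU_FULL/GPU_CONSUMER/CPU) can be sketched like this. The tier names come from the commit; the preset-matching rules and function signature below are illustrative assumptions, not the project's actual logic in `memory/embedding/selector.py`.

```python
from enum import Enum


class DeploymentTier(Enum):
    GPU_FULL = "gpu_full"          # datacenter-class GPU: largest ranked models fit
    GPU_CONSUMER = "gpu_consumer"  # single consumer GPU: mid-size models
    CPU = "cpu"                    # CPU-only: smallest models


def infer_deployment_tier(provider_preset: str, has_gpu: bool) -> DeploymentTier:
    """Hypothetical preset-to-tier mapping used to filter LMEB candidates."""
    if not has_gpu:
        return DeploymentTier.CPU
    # Assumption: datacenter presets are identified by name; the real
    # selector may use hardware probes or an explicit preset attribute.
    if "datacenter" in provider_preset.lower():
        return DeploymentTier.GPU_FULL
    return DeploymentTier.GPU_CONSUMER
```

A tier-filtered fallback then restricts auto-selection to models whose deployment tier is at or below the inferred one.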

docs/design/memory.md

Lines changed: 10 additions & 6 deletions
````diff
@@ -359,12 +359,12 @@ Key findings:
    lr=1e-5). Single GPU, 1-2 hours for ~500 documents
 4. **Deploy** -- save checkpoint; update `Mem0EmbedderConfig` to point to fine-tuned model
 
-**Integration design (planned):** fine-tuning is an offline pipeline, not a runtime
-operation. The optional `EmbeddingFineTuneConfig` (disabled by default) stores the
-checkpoint path. In a future implementation, backend initialization will check for a
-checkpoint and prefer the fine-tuned model when available, falling back to the base
-model with a logged warning. The config is currently defined but not wired into the
-Mem0 adapter initialization.
+**Integration design:** fine-tuning is an offline pipeline triggered via
+`POST /admin/memory/fine-tune` (see `MemoryAdminController`). The optional
+`EmbeddingFineTuneConfig` (disabled by default) stores the checkpoint path. When
+`enabled=True` and `checkpoint_path` is set, backend initialization uses the
+checkpoint path as the model identifier passed to the Mem0 SDK. The embedding
+provider must serve the fine-tuned model under this identifier.
 
 ```python
 class EmbeddingFineTuneConfig(BaseModel):
@@ -376,6 +376,10 @@ Key findings:
     training_data_dir: NotBlankStr | None = None
 ```
 
+When `enabled=True`, both `checkpoint_path` and `base_model` are required
+(enforced by model validation). Path traversal (`..`) and Windows-style
+paths are rejected to prevent container path escapes.
+
 A future `FineTuningPipeline` protocol would formalize the four stages:
 
 ```python
````

docs/reference/embedding-evaluation.md

Lines changed: 5 additions & 4 deletions
```diff
@@ -214,12 +214,13 @@ single GPU.
 
 Fine-tuning is an **offline pipeline**, not a runtime operation. The `EmbeddingFineTuneConfig`
 (see [Memory Design Spec](../design/memory.md#embedding-model-selection))
-stores the configuration. Planned initialization behavior (not yet implemented in the Mem0 adapter):
+stores the configuration. Initialization behavior in the Mem0 adapter:
 
-1. If `fine_tune.enabled` and checkpoint exists at `fine_tune.checkpoint_path`: use fine-tuned model
-2. If `fine_tune.enabled` but no checkpoint: log warning, use base model
-3. If `fine_tune.enabled` is `False` (default): use base model, no checkpoint check
+1. If `fine_tune.enabled` and `checkpoint_path` is set: the checkpoint path is used as the model
+   identifier passed to the Mem0 SDK (the embedding provider must serve the fine-tuned model)
+2. If `fine_tune.enabled` is `False` (default): the base model is used, no checkpoint check
 
+The pipeline is triggered via `POST /admin/memory/fine-tune` (see `MemoryAdminController`).
 This follows the project's pattern of disabled-by-default optional features
 (cf. `DualModeConfig` in consolidation).
```

docs/roadmap/open-questions.md

Lines changed: 1 addition & 1 deletion
```diff
@@ -25,7 +25,7 @@ Numbers are stable identifiers -- resolved questions are removed without renumbe
 | Cost explosion from agent loops | High | Budget hard stops, loop detection, max iterations per task. |
 | Agent quality degradation with cheap models | Medium | Quality gates, minimum model requirements per task type. |
 | Third-party library breaking changes | Medium | Pin versions, integration tests, abstraction layers. |
-| Memory retrieval quality | Medium | Mem0 selected as initial backend (see [Decision Log](../architecture/decisions.md)). LMEB evaluation ([arXiv:2603.12572](https://arxiv.org/abs/2603.12572)) shows MTEB scores do not predict memory retrieval quality (Spearman: -0.130). Embedding model selection should be guided by LMEB episodic + procedural scores. Optional domain fine-tuning (+10-27%) planned via an offline pipeline configured with `EmbeddingFineTuneConfig` (currently a stub; the Mem0 adapter does not yet use it). See [Embedding Evaluation](../reference/embedding-evaluation.md). |
+| Memory retrieval quality | Medium | Mem0 selected as initial backend (see [Decision Log](../architecture/decisions.md)). LMEB evaluation ([arXiv:2603.12572](https://arxiv.org/abs/2603.12572)) shows MTEB scores do not predict memory retrieval quality (Spearman: -0.130). Embedding model selection should be guided by LMEB episodic + procedural scores. Optional domain fine-tuning (+10-27%) via an offline pipeline configured with `EmbeddingFineTuneConfig`. Checkpoint lookup is wired into the Mem0 adapter; pipeline stages (data generation, hard negative mining, contrastive training) are not yet implemented (see #1001). See [Embedding Evaluation](../reference/embedding-evaluation.md). |
 | Agent personality inconsistency | Low | Strong system prompts, few-shot examples, personality tests. |
 | WebSocket scaling | Low | Start local, add Redis pub/sub when needed. |
```

pyproject.toml

Lines changed: 3 additions & 0 deletions
```diff
@@ -34,6 +34,9 @@ dependencies = [
 requires = ["hatchling==1.29.0"]
 build-backend = "hatchling.build"
 
+[project.optional-dependencies]
+fine-tune = ["torch==2.11.0", "sentence-transformers==5.3.0"]
+
 [tool.hatch.version]
 path = "src/synthorg/__init__.py"
```
src/synthorg/api/controllers/__init__.py

Lines changed: 3 additions & 0 deletions
```diff
@@ -17,6 +17,7 @@
 from synthorg.api.controllers.departments import DepartmentController
 from synthorg.api.controllers.health import HealthController
 from synthorg.api.controllers.meetings import MeetingController
+from synthorg.api.controllers.memory import MemoryAdminController
 from synthorg.api.controllers.messages import MessageController
 from synthorg.api.controllers.personalities import (
     PersonalityPresetController,
@@ -57,6 +58,7 @@
     SetupPersonalityController,
     PersonalityPresetController,
     BackupController,
+    MemoryAdminController,
     TemplatePackController,
     UserController,
 )
@@ -79,6 +81,7 @@
     "DepartmentController",
     "HealthController",
     "MeetingController",
+    "MemoryAdminController",
     "MessageController",
     "PersonalityPresetController",
     "ProjectController",
```

src/synthorg/api/controllers/memory.py
Lines changed: 155 additions & 0 deletions
New file (155 lines). Note: the scraped diff contained invalid `except A, B:` Python 2 syntax in two places; the tuple form `except (A, B):` is shown below.

```python
"""Memory admin controller -- fine-tuning and embedder endpoints.

All endpoints require CEO or the internal SYSTEM role
(used by the CLI for admin operations).
"""

from litestar import Controller, get, post
from litestar.datastructures import State  # noqa: TC002
from pydantic import BaseModel, ConfigDict, Field

from synthorg.api.dto import ApiResponse
from synthorg.api.guards import HumanRole, require_roles
from synthorg.api.state import AppState  # noqa: TC001
from synthorg.core.types import NotBlankStr  # noqa: TC001
from synthorg.memory.embedding.fine_tune import FineTuneStage
from synthorg.memory.embedding.fine_tune_models import (
    FineTuneRequest,
    FineTuneStatus,
)
from synthorg.observability import get_logger
from synthorg.observability.events.memory import (
    MEMORY_EMBEDDER_SETTINGS_READ_FAILED,
    MEMORY_FINE_TUNE_REQUESTED,
)

logger = get_logger(__name__)


class ActiveEmbedderResponse(BaseModel):
    """Active embedder configuration read from settings."""

    model_config = ConfigDict(frozen=True, allow_inf_nan=False)

    provider: NotBlankStr | None = Field(
        default=None,
        description="Embedding provider name",
    )
    model: NotBlankStr | None = Field(
        default=None,
        description="Embedding model identifier",
    )
    dims: int | None = Field(
        default=None,
        ge=1,
        description="Embedding vector dimensions",
    )


class MemoryAdminController(Controller):
    """Admin endpoints for memory management.

    Provides fine-tuning pipeline control and embedder configuration
    queries. All endpoints require CEO or SYSTEM role.
    """

    path = "/admin/memory"
    tags = ("admin", "memory")
    guards = [require_roles(HumanRole.CEO, HumanRole.SYSTEM)]  # noqa: RUF012

    @post("/fine-tune")
    async def start_fine_tune(
        self,
        state: State,  # noqa: ARG002
        data: FineTuneRequest,
    ) -> ApiResponse[FineTuneStatus]:
        """Trigger a fine-tuning pipeline run.

        Args:
            state: Application state.
            data: Fine-tuning request parameters.

        Returns:
            Current pipeline status.
        """
        logger.info(
            MEMORY_FINE_TUNE_REQUESTED,
            source_dir=data.source_dir,
            base_model=data.base_model,
        )
        # Pipeline stages are not yet implemented -- return status
        # indicating the pipeline is idle with a descriptive error.
        # See issue #1001 for the implementation roadmap.
        return ApiResponse(
            data=FineTuneStatus(
                stage=FineTuneStage.FAILED,
                error=(
                    "Fine-tuning pipeline stages are not yet "
                    "implemented. Install synthorg[fine-tune] "
                    "and check back in a future release."
                ),
            ),
        )

    @get("/fine-tune/status")
    async def get_fine_tune_status(
        self,
        state: State,  # noqa: ARG002
    ) -> ApiResponse[FineTuneStatus]:
        """Get the current fine-tuning pipeline status.

        Args:
            state: Application state.

        Returns:
            Current pipeline status.
        """
        return ApiResponse(
            data=FineTuneStatus(stage=FineTuneStage.IDLE),
        )

    @get("/embedder")
    async def get_active_embedder(
        self,
        state: State,
    ) -> ApiResponse[ActiveEmbedderResponse]:
        """Get the active embedder configuration.

        Args:
            state: Application state.

        Returns:
            Active embedder provider, model, and dims.
        """
        app_state: AppState = state.app_state
        result = ActiveEmbedderResponse()
        if app_state.has_settings_service:
            svc = app_state.settings_service
            try:
                provider_sv = await svc.get("memory", "embedder_provider")
                model_sv = await svc.get("memory", "embedder_model")
                dims_sv = await svc.get("memory", "embedder_dims")
                dims_value: int | None = None
                if dims_sv.value:
                    try:
                        dims_value = int(dims_sv.value)
                    except (ValueError, TypeError):
                        logger.warning(
                            MEMORY_EMBEDDER_SETTINGS_READ_FAILED,
                            setting="embedder_dims",
                            value=dims_sv.value,
                            reason="invalid integer value",
                        )
                result = ActiveEmbedderResponse(
                    provider=provider_sv.value or None,
                    model=model_sv.value or None,
                    dims=dims_value,
                )
            except (MemoryError, RecursionError):
                raise
            except Exception:
                logger.warning(
                    MEMORY_EMBEDDER_SETTINGS_READ_FAILED,
                    exc_info=True,
                )
        return ApiResponse(data=result)
```

src/synthorg/api/controllers/setup.py

Lines changed: 26 additions & 0 deletions
```diff
@@ -27,6 +27,9 @@
 from synthorg.api.controllers.setup_helpers import (
     auto_create_template_agents as _auto_create_template_agents,
 )
+from synthorg.api.controllers.setup_helpers import (
+    auto_select_embedder,
+)
 from synthorg.api.controllers.setup_helpers import (
     check_has_agents as _check_has_agents,
 )
@@ -45,6 +48,9 @@
 from synthorg.api.controllers.setup_helpers import (
     check_setup_not_complete as _check_setup_not_complete,
 )
+from synthorg.api.controllers.setup_helpers import (
+    collect_model_ids as _collect_model_ids,
+)
 from synthorg.api.controllers.setup_helpers import (
     persist_company_settings as _persist_company_settings,
 )
@@ -96,6 +102,7 @@
     SETUP_AGENTS_AUTO_CREATED,
     SETUP_AGENTS_LISTED,
     SETUP_COMPANY_CREATED,
+    SETUP_COMPLETE_CHECK_ERROR,
     SETUP_COMPLETED,
     SETUP_NAME_LOCALES_LISTED,
     SETUP_NAME_LOCALES_SAVED,
@@ -745,6 +752,25 @@ async def complete_setup(
         logger.warning(SETUP_NO_PROVIDERS)
         raise ApiValidationError(msg)
 
+    # Auto-select embedding model from configured providers.
+    # Best-effort: does not block setup completion on failure.
+    # TODO(#1001): forward provider_preset_name and has_gpu from
+    # the setup context so tier inference uses real hardware info.
+    try:
+        model_ids = await _collect_model_ids(app_state)
+        await auto_select_embedder(
+            settings_svc=settings_svc,
+            available_model_ids=model_ids,
+        )
+    except (MemoryError, RecursionError):
+        raise
+    except Exception:
+        logger.warning(
+            SETUP_COMPLETE_CHECK_ERROR,
+            check="auto_select_embedder",
+            exc_info=True,
+        )
+
     await settings_svc.set("api", "setup_complete", "true")
 
     logger.info(SETUP_COMPLETED)
```

Note: the scraped diff showed `except MemoryError, RecursionError:`, which is invalid Python 3 syntax; the tuple form `except (MemoryError, RecursionError):` is shown above.
