Commit 3187344

add model loading status, effects preset dropdown, clean up UI
Backend:
- Generation service reports 'loading_model' status only when the model is not yet in memory, then 'generating' once inference starts
- Migrate hf_offline_patch.py from print() to the logging module
- Update ADDING_TTS_ENGINES.md for post-refactor file paths

Frontend:
- HistoryTable shows 'Loading model...' vs 'Generating...' based on the current step
- FloatingGenerateBox: replace the instruct toggle + inline effects editor with an effects preset dropdown (third dropdown after language and engine)
- Instruct UI removed for now (form field preserved for future models)
- Remove focus ring from Select component globally
1 parent 8efcc95 commit 3187344

File tree

8 files changed: +157 −240 lines

app/src/components/Generation/FloatingGenerateBox.tsx (90 additions, 165 deletions)

Large diffs are not rendered by default.

app/src/components/History/HistoryTable.tsx (6 additions, 3 deletions)

```diff
@@ -394,7 +394,8 @@ export function HistoryTable() {
         >
           {history.map((gen) => {
             const isCurrentlyPlaying = currentAudioId === gen.id && isPlaying;
-            const isGenerating = gen.status === 'generating';
+            const isInProgress = gen.status === 'loading_model' || gen.status === 'generating';
+            const isGenerating = isInProgress;
             const isFailed = gen.status === 'failed';
             const isPlayable = !isGenerating && !isFailed;
             const hasVersions = gen.versions && gen.versions.length > 1;
@@ -472,8 +473,10 @@ export function HistoryTable() {
               ) : null}
             </div>
             <div className="text-xs text-muted-foreground">
-              {isGenerating ? (
-                <span className="text-accent">Generating...</span>
+              {isInProgress ? (
+                <span className="text-accent">
+                  {gen.status === 'loading_model' ? 'Loading model...' : 'Generating...'}
+                </span>
               ) : (
                 formatDate(gen.created_at)
               )}
```

app/src/components/ui/select.tsx (1 addition, 1 deletion)

```diff
@@ -16,7 +16,7 @@ const SelectTrigger = React.forwardRef<
   <SelectPrimitive.Trigger
     ref={ref}
     className={cn(
-      'flex h-10 w-full items-center justify-between rounded-md border border-input bg-background px-3 py-2 text-sm ring-offset-background placeholder:text-muted-foreground focus:outline-none focus:ring-2 focus:ring-ring focus:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50 [&>span]:line-clamp-1',
+      'flex h-10 w-full items-center justify-between rounded-md border border-input bg-background px-3 py-2 text-sm ring-offset-background placeholder:text-muted-foreground focus:outline-none disabled:cursor-not-allowed disabled:opacity-50 [&>span]:line-clamp-1',
      className,
    )}
    {...props}
```

app/src/lib/api/types.ts (1 addition, 1 deletion)

```diff
@@ -73,7 +73,7 @@ export interface GenerationResponse {
   instruct?: string;
   engine?: string;
   model_size?: string;
-  status: 'generating' | 'completed' | 'failed';
+  status: 'loading_model' | 'generating' | 'completed' | 'failed';
   error?: string;
   is_favorited?: boolean;
   created_at: string;
```

app/src/lib/hooks/useGenerationProgress.ts (1 addition, 1 deletion)

```diff
@@ -8,7 +8,7 @@ import { useServerStore } from '@/stores/serverStore';

 interface GenerationStatusEvent {
   id: string;
-  status: 'generating' | 'completed' | 'failed' | 'not_found';
+  status: 'loading_model' | 'generating' | 'completed' | 'failed' | 'not_found';
   duration?: number;
   error?: string;
 }
```

backend/services/generation.py (6 additions, 5 deletions)

```diff
@@ -55,20 +55,21 @@ async def run_generation(
     bg_db = next(get_db())

     try:
-        # --- Load model --------------------------------------------------
-        await load_engine_model(engine, model_size)
-
         tts_model = get_tts_backend_for_engine(engine)

-        # --- Build voice prompt ------------------------------------------
+        if not tts_model.is_loaded():
+            await history.update_generation_status(generation_id, "loading_model", bg_db)
+
+        await load_engine_model(engine, model_size)
+
         voice_prompt = await profiles.create_voice_prompt_for_profile(
             profile_id,
             bg_db,
             use_cache=True,
             engine=engine,
         )

-        # --- Inference ---------------------------------------------------
+        await history.update_generation_status(generation_id, "generating", bg_db)
         trim_fn = trim_tts_output if engine_needs_trim(engine) else None

         gen_kwargs: dict = dict(
```
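The reordering above is what makes the new status accurate: 'loading_model' is reported only on a cold start, and 'generating' only once the voice prompt is ready. A minimal sketch of the resulting lifecycle, with synchronous stand-ins for the async service (`run_generation_sketch` and its parameters are illustrative, not the real function):

```python
def run_generation_sketch(model_loaded: bool) -> list[str]:
    """Return the sequence of statuses a generation passes through."""
    statuses = []
    if not model_loaded:
        # Only report 'loading_model' when the model is not yet in memory;
        # load_engine_model(...) would run here in the real service.
        statuses.append("loading_model")
    # Voice-prompt preparation happens between these two updates.
    statuses.append("generating")
    # ... inference, then the terminal status ...
    statuses.append("completed")
    return statuses

# Cold start: the UI shows 'Loading model...' first.
print(run_generation_sketch(model_loaded=False))
# Warm start: the model is already resident, so that step is skipped.
print(run_generation_sketch(model_loaded=True))
```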

backend/utils/hf_offline_patch.py (37 additions, 49 deletions)

```diff
@@ -1,100 +1,88 @@
-"""
-Monkey patch for huggingface_hub to force offline mode with cached models.
-This prevents mlx_audio from making network requests when models are already downloaded.
+"""Monkey-patch huggingface_hub to force offline mode with cached models.
+
+Prevents mlx_audio from making network requests when models are already
+downloaded. Must be imported BEFORE mlx_audio.
 """

+import logging
 import os
 from pathlib import Path
 from typing import Optional, Union

+logger = logging.getLogger(__name__)
+

 def patch_huggingface_hub_offline():
-    """
-    Monkey-patch huggingface_hub to force offline mode.
-    This must be called BEFORE importing mlx_audio.
-    """
+    """Monkey-patch huggingface_hub to force offline mode."""
     try:
-        import huggingface_hub
+        import huggingface_hub  # noqa: F401 -- need the package loaded
         from huggingface_hub import constants as hf_constants
         from huggingface_hub.file_download import _try_to_load_from_cache
-
-        # Store original function
+
         original_try_load = _try_to_load_from_cache
-
+
         def _patched_try_to_load_from_cache(
             repo_id: str,
             filename: str,
             cache_dir: Union[str, Path, None] = None,
             revision: Optional[str] = None,
             repo_type: Optional[str] = None,
         ):
-            """
-            Patched version that forces offline mode.
-            Returns None if not cached (instead of making network request).
-            """
-            # Always use the original function, but we're already in HF_HUB_OFFLINE mode
             result = original_try_load(
                 repo_id=repo_id,
                 filename=filename,
                 cache_dir=cache_dir,
                 revision=revision,
                 repo_type=repo_type,
             )
-
+
             if result is None:
-                # File not in cache - log this for debugging
                 cache_path = Path(hf_constants.HF_HUB_CACHE) / f"models--{repo_id.replace('/', '--')}"
-                print(f"[HF_PATCH] File not cached: {repo_id}/{filename}")
-                print(f"[HF_PATCH] Expected at: {cache_path}")
+                logger.debug("file not cached: %s/%s (expected at %s)", repo_id, filename, cache_path)
             else:
-                print(f"[HF_PATCH] Cache hit: {repo_id}/{filename}")
-
+                logger.debug("cache hit: %s/%s", repo_id, filename)
+
             return result
-
-        # Replace the function
+
         import huggingface_hub.file_download as fd
+
         fd._try_to_load_from_cache = _patched_try_to_load_from_cache
-
-        print("[HF_PATCH] huggingface_hub patched for offline mode")
-
+        logger.debug("huggingface_hub patched for offline mode")
+
     except ImportError:
-        print("[HF_PATCH] huggingface_hub not found, skipping patch")
-    except Exception as e:
-        print(f"[HF_PATCH] Error patching huggingface_hub: {e}")
+        logger.debug("huggingface_hub not available, skipping offline patch")
+    except Exception:
+        logger.exception("failed to patch huggingface_hub for offline mode")


 def ensure_original_qwen_config_cached():
+    """Symlink the original Qwen repo cache to the MLX community version.
+
+    mlx_audio may try to fetch config from the original Qwen repo. If only
+    the MLX community variant is cached, create a symlink so the cache lookup
+    succeeds without a network request.
     """
-    The MLX community model is based on the original Qwen model.
-    mlx_audio may try to fetch config from the original repo.
-    We need to ensure that config is available in the cache.
-    """
-    from huggingface_hub import constants as hf_constants
-
-    # Original Qwen model that mlx_audio might reference
+    try:
+        from huggingface_hub import constants as hf_constants
+    except ImportError:
+        return
+
     original_repo = "Qwen/Qwen3-TTS-12Hz-1.7B-Base"
     mlx_repo = "mlx-community/Qwen3-TTS-12Hz-1.7B-Base-bf16"
-
+
     cache_dir = Path(hf_constants.HF_HUB_CACHE)
-
     original_path = cache_dir / f"models--{original_repo.replace('/', '--')}"
     mlx_path = cache_dir / f"models--{mlx_repo.replace('/', '--')}"
-
-    # If original repo cache doesn't exist but MLX does, create a symlink or copy config
+
     if not original_path.exists() and mlx_path.exists():
-        print(f"[HF_PATCH] Original repo not cached, but MLX version is")
-        print(f"[HF_PATCH] Creating symlink from {original_repo} -> {mlx_repo}")
-
         try:
-            # Create a symlink so the cache lookup succeeds
             original_path.parent.mkdir(parents=True, exist_ok=True)
             original_path.symlink_to(mlx_path, target_is_directory=True)
-            print(f"[HF_PATCH] Symlink created successfully")
-        except Exception as e:
-            print(f"[HF_PATCH] Could not create symlink: {e}")
+            logger.info("created cache symlink: %s -> %s", original_repo, mlx_repo)
+        except Exception:
+            logger.warning("could not create cache symlink for %s", original_repo, exc_info=True)


-# Auto-apply patch when module is imported
 if os.environ.get("VOICEBOX_OFFLINE_PATCH", "1") != "0":
     patch_huggingface_hub_offline()
     ensure_original_qwen_config_cached()
```
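Since the patch now logs via the standard logging module at DEBUG level, its messages are silent unless the application opts in. A sketch of surfacing them, assuming the logger name follows the module path `backend.utils.hf_offline_patch` (an assumption; `logging.getLogger(__name__)` derives the name from wherever the module actually lives):

```python
import logging

# Keep the global default quiet, but show DEBUG output for the offline
# patch only. The logger name below is assumed from the module path.
logging.basicConfig(level=logging.WARNING)
logging.getLogger("backend.utils.hf_offline_patch").setLevel(logging.DEBUG)
```

Per-logger levels like this avoid drowning the console in DEBUG chatter from unrelated libraries while still exposing the cache hit/miss trace.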

docs/plans/ADDING_TTS_ENGINES.md (15 additions, 15 deletions)

````diff
@@ -6,7 +6,9 @@ Guide for adding new TTS model backends. Based on the implementation of LuxTTS (

 ## Overview

-Adding an engine touches ~12 files across 4 layers (down from ~19 after the model config registry refactor). The backend protocol work is straightforward — the real time sink is dependency hell, upstream library bugs, and PyInstaller bundling.
+Adding an engine touches ~10 files across 4 layers. The backend protocol work is straightforward — the real time sink is dependency hell, upstream library bugs, and PyInstaller bundling.
+
+The backend is split into layers: `routes/` (thin HTTP handlers), `services/` (business logic), `backends/` (engine implementations), and `utils/` (shared utilities). New engines only need to touch `backends/` and `models.py` on the backend side — the route and service layers use a model config registry that handles dispatch automatically.

 ---

@@ -119,26 +121,24 @@ In `backend/models.py`:

 ---

-## Phase 2: API Integration (`main.py`)
+## Phase 2: Route and Service Integration

-With the model config registry, `main.py` has **zero per-engine dispatch points**. All endpoints use registry helpers like `get_model_config()`, `load_engine_model()`, `engine_needs_trim()`, `check_model_loaded()`, etc.
+With the model config registry, the route and service layers have **zero per-engine dispatch points**. All endpoints use registry helpers like `get_model_config()`, `load_engine_model()`, `engine_needs_trim()`, `check_model_loaded()`, etc.

-**You don't need to touch `main.py` at all** unless your engine needs custom behavior in the generate endpoint (e.g. a new post-processing step beyond `trim_tts_output`).
+**You don't need to touch any route or service files** unless your engine needs custom behavior in the generate pipeline (e.g. a new post-processing step beyond `trim_tts_output`).

 ### 2.1 What the registry handles automatically

-| Endpoint | Registry function used |
-|----------|----------------------|
-| `POST /generate` | `load_engine_model(engine, size)` + `engine_needs_trim(engine)` |
-| `POST /generate/stream` | `ensure_model_cached_or_raise(engine, size)` + `load_engine_model()` |
-| `GET /models/status` | `get_all_model_configs()` + `check_model_loaded(config)` |
-| `POST /models/download` | `get_model_config(name)` + `get_model_load_func(config)` |
-| `POST /models/{name}/unload` | `get_model_config(name)` + `unload_model_by_config(config)` |
-| `DELETE /models/{name}` | `get_model_config(name)` + `unload_model_by_config(config)` |
+| Route file | Registry function used |
+|------------|----------------------|
+| `routes/generations.py` | `load_engine_model(engine, size)` + `engine_needs_trim(engine)` |
+| `routes/models.py` | `get_all_model_configs()` + `check_model_loaded(config)` |
+| `routes/models.py` | `get_model_config(name)` + `get_model_load_func(config)` |
+| `services/generation.py` | `get_tts_backend_for_engine()` + `ensure_model_cached_or_raise()` |

 ### 2.2 Post-processing

-If your model produces trailing silence or hallucinated audio, set `needs_trim=True` on your `ModelConfig`. The generate endpoint checks `engine_needs_trim(engine)` and applies `trim_tts_output()` automatically.
+If your model produces trailing silence or hallucinated audio, set `needs_trim=True` on your `ModelConfig`. The generation service checks `engine_needs_trim(engine)` and applies `trim_tts_output()` automatically.

 ---

@@ -321,7 +321,7 @@ Used by both Chatterbox backends. LuxTTS works fine on MPS.

 To get download progress bars in the UI, wrap model loading with `HFProgressTracker`:
 ```python
-from backend.utils.hf_progress import HFProgressTracker
+from ..utils.hf_progress import HFProgressTracker
 tracker = HFProgressTracker(model_name, progress_manager)
 with tracker.patch_download():
     model = ModelClass.from_pretrained(repo_id)
@@ -339,7 +339,7 @@ The tracker monkey-patches tqdm to intercept HuggingFace's internal progress bar
 - [ ] `backend/requirements.txt` — dependencies added (check for `--no-deps` needs)
 - [ ] `justfile` — `--no-deps` install step if needed

-### API (`backend/main.py`)
+### Routes and services

 No changes needed — the model config registry handles all dispatch automatically.

 ### Frontend
````
