feat: the installer is now working for Windows!
Models are downloaded at runtime instead of at build time.
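To illustrate the runtime-download flow: pointing CHRONARA_MODELS_DIR at an empty, writable directory before the backend module is imported is enough for the WhisperX weights to be fetched on first use instead of being bundled into the installer. A minimal sketch, assuming a development checkout with a src/ folder and an arbitrary models path (only CHRONARA_MODELS_DIR, backend.main and the uvicorn call are taken from this commit; everything else is an assumption):

# Sketch: set the models directory before importing the app, since main.py
# reads CHRONARA_MODELS_DIR at import time (this mirrors the new production runner).
import os
import sys

os.environ["CHRONARA_MODELS_DIR"] = r"C:\ProgramData\Chronara\models"  # assumed writable path
sys.path.insert(0, "src")  # assumed checkout layout so the "backend" package is importable

from backend.main import app
import uvicorn

uvicorn.run(app, host="127.0.0.1", port=8000)  # same host/port as the production runner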
@@ -22,9 +22,16 @@ app.add_middleware(
 )
 
 # Initialize models
-MODEL_DIR = Path(__file__).parent.parent.parent / "models"
+# Use environment variable if set (production), otherwise use development path
+models_env = os.environ.get('CHRONARA_MODELS_DIR')
+if models_env:
+    MODEL_DIR = Path(models_env)
+else:
+    MODEL_DIR = Path(__file__).parent.parent.parent / "models"
+
+print(f"Using models directory: {MODEL_DIR}")
 
 transcriber = WhisperXTranscriber(model_dir=MODEL_DIR)
-summarizer = LlamaSummarizer(model_dir=MODEL_DIR)
+summarizer = LlamaSummarizer(model_dir=MODEL_DIR, model_size="3B")
 audio_processor = AudioProcessor()
 
@@ -14,7 +14,7 @@ class LlamaSummarizer:
         self.model_dir = model_dir
         self.is_loaded = False
 
-        model_path = model_dir / f"llama-3.2-{model_size}-instruct-Q4_K_M.gguf"
+        model_path = model_dir / f"Llama-3.2-{model_size}-Instruct-Q4_K_M.gguf"
 
         try:
             self.llm = Llama(
@@ -1,12 +1,22 @@
 """WhisperX transcription with speaker diarization."""
 
 import json
+import os
+import tempfile
 from pathlib import Path
 from typing import Any, Optional
 
 import numpy as np
+import soundfile as sf
 import torch
+import torch.serialization
 import whisperx
+from simple_diarizer.diarizer import Diarizer
+
+# Add safe globals for PyTorch 2.8+ weights loading
+torch.serialization.add_safe_globals([
+    "omegaconf.listconfig.ListConfig",
+    "omegaconf.dictconfig.DictConfig"
+])
 
 
 class WhisperXTranscriber:
@@ -18,38 +28,59 @@ class WhisperXTranscriber:
         self.device = "cuda" if torch.cuda.is_available() else "cpu"
         self.compute_type = "float16" if self.device == "cuda" else "int8"
         self.is_loaded = False
+        self.diarizer = None
 
+        # Delay loading until first use to avoid startup issues
+        self.model = None
+        self.align_model = None
+        self.align_metadata = None
+        self.model_size = model_size
+
         try:
-            # Load ASR model
-            self.model = whisperx.load_model(
-                model_size,
-                self.device,
-                compute_type=self.compute_type,
-                download_root=str(model_dir / "whisper"),
+            # Load simple_diarizer (no authentication required!)
+            self.diarizer = Diarizer(
+                embed_model="xvec",
+                cluster_method="sc",
             )
-
-            # Load alignment model
-            self.align_model, self.align_metadata = whisperx.load_align_model(
-                language_code="en",
-                device=self.device,
-                model_dir=str(model_dir / "alignment"),
-            )
-
-            # Load diarization pipeline
-            self.diarize_model = whisperx.DiarizationPipeline(
-                device=self.device,
-                model_name=str(model_dir / "diarization"),
-            )
-
-            self.is_loaded = True
+            print("Speaker diarization enabled (using simple_diarizer)")
         except Exception as e:
-            print(f"Failed to load WhisperX models: {e}")
-            self.is_loaded = False
+            print(f"Warning: Failed to load diarizer: {e}")
+            self.diarizer = None
 
+    def _ensure_models_loaded(self):
+        """Load models on first use."""
+        if self.model is None:
+            try:
+                print("Loading WhisperX models...")
+                # Load ASR model
+                self.model = whisperx.load_model(
+                    self.model_size,
+                    self.device,
+                    compute_type=self.compute_type,
+                    download_root=str(self.model_dir / "whisper"),
+                )
+
+                # Load alignment model
+                self.align_model, self.align_metadata = whisperx.load_align_model(
+                    language_code="en",
+                    device=self.device,
+                    model_dir=str(self.model_dir / "alignment"),
+                )
+                self.is_loaded = True
+                print("✓ WhisperX models loaded successfully")
+            except Exception as e:
+                print(f"Failed to load WhisperX models: {e}")
+                self.is_loaded = False
+                raise
+
 
     async def transcribe_chunk(self, audio_chunk: np.ndarray) -> Optional[dict[str, Any]]:
         """Transcribe an audio chunk with speaker diarization."""
-        if not self.is_loaded:
-            return None
+        # Load models on first use
+        try:
+            self._ensure_models_loaded()
+        except Exception:
+            return None
 
         try:
             # Transcribe
@@ -67,9 +98,9 @@ class WhisperXTranscriber:
                 self.device,
             )
 
-            # Diarize
-            diarize_segments = self.diarize_model(audio_chunk)
-            result = whisperx.assign_word_speakers(diarize_segments, result)
+            # Diarize using simple_diarizer
+            if self.diarizer:
+                result = self._apply_diarization(audio_chunk, result)
 
             # Format output
             formatted_result = []
@@ -85,4 +116,37 @@ class WhisperXTranscriber:
 
         except Exception as e:
             print(f"Transcription error: {e}")
             return None
+
+    def _apply_diarization(
+        self, audio_chunk: np.ndarray, transcript_result: dict
+    ) -> dict:
+        """Apply speaker diarization to transcript segments."""
+        try:
+            # simple_diarizer needs a file path, so write to temp file
+            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
+                temp_path = f.name
+            sf.write(temp_path, audio_chunk, 16000)
+
+            # Run diarization
+            diarization = self.diarizer.diarize(temp_path, num_speakers=None)
+
+            # Clean up temp file
+            os.unlink(temp_path)
+
+            # Assign speakers to segments based on time overlap
+            for segment in transcript_result["segments"]:
+                segment_mid = (segment["start"] + segment["end"]) / 2
+                speaker = "Unknown"
+
+                for diar_seg in diarization:
+                    if diar_seg["start"] <= segment_mid <= diar_seg["end"]:
+                        speaker = f"Speaker {diar_seg['label']}"
+                        break
+
+                segment["speaker"] = speaker
+
+        except Exception as e:
+            print(f"Diarization error: {e}")
+
+        return transcript_result
@@ -0,0 +1,64 @@
+#!/usr/bin/env python3
+"""Production runner for Chronara backend - handles installed app paths."""
+
+import os
+import sys
+from pathlib import Path
+
+# In production, the app structure will be:
+# C:\Program Files\Chronara\
+# ├── Chronara.exe
+# ├── resources/
+# │   ├── models/
+# │   └── src/
+# │       └── backend/
+
+# Find the app installation directory
+if getattr(sys, 'frozen', False):
+    # Running in PyInstaller bundle
+    app_dir = Path(sys.executable).parent
+else:
+    # Running as script - check if we're in an installed app
+    current_file = Path(__file__).resolve()
+
+    # Check if we're in a typical Windows install location
+    if "Program Files" in str(current_file) or "AppData" in str(current_file):
+        # We're installed - go up to find resources
+        app_dir = current_file.parent.parent.parent.parent
+    else:
+        # Development mode
+        app_dir = current_file.parent.parent.parent
+
+# Set up paths
+resources_dir = app_dir / "resources"
+if resources_dir.exists():
+    models_dir = resources_dir / "models"
+    src_dir = resources_dir / "src"
+else:
+    # Fallback to development structure
+    models_dir = app_dir / "models"
+    src_dir = app_dir / "src"
+
+# Add src to path so imports work
+sys.path.insert(0, str(src_dir))
+
+# Set environment variable for models
+os.environ['CHRONARA_MODELS_DIR'] = str(models_dir)
+
+# Now import and run the app
+from backend.main import app
+import uvicorn
+
+if __name__ == "__main__":
+    print(f"Starting Chronara backend...")
+    print(f"App directory: {app_dir}")
+    print(f"Models directory: {models_dir}")
+    print(f"Source directory: {src_dir}")
+
+    # Run the app
+    uvicorn.run(
+        app,
+        host="127.0.0.1",
+        port=8000,
+        log_level="info"
+    )
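A brief usage note on the runner above: because it sets CHRONARA_MODELS_DIR and extends sys.path before importing backend.main, it is the script an installed copy should launch, rather than importing backend.main directly. A minimal manual-launch sketch (the run_production.py filename and its location under the installed source tree are assumptions inferred from the parent-directory arithmetic above; host and port come from the uvicorn.run() call):

# hypothetical manual launch from the installed source tree
#   cd "C:\Program Files\Chronara\resources\src\backend"
#   python run_production.py
# the backend then listens on http://127.0.0.1:8000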