feat: the installer is now working for Windows!
Models are downloaded at runtime instead of at build time.
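To illustrate the runtime-download flow: pointing CHRONARA_MODELS_DIR at an empty, writable directory before the backend module is imported is enough for the WhisperX weights to be fetched on first use instead of being bundled into the installer. A minimal sketch, assuming a development checkout with a src/ folder and an arbitrary models path (only CHRONARA_MODELS_DIR, backend.main and the uvicorn call are taken from this commit; everything else is an assumption):

# Sketch: set the models directory before importing the app, since main.py
# reads CHRONARA_MODELS_DIR at import time (this mirrors the new production runner).
import os
import sys

os.environ["CHRONARA_MODELS_DIR"] = r"C:\ProgramData\Chronara\models"  # assumed writable path
sys.path.insert(0, "src")  # assumed checkout layout so the "backend" package is importable

from backend.main import app
import uvicorn

uvicorn.run(app, host="127.0.0.1", port=8000)  # same host/port as the production runner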
@@ -22,9 +22,16 @@ app.add_middleware(
 )
 
 # Initialize models
-MODEL_DIR = Path(__file__).parent.parent.parent / "models"
+# Use environment variable if set (production), otherwise use development path
+models_env = os.environ.get('CHRONARA_MODELS_DIR')
+if models_env:
+    MODEL_DIR = Path(models_env)
+else:
+    MODEL_DIR = Path(__file__).parent.parent.parent / "models"
+
+print(f"Using models directory: {MODEL_DIR}")
 
 transcriber = WhisperXTranscriber(model_dir=MODEL_DIR)
-summarizer = LlamaSummarizer(model_dir=MODEL_DIR)
+summarizer = LlamaSummarizer(model_dir=MODEL_DIR, model_size="3B")
 audio_processor = AudioProcessor()
 
@@ -14,7 +14,7 @@ class LlamaSummarizer:
         self.model_dir = model_dir
         self.is_loaded = False
 
-        model_path = model_dir / f"llama-3.2-{model_size}-instruct-Q4_K_M.gguf"
+        model_path = model_dir / f"Llama-3.2-{model_size}-Instruct-Q4_K_M.gguf"
 
         try:
             self.llm = Llama(
@@ -1,12 +1,22 @@
 """WhisperX transcription with speaker diarization."""
 
 import json
+import os
+import tempfile
 from pathlib import Path
 from typing import Any, Optional
 
 import numpy as np
+import soundfile as sf
 import torch
+import torch.serialization
 import whisperx
+from simple_diarizer.diarizer import Diarizer
+
+# Add safe globals for PyTorch 2.8+ weights loading
+torch.serialization.add_safe_globals([
+    "omegaconf.listconfig.ListConfig",
+    "omegaconf.dictconfig.DictConfig"
+])
 
 
 class WhisperXTranscriber:
@@ -18,38 +28,59 @@ class WhisperXTranscriber:
         self.device = "cuda" if torch.cuda.is_available() else "cpu"
         self.compute_type = "float16" if self.device == "cuda" else "int8"
         self.is_loaded = False
+        self.diarizer = None
 
+        # Delay loading until first use to avoid startup issues
+        self.model = None
+        self.align_model = None
+        self.align_metadata = None
+        self.model_size = model_size
+
         try:
-            # Load ASR model
-            self.model = whisperx.load_model(
-                model_size,
-                self.device,
-                compute_type=self.compute_type,
-                download_root=str(model_dir / "whisper"),
+            # Load simple_diarizer (no authentication required!)
+            self.diarizer = Diarizer(
+                embed_model="xvec",
+                cluster_method="sc",
             )
-
-            # Load alignment model
-            self.align_model, self.align_metadata = whisperx.load_align_model(
-                language_code="en",
-                device=self.device,
-                model_dir=str(model_dir / "alignment"),
-            )
-
-            # Load diarization pipeline
-            self.diarize_model = whisperx.DiarizationPipeline(
-                device=self.device,
-                model_name=str(model_dir / "diarization"),
-            )
-
-            self.is_loaded = True
+            print("Speaker diarization enabled (using simple_diarizer)")
         except Exception as e:
-            print(f"Failed to load WhisperX models: {e}")
-            self.is_loaded = False
+            print(f"Warning: Failed to load diarizer: {e}")
+            self.diarizer = None
 
+    def _ensure_models_loaded(self):
+        """Load models on first use."""
+        if self.model is None:
+            try:
+                print("Loading WhisperX models...")
+                # Load ASR model
+                self.model = whisperx.load_model(
+                    self.model_size,
+                    self.device,
+                    compute_type=self.compute_type,
+                    download_root=str(self.model_dir / "whisper"),
+                )
+
+                # Load alignment model
+                self.align_model, self.align_metadata = whisperx.load_align_model(
+                    language_code="en",
+                    device=self.device,
+                    model_dir=str(self.model_dir / "alignment"),
+                )
+                self.is_loaded = True
+                print("✓ WhisperX models loaded successfully")
+            except Exception as e:
+                print(f"Failed to load WhisperX models: {e}")
+                self.is_loaded = False
+                raise
+
 
     async def transcribe_chunk(self, audio_chunk: np.ndarray) -> Optional[dict[str, Any]]:
         """Transcribe an audio chunk with speaker diarization."""
-        if not self.is_loaded:
-            return None
+        # Load models on first use
+        try:
+            self._ensure_models_loaded()
+        except Exception:
+            return None
 
         try:
             # Transcribe
@@ -67,9 +98,9 @@ class WhisperXTranscriber:
                 self.device,
             )
 
-            # Diarize
-            diarize_segments = self.diarize_model(audio_chunk)
-            result = whisperx.assign_word_speakers(diarize_segments, result)
+            # Diarize using simple_diarizer
+            if self.diarizer:
+                result = self._apply_diarization(audio_chunk, result)
 
             # Format output
             formatted_result = []
@@ -85,4 +116,37 @@ class WhisperXTranscriber:
 
         except Exception as e:
             print(f"Transcription error: {e}")
             return None
+
+    def _apply_diarization(
+        self, audio_chunk: np.ndarray, transcript_result: dict
+    ) -> dict:
+        """Apply speaker diarization to transcript segments."""
+        try:
+            # simple_diarizer needs a file path, so write to temp file
+            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
+                temp_path = f.name
+            sf.write(temp_path, audio_chunk, 16000)
+
+            # Run diarization
+            diarization = self.diarizer.diarize(temp_path, num_speakers=None)
+
+            # Clean up temp file
+            os.unlink(temp_path)
+
+            # Assign speakers to segments based on time overlap
+            for segment in transcript_result["segments"]:
+                segment_mid = (segment["start"] + segment["end"]) / 2
+                speaker = "Unknown"
+
+                for diar_seg in diarization:
+                    if diar_seg["start"] <= segment_mid <= diar_seg["end"]:
+                        speaker = f"Speaker {diar_seg['label']}"
+                        break
+
+                segment["speaker"] = speaker
+
+        except Exception as e:
+            print(f"Diarization error: {e}")
+
+        return transcript_result
@@ -0,0 +1,64 @@
+#!/usr/bin/env python3
+"""Production runner for Chronara backend - handles installed app paths."""
+
+import os
+import sys
+from pathlib import Path
+
+# In production, the app structure will be:
+# C:\Program Files\Chronara\
+# ├── Chronara.exe
+# ├── resources/
+# │   ├── models/
+# │   └── src/
+# │       └── backend/
+
+# Find the app installation directory
+if getattr(sys, 'frozen', False):
+    # Running in PyInstaller bundle
+    app_dir = Path(sys.executable).parent
+else:
+    # Running as script - check if we're in an installed app
+    current_file = Path(__file__).resolve()
+
+    # Check if we're in a typical Windows install location
+    if "Program Files" in str(current_file) or "AppData" in str(current_file):
+        # We're installed - go up to find resources
+        app_dir = current_file.parent.parent.parent.parent
+    else:
+        # Development mode
+        app_dir = current_file.parent.parent.parent
+
+# Set up paths
+resources_dir = app_dir / "resources"
+if resources_dir.exists():
+    models_dir = resources_dir / "models"
+    src_dir = resources_dir / "src"
+else:
+    # Fallback to development structure
+    models_dir = app_dir / "models"
+    src_dir = app_dir / "src"
+
+# Add src to path so imports work
+sys.path.insert(0, str(src_dir))
+
+# Set environment variable for models
+os.environ['CHRONARA_MODELS_DIR'] = str(models_dir)
+
+# Now import and run the app
+from backend.main import app
+import uvicorn
+
+if __name__ == "__main__":
+    print(f"Starting Chronara backend...")
+    print(f"App directory: {app_dir}")
+    print(f"Models directory: {models_dir}")
+    print(f"Source directory: {src_dir}")
+
+    # Run the app
+    uvicorn.run(
+        app,
+        host="127.0.0.1",
+        port=8000,
+        log_level="info"
+    )
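A brief usage note on the runner above: because it sets CHRONARA_MODELS_DIR and extends sys.path before importing backend.main, it is the script an installed copy should launch, rather than importing backend.main directly. A minimal manual-launch sketch (the run_production.py filename and its location under the installed source tree are assumptions inferred from the parent-directory arithmetic above; host and port come from the uvicorn.run() call):

# hypothetical manual launch from the installed source tree
#   cd "C:\Program Files\Chronara\resources\src\backend"
#   python run_production.py
# the backend then listens on http://127.0.0.1:8000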