generated from nhcarrigan/template
feat: we successfully have the installer working for windows!
Models are downloaded at runtime instead of at build time.
This commit is contained in:
+238
@@ -155,6 +155,74 @@ body {
|
||||
}
|
||||
}
|
||||
|
||||
/* Recording Controls */
|
||||
.recording-controls {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 1rem;
|
||||
padding: 1rem 1.5rem;
|
||||
background-color: var(--surface-color);
|
||||
border: 1px solid var(--danger-color);
|
||||
border-radius: 0.75rem;
|
||||
box-shadow: var(--shadow);
|
||||
}
|
||||
|
||||
.recording-dot {
|
||||
width: 0.75rem;
|
||||
height: 0.75rem;
|
||||
background-color: var(--danger-color);
|
||||
border-radius: 50%;
|
||||
animation: pulse 1.5s infinite;
|
||||
}
|
||||
|
||||
.recording-duration {
|
||||
font-weight: 600;
|
||||
font-variant-numeric: tabular-nums;
|
||||
min-width: 4rem;
|
||||
}
|
||||
|
||||
.stop-button {
|
||||
padding: 0.5rem 1rem;
|
||||
border-radius: 0.5rem;
|
||||
border: 2px solid var(--danger-color);
|
||||
background-color: transparent;
|
||||
color: var(--danger-color);
|
||||
font-size: 0.875rem;
|
||||
font-weight: 600;
|
||||
cursor: pointer;
|
||||
transition: all 0.2s;
|
||||
}
|
||||
|
||||
.stop-button:hover {
|
||||
background-color: var(--danger-color);
|
||||
color: white;
|
||||
}
|
||||
|
||||
/* Transcribing Indicator */
|
||||
.transcribing-indicator {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
align-items: center;
|
||||
gap: 0.75rem;
|
||||
padding: 1.5rem;
|
||||
background-color: var(--surface-color);
|
||||
border: 1px solid var(--border-color);
|
||||
border-radius: 0.75rem;
|
||||
}
|
||||
|
||||
.transcribing-indicator span {
|
||||
color: var(--text-secondary);
|
||||
font-size: 0.875rem;
|
||||
}
|
||||
|
||||
/* Small loading spinner variant */
|
||||
.loading-spinner.small {
|
||||
width: 1.5rem;
|
||||
height: 1.5rem;
|
||||
border-width: 2px;
|
||||
margin: 0;
|
||||
}
|
||||
|
||||
/* Action Buttons */
|
||||
.action-buttons {
|
||||
display: flex;
|
||||
@@ -326,6 +394,176 @@ body {
|
||||
}
|
||||
}
|
||||
|
||||
/* Backend Logs */
|
||||
.logs-toggle-button {
|
||||
position: fixed;
|
||||
bottom: 1rem;
|
||||
right: 1rem;
|
||||
padding: 0.5rem 1rem;
|
||||
border-radius: 0.5rem;
|
||||
border: 1px solid var(--border-color);
|
||||
background-color: var(--surface-color);
|
||||
color: var(--text-color);
|
||||
font-size: 0.875rem;
|
||||
cursor: pointer;
|
||||
transition: all 0.2s;
|
||||
box-shadow: var(--shadow);
|
||||
z-index: 100;
|
||||
}
|
||||
|
||||
.logs-toggle-button:hover {
|
||||
background-color: var(--border-color);
|
||||
}
|
||||
|
||||
.backend-logs {
|
||||
position: fixed;
|
||||
bottom: 0;
|
||||
left: 0;
|
||||
right: 0;
|
||||
height: 300px;
|
||||
background-color: var(--surface-color);
|
||||
border-top: 1px solid var(--border-color);
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
z-index: 100;
|
||||
box-shadow: 0 -2px 10px rgba(0, 0, 0, 0.1);
|
||||
}
|
||||
|
||||
.logs-header {
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
align-items: center;
|
||||
padding: 0.75rem 1rem;
|
||||
border-bottom: 1px solid var(--border-color);
|
||||
background-color: var(--bg-color);
|
||||
}
|
||||
|
||||
.logs-header h3 {
|
||||
font-size: 0.875rem;
|
||||
font-weight: 600;
|
||||
margin: 0;
|
||||
}
|
||||
|
||||
.logs-close-button {
|
||||
padding: 0.25rem 0.75rem;
|
||||
border-radius: 0.25rem;
|
||||
border: 1px solid var(--border-color);
|
||||
background-color: var(--surface-color);
|
||||
font-size: 0.75rem;
|
||||
cursor: pointer;
|
||||
transition: all 0.2s;
|
||||
}
|
||||
|
||||
.logs-close-button:hover {
|
||||
background-color: var(--border-color);
|
||||
}
|
||||
|
||||
.logs-container {
|
||||
flex: 1;
|
||||
overflow-y: auto;
|
||||
padding: 0.5rem 1rem;
|
||||
font-family: 'Consolas', 'Monaco', 'Courier New', monospace;
|
||||
font-size: 0.75rem;
|
||||
background-color: #1a1a2e;
|
||||
color: #d4d4d4;
|
||||
}
|
||||
|
||||
.logs-empty {
|
||||
color: var(--text-secondary);
|
||||
text-align: center;
|
||||
padding: 2rem;
|
||||
}
|
||||
|
||||
.log-line {
|
||||
margin: 0;
|
||||
padding: 0.125rem 0;
|
||||
white-space: pre-wrap;
|
||||
word-break: break-all;
|
||||
line-height: 1.4;
|
||||
}
|
||||
|
||||
/* Setup Screen */
|
||||
.setup-screen {
|
||||
flex: 1;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
padding: 2rem;
|
||||
}
|
||||
|
||||
.setup-content {
|
||||
text-align: center;
|
||||
max-width: 500px;
|
||||
margin-bottom: 2rem;
|
||||
}
|
||||
|
||||
.setup-content h2 {
|
||||
font-size: 1.5rem;
|
||||
margin-bottom: 1rem;
|
||||
}
|
||||
|
||||
.setup-message {
|
||||
color: var(--text-secondary);
|
||||
margin-bottom: 1.5rem;
|
||||
}
|
||||
|
||||
.setup-info {
|
||||
text-align: left;
|
||||
background-color: var(--surface-color);
|
||||
border: 1px solid var(--border-color);
|
||||
border-radius: 0.75rem;
|
||||
padding: 1.5rem;
|
||||
margin-bottom: 1.5rem;
|
||||
}
|
||||
|
||||
.setup-info p {
|
||||
margin-bottom: 1rem;
|
||||
}
|
||||
|
||||
.setup-info ul {
|
||||
margin-left: 1.5rem;
|
||||
margin-bottom: 1rem;
|
||||
}
|
||||
|
||||
.setup-info li {
|
||||
margin-bottom: 0.5rem;
|
||||
color: var(--text-secondary);
|
||||
}
|
||||
|
||||
.setup-note {
|
||||
font-size: 0.875rem;
|
||||
color: var(--text-secondary);
|
||||
font-style: italic;
|
||||
}
|
||||
|
||||
.error-content {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
align-items: center;
|
||||
gap: 1rem;
|
||||
}
|
||||
|
||||
.error-message {
|
||||
color: var(--danger-color);
|
||||
background-color: rgba(239, 68, 68, 0.1);
|
||||
padding: 1rem;
|
||||
border-radius: 0.5rem;
|
||||
border: 1px solid var(--danger-color);
|
||||
max-width: 400px;
|
||||
word-break: break-word;
|
||||
}
|
||||
|
||||
.loading-spinner {
|
||||
width: 3rem;
|
||||
height: 3rem;
|
||||
border: 3px solid var(--border-color);
|
||||
border-top-color: var(--primary-color);
|
||||
border-radius: 50%;
|
||||
animation: spin 1s linear infinite;
|
||||
margin: 1rem auto;
|
||||
}
|
||||
|
||||
/* Dark Mode */
|
||||
@media (prefers-color-scheme: dark) {
|
||||
:root {
|
||||
|
||||
+232
-85
@@ -1,9 +1,9 @@
|
||||
import { useState, useEffect, useRef } from "react";
|
||||
import { invoke } from "@tauri-apps/api/core";
|
||||
import "./App.css";
|
||||
import { AudioRecorder } from "./components/AudioRecorder";
|
||||
import { TranscriptDisplay } from "./components/TranscriptDisplay";
|
||||
import { SummaryDisplay } from "./components/SummaryDisplay";
|
||||
import { BackendLogs } from "./components/BackendLogs";
|
||||
|
||||
interface TranscriptSegment {
|
||||
start: number;
|
||||
@@ -12,69 +12,127 @@ interface TranscriptSegment {
|
||||
speaker: string;
|
||||
}
|
||||
|
||||
type AppState = "checking" | "downloading_models" | "initializing" | "ready" | "recording" | "transcribing" | "error";
|
||||
|
||||
function App() {
|
||||
const [isRecording, setIsRecording] = useState(false);
|
||||
const [transcriptSegments, setTranscriptSegments] = useState<TranscriptSegment[]>([]);
|
||||
const [summary, setSummary] = useState<string | null>(null);
|
||||
const [isGeneratingSummary, setIsGeneratingSummary] = useState(false);
|
||||
const [backendReady, setBackendReady] = useState(false);
|
||||
const wsRef = useRef<WebSocket | null>(null);
|
||||
const [appState, setAppState] = useState<AppState>("checking");
|
||||
const [statusMessage, setStatusMessage] = useState("Checking setup...");
|
||||
const [showLogs, setShowLogs] = useState(false);
|
||||
const [errorMessage, setErrorMessage] = useState<string | null>(null);
|
||||
const [recordingDuration, setRecordingDuration] = useState(0);
|
||||
const initStarted = useRef(false);
|
||||
const recordingTimer = useRef<number | null>(null);
|
||||
|
||||
useEffect(() => {
|
||||
// Start Python backend through Tauri
|
||||
startPythonBackend();
|
||||
if (initStarted.current) return;
|
||||
initStarted.current = true;
|
||||
|
||||
initializeApp();
|
||||
}, []);
|
||||
|
||||
const startPythonBackend = async () => {
|
||||
try {
|
||||
// Start backend through Tauri command
|
||||
await invoke("start_backend");
|
||||
|
||||
// Give backend time to start up
|
||||
setTimeout(() => {
|
||||
checkBackendHealth();
|
||||
}, 2000);
|
||||
} catch (error) {
|
||||
console.error("Failed to start backend:", error);
|
||||
}
|
||||
};
|
||||
|
||||
const checkBackendHealth = async () => {
|
||||
try {
|
||||
const response = await fetch("http://localhost:8000/health");
|
||||
if (response.ok) {
|
||||
setBackendReady(true);
|
||||
// Cleanup timer on unmount
|
||||
useEffect(() => {
|
||||
return () => {
|
||||
if (recordingTimer.current) {
|
||||
clearInterval(recordingTimer.current);
|
||||
}
|
||||
};
|
||||
}, []);
|
||||
|
||||
const initializeApp = async () => {
|
||||
try {
|
||||
setAppState("checking");
|
||||
setStatusMessage("Checking if models are present...");
|
||||
|
||||
const modelsPresent = await invoke<boolean>("check_models");
|
||||
|
||||
if (!modelsPresent) {
|
||||
setAppState("downloading_models");
|
||||
setStatusMessage("Downloading AI models (~2GB)...");
|
||||
setShowLogs(true);
|
||||
|
||||
await invoke("download_models");
|
||||
|
||||
// Check again after download
|
||||
const modelsVerified = await invoke<boolean>("check_models");
|
||||
if (!modelsVerified) {
|
||||
throw new Error("Download completed but model file not found. This might be a path or permissions issue.");
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize ML models
|
||||
setAppState("initializing");
|
||||
setStatusMessage("Loading AI models (this may take a moment)...");
|
||||
setShowLogs(true);
|
||||
|
||||
await invoke("initialize_models");
|
||||
|
||||
// Check if models are ready
|
||||
const ready = await invoke<boolean>("check_ready");
|
||||
if (!ready) {
|
||||
throw new Error("Models failed to initialize. Check logs for details.");
|
||||
}
|
||||
|
||||
setAppState("ready");
|
||||
setStatusMessage("");
|
||||
setShowLogs(false);
|
||||
} catch (error) {
|
||||
console.error("Backend not ready:", error);
|
||||
// In production, Tauri will start the backend automatically
|
||||
console.error("Initialization failed:", error);
|
||||
setAppState("error");
|
||||
setErrorMessage(String(error));
|
||||
setShowLogs(true);
|
||||
}
|
||||
};
|
||||
|
||||
const handleAudioData = (audioData: ArrayBuffer) => {
|
||||
if (!wsRef.current || wsRef.current.readyState !== WebSocket.OPEN) {
|
||||
// Create WebSocket connection
|
||||
wsRef.current = new WebSocket("ws://localhost:8000/ws/transcribe");
|
||||
const retrySetup = () => {
|
||||
setErrorMessage(null);
|
||||
initStarted.current = false;
|
||||
initializeApp();
|
||||
};
|
||||
|
||||
wsRef.current.onopen = () => {
|
||||
console.log("WebSocket connected");
|
||||
// Send the audio data
|
||||
wsRef.current?.send(audioData);
|
||||
};
|
||||
const startRecording = async () => {
|
||||
try {
|
||||
setAppState("recording");
|
||||
setRecordingDuration(0);
|
||||
setTranscriptSegments([]);
|
||||
setSummary(null);
|
||||
|
||||
wsRef.current.onmessage = (event) => {
|
||||
const data = JSON.parse(event.data);
|
||||
if (data.type === "transcription" && data.data.segments) {
|
||||
setTranscriptSegments((prev) => [...prev, ...data.data.segments]);
|
||||
}
|
||||
};
|
||||
await invoke("start_recording");
|
||||
|
||||
wsRef.current.onclose = () => {
|
||||
console.log("WebSocket disconnected");
|
||||
};
|
||||
} else {
|
||||
// Send audio data through existing connection
|
||||
wsRef.current.send(audioData);
|
||||
// Start timer to show recording duration
|
||||
recordingTimer.current = window.setInterval(() => {
|
||||
setRecordingDuration(d => d + 1);
|
||||
}, 1000);
|
||||
} catch (error) {
|
||||
console.error("Failed to start recording:", error);
|
||||
setAppState("ready");
|
||||
setErrorMessage(String(error));
|
||||
}
|
||||
};
|
||||
|
||||
const stopRecording = async () => {
|
||||
try {
|
||||
// Stop the timer
|
||||
if (recordingTimer.current) {
|
||||
clearInterval(recordingTimer.current);
|
||||
recordingTimer.current = null;
|
||||
}
|
||||
|
||||
setAppState("transcribing");
|
||||
setStatusMessage("Transcribing audio...");
|
||||
|
||||
const segments = await invoke<TranscriptSegment[]>("stop_recording");
|
||||
setTranscriptSegments(segments);
|
||||
|
||||
setAppState("ready");
|
||||
setStatusMessage("");
|
||||
} catch (error) {
|
||||
console.error("Failed to stop recording:", error);
|
||||
setAppState("ready");
|
||||
setErrorMessage(String(error));
|
||||
}
|
||||
};
|
||||
|
||||
@@ -83,24 +141,16 @@ function App() {
|
||||
|
||||
setIsGeneratingSummary(true);
|
||||
|
||||
// Combine all transcript segments into text
|
||||
const fullTranscript = transcriptSegments
|
||||
.map((seg) => `${seg.speaker}: ${seg.text}`)
|
||||
.join("\n");
|
||||
|
||||
try {
|
||||
const response = await fetch("http://localhost:8000/summarize", {
|
||||
method: "POST",
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
body: JSON.stringify({ transcript: fullTranscript }),
|
||||
});
|
||||
|
||||
const data = await response.json();
|
||||
setSummary(data.summary);
|
||||
const summaryResult = await invoke<string>("summarize", { transcript: fullTranscript });
|
||||
setSummary(summaryResult);
|
||||
} catch (error) {
|
||||
console.error("Failed to generate summary:", error);
|
||||
setErrorMessage(String(error));
|
||||
} finally {
|
||||
setIsGeneratingSummary(false);
|
||||
}
|
||||
@@ -138,6 +188,126 @@ function App() {
|
||||
return `${mins}:${secs.toString().padStart(2, "0")}`;
|
||||
};
|
||||
|
||||
/**
 * Formats an elapsed time in seconds as `m:ss` (for example `65` -> `"1:05"`).
 *
 * Fractional input is floored to whole seconds, and negative or non-finite
 * input is treated as zero, so the seconds field is always exactly two digits.
 * Minutes are not rolled over into hours (`3600` -> `"60:00"`).
 *
 * @param seconds - Elapsed time in seconds.
 * @returns The duration rendered as `minutes:seconds`.
 */
const formatDuration = (seconds: number) => {
  // Normalise first so `%` never yields a fractional or negative seconds field.
  const totalSeconds = Number.isFinite(seconds) ? Math.max(0, Math.floor(seconds)) : 0;
  const mins = Math.floor(totalSeconds / 60);
  const secs = totalSeconds % 60;
  return `${mins}:${secs.toString().padStart(2, "0")}`;
};
|
||||
|
||||
const renderSetupScreen = () => (
|
||||
<div className="setup-screen">
|
||||
<div className="setup-content">
|
||||
<h2>
|
||||
{appState === "checking" && "🔍 Checking Setup..."}
|
||||
{appState === "downloading_models" && "📥 Downloading AI Models"}
|
||||
{appState === "initializing" && "🚀 Loading AI Models"}
|
||||
{appState === "error" && "❌ Setup Failed"}
|
||||
</h2>
|
||||
|
||||
<p className="setup-message">{statusMessage}</p>
|
||||
|
||||
{appState === "downloading_models" && (
|
||||
<div className="setup-info">
|
||||
<p>Downloading the AI models:</p>
|
||||
<ul>
|
||||
<li>Llama 3.2 3B Instruct (~2GB) - for summarization</li>
|
||||
<li>Whisper Base (~142MB) - for transcription</li>
|
||||
</ul>
|
||||
<p className="setup-note">
|
||||
This is a one-time download. Models are stored locally for offline use.
|
||||
</p>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{appState === "initializing" && (
|
||||
<div className="setup-info">
|
||||
<p>Loading the AI models into memory:</p>
|
||||
<ul>
|
||||
<li>LLaMA - Meeting summarization</li>
|
||||
<li>Whisper - Speech recognition</li>
|
||||
<li>VAD - Speaker detection</li>
|
||||
</ul>
|
||||
<p className="setup-note">
|
||||
This is a pure Rust backend - no Python required!
|
||||
</p>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{appState === "error" && (
|
||||
<div className="error-content">
|
||||
<p className="error-message">{errorMessage}</p>
|
||||
<button className="primary-button" onClick={retrySetup}>
|
||||
🔄 Retry Setup
|
||||
</button>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{(appState === "checking" || appState === "initializing" || appState === "downloading_models") && (
|
||||
<div className="loading-spinner" />
|
||||
)}
|
||||
</div>
|
||||
|
||||
<BackendLogs isVisible={showLogs} onToggle={() => setShowLogs(!showLogs)} />
|
||||
</div>
|
||||
);
|
||||
|
||||
const renderRecordingControls = () => (
|
||||
<section className="controls-section">
|
||||
{appState === "ready" && (
|
||||
<button className="record-button" onClick={startRecording}>
|
||||
🎙️ Start Recording
|
||||
</button>
|
||||
)}
|
||||
|
||||
{appState === "recording" && (
|
||||
<div className="recording-controls">
|
||||
<div className="recording-indicator">
|
||||
<span className="recording-dot" />
|
||||
Recording: {formatDuration(recordingDuration)}
|
||||
</div>
|
||||
<button className="stop-button" onClick={stopRecording}>
|
||||
⏹️ Stop Recording
|
||||
</button>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{appState === "transcribing" && (
|
||||
<div className="transcribing-indicator">
|
||||
<div className="loading-spinner small" />
|
||||
<span>Transcribing audio...</span>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{appState === "ready" && transcriptSegments.length > 0 && (
|
||||
<div className="action-buttons">
|
||||
<button className="secondary-button" onClick={downloadTranscript}>
|
||||
📄 Download Transcript
|
||||
</button>
|
||||
<button
|
||||
className="primary-button"
|
||||
onClick={generateSummary}
|
||||
disabled={isGeneratingSummary}
|
||||
>
|
||||
✨ Generate Summary
|
||||
</button>
|
||||
</div>
|
||||
)}
|
||||
</section>
|
||||
);
|
||||
|
||||
// Show setup screen for non-ready states
|
||||
if (appState === "checking" || appState === "downloading_models" || appState === "initializing" || appState === "error") {
|
||||
return (
|
||||
<main className="container">
|
||||
<header className="app-header">
|
||||
<h1>🎙️ Chronara</h1>
|
||||
<p>Local Meeting Transcription & Summarization</p>
|
||||
</header>
|
||||
{renderSetupScreen()}
|
||||
</main>
|
||||
);
|
||||
}
|
||||
|
||||
return (
|
||||
<main className="container">
|
||||
<header className="app-header">
|
||||
@@ -145,33 +315,8 @@ function App() {
|
||||
<p>Local Meeting Transcription & Summarization</p>
|
||||
</header>
|
||||
|
||||
{!backendReady && (
|
||||
<div className="warning-banner">⚠️ Backend is starting up. This may take a moment...</div>
|
||||
)}
|
||||
|
||||
<div className="app-content">
|
||||
<section className="controls-section">
|
||||
<AudioRecorder
|
||||
onAudioData={handleAudioData}
|
||||
isRecording={isRecording}
|
||||
setIsRecording={setIsRecording}
|
||||
/>
|
||||
|
||||
{!isRecording && transcriptSegments.length > 0 && (
|
||||
<div className="action-buttons">
|
||||
<button className="secondary-button" onClick={downloadTranscript}>
|
||||
📄 Download Transcript
|
||||
</button>
|
||||
<button
|
||||
className="primary-button"
|
||||
onClick={generateSummary}
|
||||
disabled={isGeneratingSummary}
|
||||
>
|
||||
✨ Generate Summary
|
||||
</button>
|
||||
</div>
|
||||
)}
|
||||
</section>
|
||||
{renderRecordingControls()}
|
||||
|
||||
<div className="content-grid">
|
||||
<TranscriptDisplay segments={transcriptSegments} />
|
||||
@@ -181,6 +326,8 @@ function App() {
|
||||
onDownload={downloadSummary}
|
||||
/>
|
||||
</div>
|
||||
|
||||
<BackendLogs isVisible={showLogs} onToggle={() => setShowLogs(!showLogs)} />
|
||||
</div>
|
||||
</main>
|
||||
);
|
||||
|
||||
+9
-2
@@ -22,9 +22,16 @@ app.add_middleware(
|
||||
)
|
||||
|
||||
# Initialize models
|
||||
MODEL_DIR = Path(__file__).parent.parent.parent / "models"
|
||||
# Use environment variable if set (production), otherwise use development path
|
||||
models_env = os.environ.get('CHRONARA_MODELS_DIR')
|
||||
if models_env:
|
||||
MODEL_DIR = Path(models_env)
|
||||
else:
|
||||
MODEL_DIR = Path(__file__).parent.parent.parent / "models"
|
||||
|
||||
print(f"Using models directory: {MODEL_DIR}")
|
||||
transcriber = WhisperXTranscriber(model_dir=MODEL_DIR)
|
||||
summarizer = LlamaSummarizer(model_dir=MODEL_DIR)
|
||||
summarizer = LlamaSummarizer(model_dir=MODEL_DIR, model_size="3B")
|
||||
audio_processor = AudioProcessor()
|
||||
|
||||
|
||||
|
||||
@@ -14,7 +14,7 @@ class LlamaSummarizer:
|
||||
self.model_dir = model_dir
|
||||
self.is_loaded = False
|
||||
|
||||
model_path = model_dir / f"llama-3.2-{model_size}-instruct-Q4_K_M.gguf"
|
||||
model_path = model_dir / f"Llama-3.2-{model_size}-Instruct-Q4_K_M.gguf"
|
||||
|
||||
try:
|
||||
self.llm = Llama(
|
||||
|
||||
@@ -1,12 +1,22 @@
|
||||
"""WhisperX transcription with speaker diarization."""
|
||||
|
||||
import json
|
||||
import os
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from typing import Any, Optional
|
||||
|
||||
import numpy as np
|
||||
import soundfile as sf
|
||||
import torch
|
||||
import torch.serialization
|
||||
import whisperx
|
||||
from simple_diarizer.diarizer import Diarizer
|
||||
|
||||
# Add safe globals for PyTorch 2.8+ weights loading
|
||||
torch.serialization.add_safe_globals([
|
||||
"omegaconf.listconfig.ListConfig",
|
||||
"omegaconf.dictconfig.DictConfig"
|
||||
])
|
||||
|
||||
|
||||
class WhisperXTranscriber:
|
||||
@@ -18,38 +28,59 @@ class WhisperXTranscriber:
|
||||
self.device = "cuda" if torch.cuda.is_available() else "cpu"
|
||||
self.compute_type = "float16" if self.device == "cuda" else "int8"
|
||||
self.is_loaded = False
|
||||
self.diarizer = None
|
||||
|
||||
# Delay loading until first use to avoid startup issues
|
||||
self.model = None
|
||||
self.align_model = None
|
||||
self.align_metadata = None
|
||||
self.model_size = model_size
|
||||
|
||||
try:
|
||||
# Load ASR model
|
||||
self.model = whisperx.load_model(
|
||||
model_size,
|
||||
self.device,
|
||||
compute_type=self.compute_type,
|
||||
download_root=str(model_dir / "whisper"),
|
||||
# Load simple_diarizer (no authentication required!)
|
||||
self.diarizer = Diarizer(
|
||||
embed_model="xvec",
|
||||
cluster_method="sc",
|
||||
)
|
||||
|
||||
# Load alignment model
|
||||
self.align_model, self.align_metadata = whisperx.load_align_model(
|
||||
language_code="en",
|
||||
device=self.device,
|
||||
model_dir=str(model_dir / "alignment"),
|
||||
)
|
||||
|
||||
# Load diarization pipeline
|
||||
self.diarize_model = whisperx.DiarizationPipeline(
|
||||
device=self.device,
|
||||
model_name=str(model_dir / "diarization"),
|
||||
)
|
||||
|
||||
self.is_loaded = True
|
||||
print("Speaker diarization enabled (using simple_diarizer)")
|
||||
except Exception as e:
|
||||
print(f"Failed to load WhisperX models: {e}")
|
||||
self.is_loaded = False
|
||||
print(f"Warning: Failed to load diarizer: {e}")
|
||||
self.diarizer = None
|
||||
|
||||
def _ensure_models_loaded(self):
    """Load the WhisperX ASR and alignment models on first use.

    Does nothing when ``self.model`` is already set. Both models are loaded
    into local variables first and only stored on ``self`` once both loads
    have succeeded, so a failure part-way through leaves ``self.model`` as
    ``None`` and the next call retries the whole load instead of continuing
    with an ASR model but no alignment model.

    Raises:
        Exception: Whatever the WhisperX loaders raise; it is logged,
            ``self.is_loaded`` is set to ``False``, and it is re-raised.
    """
    if self.model is not None:
        return

    try:
        print("Loading WhisperX models...")
        # Load ASR model
        asr_model = whisperx.load_model(
            self.model_size,
            self.device,
            compute_type=self.compute_type,
            download_root=str(self.model_dir / "whisper"),
        )

        # Load alignment model
        align_model, align_metadata = whisperx.load_align_model(
            language_code="en",
            device=self.device,
            model_dir=str(self.model_dir / "alignment"),
        )
    except Exception as e:
        print(f"Failed to load WhisperX models: {e}")
        self.is_loaded = False
        raise

    # Publish everything together so the "model is None" gate above never
    # observes a half-initialised transcriber.
    self.model = asr_model
    self.align_model = align_model
    self.align_metadata = align_metadata
    self.is_loaded = True
    print("✓ WhisperX models loaded successfully")
|
||||
|
||||
async def transcribe_chunk(self, audio_chunk: np.ndarray) -> Optional[dict[str, Any]]:
|
||||
"""Transcribe an audio chunk with speaker diarization."""
|
||||
# Load models on first use
|
||||
if not self.is_loaded:
|
||||
return None
|
||||
try:
|
||||
self._ensure_models_loaded()
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
try:
|
||||
# Transcribe
|
||||
@@ -67,9 +98,9 @@ class WhisperXTranscriber:
|
||||
self.device,
|
||||
)
|
||||
|
||||
# Diarize
|
||||
diarize_segments = self.diarize_model(audio_chunk)
|
||||
result = whisperx.assign_word_speakers(diarize_segments, result)
|
||||
# Diarize using simple_diarizer
|
||||
if self.diarizer:
|
||||
result = self._apply_diarization(audio_chunk, result)
|
||||
|
||||
# Format output
|
||||
formatted_result = []
|
||||
@@ -85,4 +116,37 @@ class WhisperXTranscriber:
|
||||
|
||||
except Exception as e:
|
||||
print(f"Transcription error: {e}")
|
||||
return None
|
||||
return None
|
||||
|
||||
def _apply_diarization(
    self, audio_chunk: np.ndarray, transcript_result: dict
) -> dict:
    """Label each transcript segment with a speaker from ``self.diarizer``.

    The audio chunk is written to a temporary WAV file (the diarizer is
    called with a file path), diarized, and every entry in
    ``transcript_result["segments"]`` gets a ``"speaker"`` key: the label of
    the first diarization segment whose ``start``/``end`` contains the
    transcript segment's midpoint, or ``"Unknown"`` when none does.

    Best-effort: any error is printed and the transcript is returned as-is
    (segments processed before the error keep their labels). The temporary
    file is removed in all cases.

    Args:
        audio_chunk: Audio samples; written out at a 16000 Hz sample rate.
        transcript_result: Transcription result containing a ``"segments"``
            list of dicts with ``"start"`` and ``"end"`` keys. Mutated in place.

    Returns:
        The same ``transcript_result`` object.
    """
    temp_path = None
    try:
        # simple_diarizer needs a file path, so write to temp file. The handle
        # is closed before soundfile writes to the path by name.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
            temp_path = f.name
        sf.write(temp_path, audio_chunk, 16000)

        # Run diarization
        diarization = self.diarizer.diarize(temp_path, num_speakers=None)

        # Assign speakers to segments based on time overlap
        for segment in transcript_result["segments"]:
            segment_mid = (segment["start"] + segment["end"]) / 2
            speaker = "Unknown"

            for diar_seg in diarization:
                if diar_seg["start"] <= segment_mid <= diar_seg["end"]:
                    speaker = f"Speaker {diar_seg['label']}"
                    break

            segment["speaker"] = speaker

    except Exception as e:
        print(f"Diarization error: {e}")
    finally:
        # delete=False means nothing else removes the file; clean up even when
        # writing or diarization failed so temp files do not accumulate.
        if temp_path is not None:
            try:
                os.unlink(temp_path)
            except OSError as cleanup_error:
                print(f"Warning: could not remove temporary file {temp_path}: {cleanup_error}")

    return transcript_result
|
||||
|
||||
@@ -0,0 +1,64 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Production runner for Chronara backend - handles installed app paths."""
|
||||
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# In production, the app structure will be:
|
||||
# C:\Program Files\Chronara\
|
||||
# ├── Chronara.exe
|
||||
# ├── resources/
|
||||
# │ ├── models/
|
||||
# │ └── src/
|
||||
# │ └── backend/
|
||||
|
||||
# Find the app installation directory
|
||||
if getattr(sys, 'frozen', False):
|
||||
# Running in PyInstaller bundle
|
||||
app_dir = Path(sys.executable).parent
|
||||
else:
|
||||
# Running as script - check if we're in an installed app
|
||||
current_file = Path(__file__).resolve()
|
||||
|
||||
# Check if we're in a typical Windows install location
|
||||
if "Program Files" in str(current_file) or "AppData" in str(current_file):
|
||||
# We're installed - go up to find resources
|
||||
app_dir = current_file.parent.parent.parent.parent
|
||||
else:
|
||||
# Development mode
|
||||
app_dir = current_file.parent.parent.parent
|
||||
|
||||
# Set up paths
|
||||
resources_dir = app_dir / "resources"
|
||||
if resources_dir.exists():
|
||||
models_dir = resources_dir / "models"
|
||||
src_dir = resources_dir / "src"
|
||||
else:
|
||||
# Fallback to development structure
|
||||
models_dir = app_dir / "models"
|
||||
src_dir = app_dir / "src"
|
||||
|
||||
# Add src to path so imports work
|
||||
sys.path.insert(0, str(src_dir))
|
||||
|
||||
# Set environment variable for models
|
||||
os.environ['CHRONARA_MODELS_DIR'] = str(models_dir)
|
||||
|
||||
# Now import and run the app
|
||||
from backend.main import app
|
||||
import uvicorn
|
||||
|
||||
if __name__ == "__main__":
|
||||
print(f"Starting Chronara backend...")
|
||||
print(f"App directory: {app_dir}")
|
||||
print(f"Models directory: {models_dir}")
|
||||
print(f"Source directory: {src_dir}")
|
||||
|
||||
# Run the app
|
||||
uvicorn.run(
|
||||
app,
|
||||
host="127.0.0.1",
|
||||
port=8000,
|
||||
log_level="info"
|
||||
)
|
||||
@@ -0,0 +1,74 @@
|
||||
import { useState, useEffect, useRef } from "react";
|
||||
import { listen } from "@tauri-apps/api/event";
|
||||
import { invoke } from "@tauri-apps/api/core";
|
||||
|
||||
interface BackendLogsProps {
|
||||
isVisible: boolean;
|
||||
onToggle: () => void;
|
||||
}
|
||||
|
||||
// Maximum number of log lines kept in state; older lines are dropped first.
const MAX_LOG_LINES = 100;

/**
 * Collapsible panel showing backend log lines.
 *
 * On mount it requests the already-captured lines through the
 * `get_backend_logs` command and subscribes to `backend-log` events for new
 * ones. At most {@link MAX_LOG_LINES} lines are retained. While hidden, only a
 * toggle button with the current line count is rendered.
 *
 * @param isVisible - Whether the full log panel (rather than the toggle button) is shown.
 * @param onToggle - Called when the user clicks the show or hide button.
 */
export function BackendLogs({ isVisible, onToggle }: BackendLogsProps) {
  const [logs, setLogs] = useState<string[]>([]);
  const logsEndRef = useRef<HTMLDivElement>(null);

  useEffect(() => {
    // Guards against applying the initial snapshot after the component unmounted.
    let isMounted = true;

    // Get existing logs on mount
    invoke<string[]>("get_backend_logs")
      .then((existingLogs) => {
        if (isMounted) {
          // Apply the same cap as streamed lines so the snapshot cannot exceed it.
          setLogs(existingLogs.slice(-MAX_LOG_LINES));
        }
      })
      .catch((error: unknown) => {
        // Without a handler a failed command surfaces as an unhandled rejection.
        console.error("Failed to load backend logs:", error);
      });

    // Listen for new log events
    const unlisten = listen<string>("backend-log", (event) => {
      // Keep only the most recent lines
      setLogs((prev) => [...prev, event.payload].slice(-MAX_LOG_LINES));
    });

    return () => {
      isMounted = false;
      unlisten
        .then((stopListening) => stopListening())
        .catch((error: unknown) => {
          console.error("Failed to stop listening for backend logs:", error);
        });
    };
  }, []);

  useEffect(() => {
    // Auto-scroll to bottom when new logs arrive
    if (logsEndRef.current && isVisible) {
      logsEndRef.current.scrollIntoView({ behavior: "smooth" });
    }
  }, [logs, isVisible]);

  if (!isVisible) {
    return (
      <button className="logs-toggle-button" onClick={onToggle}>
        Show Backend Logs ({logs.length})
      </button>
    );
  }

  return (
    <div className="backend-logs">
      <div className="logs-header">
        <h3>Backend Logs</h3>
        <button className="logs-close-button" onClick={onToggle}>
          Hide
        </button>
      </div>
      <div className="logs-container">
        {logs.length === 0 ? (
          <p className="logs-empty">No logs yet...</p>
        ) : (
          logs.map((log, index) => (
            <pre key={index} className="log-line">
              {log}
            </pre>
          ))
        )}
        {/* Sentinel element used as the auto-scroll target. */}
        <div ref={logsEndRef} />
      </div>
    </div>
  );
}
|
||||
Reference in New Issue
Block a user