feat: Meeting transcription app with WhisperX and Llama #1

Open
naomi wants to merge 17 commits from feat/prototype into main
11 changed files with 187 additions and 149 deletions
Showing only changes of commit f9cb13f53f - Show all commits
+2 -2
View File
@@ -6,8 +6,8 @@
"scripts": {
"dev": "vite",
"build": "tsc && vite build",
"lint": "eslint .",
"lint:fix": "eslint . --fix",
"lint": "eslint src",
"lint:fix": "eslint src --fix",
"format": "prettier --write .",
"format:check": "prettier --check .",
"preview": "vite preview",
+1
View File
@@ -531,6 +531,7 @@ dependencies = [
"tauri",
"tauri-build",
"tauri-plugin-opener",
"tempfile",
"thiserror 2.0.18",
"tokio",
"tracing",
+3
View File
@@ -67,6 +67,9 @@ windows = { version = "0.62", features = [
"Win32_UI_Shell_PropertiesSystem",
] }
[dev-dependencies]
tempfile = "3.10"
[patch.crates-io]
# Fix cross-compilation: use CARGO_CFG_TARGET_OS instead of cfg!(windows)
llama-cpp-sys-2 = { path = "../patches/llama-cpp-sys-2" }
+1 -1
View File
@@ -487,7 +487,7 @@ fn check_ready(state: State<'_, AppState>) -> Result<bool, String> {
// At minimum, we need the summarizer loaded
// Whisper can be loaded on first use
let ready = summarizer.as_ref().map_or(false, |s| s.is_loaded());
let ready = summarizer.as_ref().is_some_and(|s| s.is_loaded());
Ok(ready)
}
+1 -1
View File
@@ -158,7 +158,7 @@ impl LlamaSummarizer {
for i in 0..max_tokens {
// Sample the next token using the sampler
// The sampler.sample() takes context and the index of the last token in the batch
let token = sampler.sample(&ctx, (batch.n_tokens() - 1) as i32);
let token = sampler.sample(&ctx, batch.n_tokens() - 1);
// Check for end of generation
if model.is_eog_token(token) {
+1
View File
@@ -415,6 +415,7 @@ mod tests {
start_sample: 16000,
end_sample: 32000,
speaker_id: 0,
fingerprint: None,
};
assert!((segment.start_seconds() - 1.0).abs() < 0.001);
+1 -3
View File
@@ -37,9 +37,7 @@
"installerIcon": "icons/icon.ico"
}
},
"resources": [
"resources/"
],
"resources": ["resources/"],
"icon": [
"icons/32x32.png",
"icons/128x128.png",
+11 -11
View File
@@ -5,21 +5,21 @@
font-weight: 400;
/* Witchy Purple Rose Palette */
--witch-purple: #2B1B3D;
--witch-plum: #44275A;
--witch-rose: #A8577E;
--witch-mauve: #D4A5C7;
--witch-lavender: #E8D5E8;
--witch-black: #0A0009;
--witch-silver: #C0C0C0;
--witch-moon: #F5F5F5;
--witch-purple: #2b1b3d;
--witch-plum: #44275a;
--witch-rose: #a8577e;
--witch-mauve: #d4a5c7;
--witch-lavender: #e8d5e8;
--witch-black: #0a0009;
--witch-silver: #c0c0c0;
--witch-moon: #f5f5f5;
--witch-shadow: rgba(10, 0, 9, 0.7);
/* Theme mappings */
--primary-color: var(--witch-rose);
--primary-hover: var(--witch-plum);
--secondary-color: var(--witch-mauve);
--danger-color: #D4658E;
--danger-color: #d4658e;
--bg-color: var(--witch-black);
--surface-color: var(--witch-purple);
--text-color: var(--witch-moon);
@@ -156,7 +156,7 @@ body {
}
.record-button.recording:hover {
background-color: #B94A6D;
background-color: #b94a6d;
}
.recording-indicator {
@@ -514,7 +514,7 @@ body {
flex: 1;
overflow-y: auto;
padding: 0.5rem 1rem;
font-family: 'Consolas', 'Monaco', 'Courier New', monospace;
font-family: "Consolas", "Monaco", "Courier New", monospace;
font-size: 0.75rem;
background-color: var(--witch-black);
color: var(--witch-silver);
+164 -126
View File
@@ -1,4 +1,4 @@
import { useState, useEffect, useRef } from "react";
import { useState, useEffect, useRef, useCallback } from "react";
import { invoke } from "@tauri-apps/api/core";
import { listen, type UnlistenFn } from "@tauri-apps/api/event";
import { getVersion } from "@tauri-apps/api/app";
@@ -42,7 +42,14 @@ interface Recording {
summaryProgress?: number;
}
type AppState = "checking" | "downloading_models" | "initializing" | "ready" | "recording" | "transcribing" | "error";
type AppState =
| "checking"
| "downloading_models"
| "initializing"
| "ready"
| "recording"
| "transcribing"
| "error";
function App() {
const [recordings, setRecordings] = useState<Recording[]>([]);
@@ -63,17 +70,75 @@ function App() {
const transcriptionProgressUnlisten = useRef<UnlistenFn | null>(null);
const summaryProgressUnlisten = useRef<UnlistenFn | null>(null);
// Define initializeApp before using it
const initializeApp = useCallback(async () => {
try {
setAppState("checking");
setStatusMessage("Checking if models are present...");
const modelsPresent = await invoke<boolean>("check_models");
if (!modelsPresent) {
setAppState("downloading_models");
setStatusMessage("Downloading AI models (~2GB)...");
setShowLogs(true);
await invoke("download_models");
// Check again after download
const modelsVerified = await invoke<boolean>("check_models");
if (!modelsVerified) {
throw new Error(
"Download completed but model file not found. This might be a path or permissions issue."
);
}
}
// Initialize ML models
setAppState("initializing");
setStatusMessage("Loading AI models (this may take a moment)...");
setShowLogs(true);
await invoke("initialize_models");
const ready = await invoke<boolean>("check_ready");
if (!ready) {
throw new Error("Models failed to initialize. Check logs for details.");
}
setAppState("ready");
setStatusMessage("");
setShowLogs(false);
// Load saved recordings
try {
const savedRecordings = await invoke<StoredRecording[]>("load_recordings");
const loadedRecordings = savedRecordings.map(storedToFrontend);
setRecordings(loadedRecordings);
console.log(`Loaded ${loadedRecordings.length} transcripts from storage`);
} catch (loadError) {
console.error("Failed to load transcripts:", loadError);
// Don't fail app init if we can't load transcripts
}
} catch (error) {
console.error("Initialization failed:", error);
setAppState("error");
setErrorMessage(String(error));
setShowLogs(true);
}
}, []);
useEffect(() => {
if (initStarted.current) return;
initStarted.current = true;
// Get app version
getVersion().then(version => {
getVersion().then((version) => {
setAppVersion(version);
});
initializeApp();
}, []);
}, [initializeApp]);
// Helper function to convert stored recording to frontend format
const storedToFrontend = (stored: StoredRecording): Recording => ({
@@ -119,17 +184,22 @@ function App() {
useEffect(() => {
const setupListeners = async () => {
// Listen for transcription progress
transcriptionProgressUnlisten.current = await listen<number>('transcription-progress', (event) => {
setTranscriptionProgress(event.payload * 100); // Convert 0-1 to 0-100
});
transcriptionProgressUnlisten.current = await listen<number>(
"transcription-progress",
(event) => {
setTranscriptionProgress(event.payload * 100); // Convert 0-1 to 0-100
}
);
// Listen for summary progress
summaryProgressUnlisten.current = await listen<number>('summary-progress', (event) => {
summaryProgressUnlisten.current = await listen<number>("summary-progress", (event) => {
const recordingId = selectedRecordingId || recordings[0]?.id;
if (recordingId) {
setRecordings(prev => prev.map(r =>
r.id === recordingId ? { ...r, summaryProgress: event.payload * 100 } : r
));
setRecordings((prev) =>
prev.map((r) =>
r.id === recordingId ? { ...r, summaryProgress: event.payload * 100 } : r
)
);
}
});
};
@@ -137,62 +207,6 @@ function App() {
setupListeners();
}, [selectedRecordingId, recordings]);
const initializeApp = async () => {
try {
setAppState("checking");
setStatusMessage("Checking if models are present...");
const modelsPresent = await invoke<boolean>("check_models");
if (!modelsPresent) {
setAppState("downloading_models");
setStatusMessage("Downloading AI models (~2GB)...");
setShowLogs(true);
await invoke("download_models");
// Check again after download
const modelsVerified = await invoke<boolean>("check_models");
if (!modelsVerified) {
throw new Error("Download completed but model file not found. This might be a path or permissions issue.");
}
}
// Initialize ML models
setAppState("initializing");
setStatusMessage("Loading AI models (this may take a moment)...");
setShowLogs(true);
await invoke("initialize_models");
// Check if models are ready
const ready = await invoke<boolean>("check_ready");
if (!ready) {
throw new Error("Models failed to initialize. Check logs for details.");
}
setAppState("ready");
setStatusMessage("");
setShowLogs(false);
// Load saved recordings
try {
const savedRecordings = await invoke<StoredRecording[]>("load_recordings");
const loadedRecordings = savedRecordings.map(storedToFrontend);
setRecordings(loadedRecordings);
console.log(`Loaded ${loadedRecordings.length} transcripts from storage`);
} catch (loadError) {
console.error("Failed to load transcripts:", loadError);
// Don't fail app init if we can't load transcripts
}
} catch (error) {
console.error("Initialization failed:", error);
setAppState("error");
setErrorMessage(String(error));
setShowLogs(true);
}
};
const retrySetup = () => {
setErrorMessage(null);
initStarted.current = false;
@@ -203,7 +217,7 @@ function App() {
try {
// Get the next chunk of audio
const [audioChunk, newOffset] = await invoke<[number[], number]>("get_audio_chunk", {
lastOffset: audioOffset.current
lastOffset: audioOffset.current,
});
// If we have enough audio (at least 5 seconds worth at 16kHz)
@@ -212,22 +226,26 @@ function App() {
const chunkStartTime = totalProcessedSamples.current / 16000;
const newSegments = await invoke<TranscriptSegment[]>("transcribe_chunk", {
audioData: audioChunk,
chunkStartTime: chunkStartTime
chunkStartTime: chunkStartTime,
});
if (newSegments.length > 0) {
// Calculate timestamps based on total processed samples
const baseTime = totalProcessedSamples.current / 16000;
const adjustedSegments = newSegments.map(seg => ({
const adjustedSegments = newSegments.map((seg) => ({
...seg,
start: seg.start + baseTime,
end: seg.end + baseTime,
}));
setActiveRecording(prev => prev ? {
...prev,
transcriptSegments: [...prev.transcriptSegments, ...adjustedSegments]
} : null);
setActiveRecording((prev) =>
prev
? {
...prev,
transcriptSegments: [...prev.transcriptSegments, ...adjustedSegments],
}
: null
);
}
// Track total processed samples
@@ -264,7 +282,7 @@ function App() {
// Start timer to show recording duration
recordingTimer.current = window.setInterval(() => {
setRecordingDuration(d => d + 1);
setRecordingDuration((d) => d + 1);
}, 1000);
// Start real-time transcription timer (every 5 seconds)
@@ -297,7 +315,7 @@ function App() {
// First, process any audio that hasn't been processed yet
try {
const finalChunk = await invoke<number[]>("get_remaining_audio", {
lastOffset: audioOffset.current
lastOffset: audioOffset.current,
});
if (finalChunk.length > 0) {
@@ -306,21 +324,25 @@ function App() {
// The progress will be updated via events from the backend
const finalSegments = await invoke<TranscriptSegment[]>("transcribe_chunk", {
audioData: finalChunk,
chunkStartTime: chunkStartTime
chunkStartTime: chunkStartTime,
});
if (finalSegments.length > 0) {
const baseTime = totalProcessedSamples.current / 16000;
const adjustedSegments = finalSegments.map(seg => ({
const adjustedSegments = finalSegments.map((seg) => ({
...seg,
start: seg.start + baseTime,
end: seg.end + baseTime,
}));
setActiveRecording(prev => prev ? {
...prev,
transcriptSegments: [...prev.transcriptSegments, ...adjustedSegments]
} : null);
setActiveRecording((prev) =>
prev
? {
...prev,
transcriptSegments: [...prev.transcriptSegments, ...adjustedSegments],
}
: null
);
}
}
} catch (chunkError) {
@@ -335,16 +357,16 @@ function App() {
if (activeRecording) {
const finalRecording = {
...activeRecording,
duration: recordingDuration
duration: recordingDuration,
};
setRecordings(prev => [finalRecording, ...prev]);
setRecordings((prev) => [finalRecording, ...prev]);
setSelectedRecordingId(finalRecording.id);
setActiveRecording(null);
// Save to persistent storage
try {
await invoke("save_recording", {
recording: frontendToStored(finalRecording)
recording: frontendToStored(finalRecording),
});
console.log("Transcript saved to storage");
} catch (saveError) {
@@ -354,7 +376,7 @@ function App() {
}
// Brief delay to show completion
await new Promise(resolve => setTimeout(resolve, 500));
await new Promise((resolve) => setTimeout(resolve, 500));
setAppState("ready");
setStatusMessage("");
@@ -368,13 +390,15 @@ function App() {
};
const generateSummary = async (recordingId: string) => {
const recording = recordings.find(r => r.id === recordingId);
const recording = recordings.find((r) => r.id === recordingId);
if (!recording || recording.transcriptSegments.length === 0) return;
// Update the recording to show it's generating
setRecordings(prev => prev.map(r =>
r.id === recordingId ? { ...r, isGeneratingSummary: true, summaryProgress: 0 } : r
));
setRecordings((prev) =>
prev.map((r) =>
r.id === recordingId ? { ...r, isGeneratingSummary: true, summaryProgress: 0 } : r
)
);
const fullTranscript = recording.transcriptSegments
.map((seg) => `${seg.speaker}: ${seg.text}`)
@@ -382,27 +406,27 @@ function App() {
try {
// Progress will be updated via events from the backend
const [summaryResult, titleResult] = await invoke<[string, string | null]>("summarize", { transcript: fullTranscript });
const [summaryResult, titleResult] = await invoke<[string, string | null]>("summarize", {
transcript: fullTranscript,
});
// Update the recording with the summary and title
const updatedRecording = recordings.find(r => r.id === recordingId);
const updatedRecording = recordings.find((r) => r.id === recordingId);
if (updatedRecording) {
const recordingWithSummary = {
...updatedRecording,
summary: summaryResult,
title: titleResult,
isGeneratingSummary: false,
summaryProgress: 100
summaryProgress: 100,
};
setRecordings(prev => prev.map(r =>
r.id === recordingId ? recordingWithSummary : r
));
setRecordings((prev) => prev.map((r) => (r.id === recordingId ? recordingWithSummary : r)));
// Update in persistent storage
try {
await invoke("update_recording", {
recording: frontendToStored(recordingWithSummary)
recording: frontendToStored(recordingWithSummary),
});
console.log("Transcript updated with summary");
} catch (updateError) {
@@ -411,9 +435,9 @@ function App() {
// Clear progress after a brief delay
setTimeout(() => {
setRecordings(prev => prev.map(r =>
r.id === recordingId ? { ...r, summaryProgress: undefined } : r
));
setRecordings((prev) =>
prev.map((r) => (r.id === recordingId ? { ...r, summaryProgress: undefined } : r))
);
}, 1000);
}
} catch (error) {
@@ -421,9 +445,13 @@ function App() {
setErrorMessage(String(error));
// Reset generating state on error
setRecordings(prev => prev.map(r =>
r.id === recordingId ? { ...r, isGeneratingSummary: false, summaryProgress: undefined } : r
));
setRecordings((prev) =>
prev.map((r) =>
r.id === recordingId
? { ...r, isGeneratingSummary: false, summaryProgress: undefined }
: r
)
);
}
};
@@ -438,7 +466,9 @@ function App() {
const deleteRecording = async (recordingId: string) => {
// Confirm deletion
if (!confirm("Are you sure you want to delete this transcript? This action cannot be undone.")) {
if (
!confirm("Are you sure you want to delete this transcript? This action cannot be undone.")
) {
return;
}
@@ -447,7 +477,7 @@ function App() {
await invoke("delete_recording", { recordingId });
// Remove from state
setRecordings(prev => prev.filter(r => r.id !== recordingId));
setRecordings((prev) => prev.filter((r) => r.id !== recordingId));
// Clear selection if we deleted the selected recording
if (selectedRecordingId === recordingId) {
@@ -462,7 +492,7 @@ function App() {
};
const downloadTranscript = (recordingId: string) => {
const recording = recordings.find(r => r.id === recordingId);
const recording = recordings.find((r) => r.id === recordingId);
if (!recording) return;
const content = recording.transcriptSegments
@@ -479,7 +509,7 @@ function App() {
};
const downloadSummary = (recordingId: string) => {
const recording = recordings.find(r => r.id === recordingId);
const recording = recordings.find((r) => r.id === recordingId);
if (!recording || !recording.summary) return;
const blob = new Blob([recording.summary], { type: "text/plain" });
@@ -504,8 +534,9 @@ function App() {
};
// Get the currently selected recording or active recording
const displayedRecording = activeRecording ||
(selectedRecordingId ? recordings.find(r => r.id === selectedRecordingId) : null);
const displayedRecording =
activeRecording ||
(selectedRecordingId ? recordings.find((r) => r.id === selectedRecordingId) : null);
const renderSetupScreen = () => (
<div className="setup-screen">
@@ -540,9 +571,7 @@ function App() {
<li>Whisper - Speech recognition</li>
<li>VAD - Speaker detection</li>
</ul>
<p className="setup-note">
This is a pure Rust backend - no Python required!
</p>
<p className="setup-note">This is a pure Rust backend - no Python required!</p>
</div>
)}
@@ -555,9 +584,9 @@ function App() {
</div>
)}
{(appState === "checking" || appState === "initializing" || appState === "downloading_models") && (
<div className="loading-spinner" />
)}
{(appState === "checking" ||
appState === "initializing" ||
appState === "downloading_models") && <div className="loading-spinner" />}
</div>
<BackendLogs isVisible={showLogs} onToggle={() => setShowLogs(!showLogs)} />
@@ -589,13 +618,9 @@ function App() {
{appState === "transcribing" && (
<div className="transcribing-indicator">
<ProgressBar
progress={transcriptionProgress}
label="Processing final audio..."
/>
<ProgressBar progress={transcriptionProgress} label="Processing final audio..." />
</div>
)}
</section>
);
@@ -606,17 +631,18 @@ function App() {
<p className="no-transcripts">No transcripts yet</p>
) : (
<ul className="transcript-items">
{recordings.map(recording => (
{recordings.map((recording) => (
<li
key={recording.id}
className={`transcript-item ${selectedRecordingId === recording.id ? 'selected' : ''}`}
className={`transcript-item ${selectedRecordingId === recording.id ? "selected" : ""}`}
>
<div
className="transcript-content"
onClick={() => setSelectedRecordingId(recording.id)}
>
<div className="transcript-title">
{recording.title || `${recording.timestamp.toLocaleTimeString()} - ${formatDuration(recording.duration)}`}
{recording.title ||
`${recording.timestamp.toLocaleTimeString()} - ${formatDuration(recording.duration)}`}
</div>
{recording.title && (
<div className="transcript-time">
@@ -624,7 +650,11 @@ function App() {
</div>
)}
<div className="transcript-status">
{recording.summary ? '✓ Summary' : recording.isGeneratingSummary ? '⏳ Summarizing...' : ''}
{recording.summary
? "✓ Summary"
: recording.isGeneratingSummary
? "⏳ Summarizing..."
: ""}
</div>
</div>
<button
@@ -645,7 +675,12 @@ function App() {
);
// Show setup screen for non-ready states
if (appState === "checking" || appState === "downloading_models" || appState === "initializing" || appState === "error") {
if (
appState === "checking" ||
appState === "downloading_models" ||
appState === "initializing" ||
appState === "error"
) {
return (
<main className="container">
<header className="app-header">
@@ -678,7 +713,10 @@ function App() {
{displayedRecording && (
<div className="transcript-details">
<div className="transcript-header">
<h2>{displayedRecording.title || `Transcript from ${displayedRecording.timestamp.toLocaleString()}`}</h2>
<h2>
{displayedRecording.title ||
`Transcript from ${displayedRecording.timestamp.toLocaleString()}`}
</h2>
<div className="transcript-actions">
<button
className="secondary-button"
+1 -1
View File
@@ -16,4 +16,4 @@ export function ProgressBar({ progress, label }: ProgressBarProps) {
<div className="progress-text">{Math.round(progress)}%</div>
</div>
);
}
}
+1 -4
View File
@@ -16,10 +16,7 @@ export function SummaryDisplay({ summary, isLoading, progress }: SummaryDisplayP
{isLoading ? (
<div className="loading">
{progress !== undefined ? (
<ProgressBar
progress={progress}
label="Generating summary..."
/>
<ProgressBar progress={progress} label="Generating summary..." />
) : (
<>
<div className="spinner"></div>