feat: Meeting transcription app with WhisperX and Llama #1

Open
naomi wants to merge 17 commits from feat/prototype into main
4 changed files with 457 additions and 42 deletions
Showing only changes of commit 9bf92d3365 - Show all commits
+83
View File
@@ -12,11 +12,13 @@ use tauri::{Emitter, Manager, State};
use tracing::{debug, info}; use tracing::{debug, info};
pub mod ml; pub mod ml;
pub mod storage;
use ml::summarizer::{get_model_filename, LlamaSummarizer}; use ml::summarizer::{get_model_filename, LlamaSummarizer};
use ml::transcriber::{TranscriptSegment, WhisperTranscriber}; use ml::transcriber::{TranscriptSegment, WhisperTranscriber};
use ml::vad::SpeakerSeparator; use ml::vad::SpeakerSeparator;
use ml::audio::AudioCapture; use ml::audio::AudioCapture;
use storage::{RecordingStorage, StoredRecording};
/// Application state containing the ML models and audio capture. /// Application state containing the ML models and audio capture.
struct AppState { struct AppState {
@@ -25,6 +27,7 @@ struct AppState {
speaker_separator: Mutex<Option<SpeakerSeparator>>, speaker_separator: Mutex<Option<SpeakerSeparator>>,
audio_capture: Mutex<Option<AudioCapture>>, audio_capture: Mutex<Option<AudioCapture>>,
logs: Arc<Mutex<Vec<String>>>, logs: Arc<Mutex<Vec<String>>>,
storage: Mutex<Option<RecordingStorage>>,
} }
impl AppState { impl AppState {
@@ -35,6 +38,7 @@ impl AppState {
speaker_separator: Mutex::new(None), speaker_separator: Mutex::new(None),
audio_capture: Mutex::new(None), audio_capture: Mutex::new(None),
logs: Arc::new(Mutex::new(Vec::new())), logs: Arc::new(Mutex::new(Vec::new())),
storage: Mutex::new(None),
} }
} }
} }
@@ -273,6 +277,20 @@ async fn initialize_models(
} }
} }
// Initialize storage
emit_log(&app_handle, &logs, "[Init] Initializing recording storage...");
if let Ok(app_data_dir) = app_handle.path().app_data_dir() {
match RecordingStorage::new(&app_data_dir) {
Ok(storage) => {
*state.storage.lock() = Some(storage);
emit_log(&app_handle, &logs, "[Init] Recording storage initialized successfully");
}
Err(e) => {
emit_log(&app_handle, &logs, &format!("[Init WARNING] Storage initialization failed: {}", e));
}
}
}
emit_log(&app_handle, &logs, "[Init] Model initialization complete"); emit_log(&app_handle, &logs, "[Init] Model initialization complete");
Ok("Models initialized".to_string()) Ok("Models initialized".to_string())
} }
@@ -501,6 +519,67 @@ fn check_ready(state: State<'_, AppState>) -> Result<bool, String> {
Ok(ready) Ok(ready)
} }
/// Tauri command: write `recording` to persistent storage.
///
/// # Errors
/// Returns `"Storage not initialized"` when the application state holds no
/// storage manager, or `"Failed to save recording: …"` when the write fails.
#[tauri::command]
fn save_recording(
    state: State<'_, AppState>,
    recording: StoredRecording,
) -> Result<(), String> {
    // The guard is held for the whole call so the storage slot cannot change underneath us.
    let guard = state.storage.lock();
    let Some(store) = guard.as_ref() else {
        return Err("Storage not initialized".to_string());
    };
    store
        .save_recording(&recording)
        .map_err(|err| format!("Failed to save recording: {}", err))
}
/// Tauri command: return every recording found in persistent storage.
///
/// # Errors
/// Returns `"Storage not initialized"` when the application state holds no
/// storage manager, or `"Failed to load recordings: …"` when loading fails.
#[tauri::command]
fn load_recordings(
    state: State<'_, AppState>,
) -> Result<Vec<StoredRecording>, String> {
    let guard = state.storage.lock();
    let Some(store) = guard.as_ref() else {
        return Err("Storage not initialized".to_string());
    };
    store
        .load_all_recordings()
        .map_err(|err| format!("Failed to load recordings: {}", err))
}
/// Tauri command: remove the recording identified by `recording_id` from
/// persistent storage.
///
/// # Errors
/// Returns `"Storage not initialized"` when the application state holds no
/// storage manager, or `"Failed to delete recording: …"` when removal fails.
#[tauri::command]
fn delete_recording(
    state: State<'_, AppState>,
    recording_id: String,
) -> Result<(), String> {
    let guard = state.storage.lock();
    let Some(store) = guard.as_ref() else {
        return Err("Storage not initialized".to_string());
    };
    store
        .delete_recording(&recording_id)
        .map_err(|err| format!("Failed to delete recording: {}", err))
}
/// Tauri command: replace the stored copy of `recording` (for example after a
/// summary has been generated for it).
///
/// # Errors
/// Returns `"Storage not initialized"` when the application state holds no
/// storage manager, or `"Failed to update recording: …"` when the write fails.
#[tauri::command]
fn update_recording(
    state: State<'_, AppState>,
    recording: StoredRecording,
) -> Result<(), String> {
    let guard = state.storage.lock();
    let Some(store) = guard.as_ref() else {
        return Err("Storage not initialized".to_string());
    };
    store
        .update_recording(&recording)
        .map_err(|err| format!("Failed to update recording: {}", err))
}
#[cfg_attr(mobile, tauri::mobile_entry_point)] #[cfg_attr(mobile, tauri::mobile_entry_point)]
pub fn run() { pub fn run() {
// Initialize tracing // Initialize tracing
@@ -523,6 +602,10 @@ pub fn run() {
summarize, summarize,
get_backend_logs, get_backend_logs,
check_ready, check_ready,
save_recording,
load_recordings,
delete_recording,
update_recording,
]) ])
.run(tauri::generate_context!()) .run(tauri::generate_context!())
.expect("error while running tauri application"); .expect("error while running tauri application");
+182
View File
@@ -0,0 +1,182 @@
//! Storage module for persisting recording history.
use serde::{Deserialize, Serialize};
use std::fs;
use std::path::{Path, PathBuf};
use thiserror::Error;
use tracing::{debug, info, warn};
/// Errors produced by recording storage operations.
#[derive(Error, Debug)]
pub enum StorageError {
/// A filesystem operation failed; converted automatically from `std::io::Error`.
#[error("IO error: {0}")]
IoError(#[from] std::io::Error),
/// JSON encoding or decoding failed; converted automatically from `serde_json::Error`.
#[error("Serialization error: {0}")]
SerializationError(#[from] serde_json::Error),
/// No stored file exists for the given recording ID (the payload is that ID).
#[error("Recording not found: {0}")]
RecordingNotFound(String),
}
/// A transcript segment with timing information.
///
/// Serialized to and from JSON as part of a [`StoredRecording`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StoredTranscriptSegment {
/// Segment start time. NOTE(review): presumably seconds from the start of the recording — confirm.
pub start: f64,
/// Segment end time, in the same unit as `start`.
pub end: f64,
/// Transcribed text of the segment.
pub text: String,
/// Speaker label for the segment (the unit test uses `"Speaker 1"`).
pub speaker: String,
}
/// A stored recording with all its data.
///
/// Each recording is persisted as one pretty-printed JSON file named `<id>.json`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StoredRecording {
/// Unique identifier; also used as the file name stem on disk.
pub id: String,
/// Creation time; recordings are sorted newest-first by comparing these strings.
pub timestamp: String, // ISO 8601 timestamp
/// Recording length. NOTE(review): presumably seconds — confirm against the frontend.
pub duration: f64,
/// Transcript segments belonging to this recording.
pub transcript_segments: Vec<StoredTranscriptSegment>,
/// Generated summary, or `None` when the recording has none.
pub summary: Option<String>,
}
/// Storage manager for recordings.
///
/// Persists each recording as an individual JSON file inside `storage_dir`.
pub struct RecordingStorage {
/// Directory holding the `<id>.json` files (`<app_data_dir>/recordings`).
storage_dir: PathBuf,
}
impl RecordingStorage {
/// Create a new storage manager.
pub fn new(app_data_dir: &Path) -> Result<Self, StorageError> {
let storage_dir = app_data_dir.join("recordings");
// Ensure the directory exists
fs::create_dir_all(&storage_dir)?;
info!("Recording storage initialized at: {}", storage_dir.display());
Ok(Self { storage_dir })
}
/// Save a recording to disk.
pub fn save_recording(&self, recording: &StoredRecording) -> Result<(), StorageError> {
let file_path = self.storage_dir.join(format!("{}.json", recording.id));
debug!("Saving recording {} to {}", recording.id, file_path.display());
let json_data = serde_json::to_string_pretty(recording)?;
fs::write(&file_path, json_data)?;
info!("Recording {} saved successfully", recording.id);
Ok(())
}
/// Load all recordings from disk.
pub fn load_all_recordings(&self) -> Result<Vec<StoredRecording>, StorageError> {
let mut recordings = Vec::new();
debug!("Loading recordings from {}", self.storage_dir.display());
// Read all JSON files in the directory
if let Ok(entries) = fs::read_dir(&self.storage_dir) {
for entry in entries.flatten() {
let path = entry.path();
// Skip non-JSON files
if path.extension().and_then(|s| s.to_str()) != Some("json") {
continue;
}
// Try to load the recording
match fs::read_to_string(&path) {
Ok(contents) => {
match serde_json::from_str::<StoredRecording>(&contents) {
Ok(recording) => {
recordings.push(recording);
}
Err(e) => {
warn!("Failed to parse recording file {}: {}", path.display(), e);
}
}
}
Err(e) => {
warn!("Failed to read recording file {}: {}", path.display(), e);
}
}
}
}
// Sort by timestamp (newest first)
recordings.sort_by(|a, b| b.timestamp.cmp(&a.timestamp));
info!("Loaded {} recordings", recordings.len());
Ok(recordings)
}
/// Delete a specific recording.
pub fn delete_recording(&self, recording_id: &str) -> Result<(), StorageError> {
let file_path = self.storage_dir.join(format!("{}.json", recording_id));
if !file_path.exists() {
return Err(StorageError::RecordingNotFound(recording_id.to_string()));
}
debug!("Deleting recording {} at {}", recording_id, file_path.display());
fs::remove_file(&file_path)?;
info!("Recording {} deleted successfully", recording_id);
Ok(())
}
/// Update an existing recording (e.g., to add summary).
pub fn update_recording(&self, recording: &StoredRecording) -> Result<(), StorageError> {
// For now, just overwrite the file
self.save_recording(recording)
}
/// Check if a recording exists.
pub fn recording_exists(&self, recording_id: &str) -> bool {
let file_path = self.storage_dir.join(format!("{}.json", recording_id));
file_path.exists()
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Round-trips a single recording through save, load, and delete.
    #[test]
    fn test_storage_operations() {
        let scratch = TempDir::new().unwrap();
        let store = RecordingStorage::new(scratch.path()).unwrap();

        // Fixture: one recording with one segment and a summary.
        let segment = StoredTranscriptSegment {
            start: 0.0,
            end: 5.0,
            text: "Hello world".to_string(),
            speaker: "Speaker 1".to_string(),
        };
        let sample = StoredRecording {
            id: "test123".to_string(),
            timestamp: "2024-01-01T12:00:00Z".to_string(),
            duration: 60.0,
            transcript_segments: vec![segment],
            summary: Some("Test summary".to_string()),
        };

        // Saving makes the recording visible.
        store.save_recording(&sample).unwrap();
        assert!(store.recording_exists("test123"));

        // Loading returns exactly that recording.
        let loaded = store.load_all_recordings().unwrap();
        assert_eq!(loaded.len(), 1);
        assert_eq!(loaded[0].id, "test123");

        // Deleting removes it again.
        store.delete_recording("test123").unwrap();
        assert!(!store.recording_exists("test123"));

        // Nothing is left to load afterwards.
        let remaining = store.load_all_recordings().unwrap();
        assert_eq!(remaining.len(), 0);
    }
}
+51 -15
View File
@@ -604,7 +604,7 @@ body {
min-height: 0; min-height: 0;
} }
.recordings-list { .transcripts-list {
width: 300px; width: 300px;
background-color: var(--surface-color); background-color: var(--surface-color);
border-radius: 0.5rem; border-radius: 0.5rem;
@@ -612,24 +612,27 @@ body {
overflow-y: auto; overflow-y: auto;
} }
.recordings-list h3 { .transcripts-list h3 {
margin-bottom: 1rem; margin-bottom: 1rem;
font-size: 1.125rem; font-size: 1.125rem;
} }
.no-recordings { .no-transcripts {
color: var(--text-secondary); color: var(--text-secondary);
font-style: italic; font-style: italic;
} }
.recordings-items { .transcript-items {
list-style: none; list-style: none;
display: flex; display: flex;
flex-direction: column; flex-direction: column;
gap: 0.5rem; gap: 0.5rem;
} }
.recording-item { .transcript-item {
display: flex;
align-items: center;
justify-content: space-between;
padding: 0.75rem; padding: 0.75rem;
border-radius: 0.375rem; border-radius: 0.375rem;
cursor: pointer; cursor: pointer;
@@ -638,34 +641,67 @@ body {
border: 1px solid var(--border-color); border: 1px solid var(--border-color);
} }
.recording-item:hover { .transcript-content {
flex: 1;
padding-right: 0.5rem;
}
.transcript-item:hover {
background-color: var(--surface-color); background-color: var(--surface-color);
} }
.recording-item.selected { .transcript-item.selected {
background-color: var(--primary-color); background-color: var(--primary-color);
color: white; color: white;
} }
.recording-item.selected .recording-time { .transcript-item.selected .transcript-time {
color: white; color: white;
} }
.recording-item.selected .recording-status { .transcript-item.selected .transcript-status {
color: rgba(255, 255, 255, 0.8); color: rgba(255, 255, 255, 0.8);
} }
.recording-time { .transcript-time {
font-weight: 500; font-weight: 500;
font-size: 0.875rem; font-size: 0.875rem;
} }
.recording-status { .transcript-status {
font-size: 0.75rem; font-size: 0.75rem;
color: var(--text-secondary); color: var(--text-secondary);
margin-top: 0.25rem; margin-top: 0.25rem;
} }
/* Delete button inside a transcript list row: transparent and slightly dimmed by default. */
.transcript-item .delete-button {
padding: 0.375rem 0.5rem;
font-size: 0.875rem;
background-color: transparent;
border: 1px solid var(--border-color);
border-radius: 0.25rem;
cursor: pointer;
transition: all 0.2s;
opacity: 0.7;
}
/* On hover the button fills with the danger color and becomes fully opaque. */
.transcript-item .delete-button:hover {
background-color: var(--danger-color);
border-color: var(--danger-color);
color: white;
opacity: 1;
}
/* In a selected row the button uses white text and a translucent white border. */
.transcript-item.selected .delete-button {
border-color: rgba(255, 255, 255, 0.5);
color: white;
}
/* Hovering the button in a selected row applies the same danger fill and border. */
.transcript-item.selected .delete-button:hover {
background-color: var(--danger-color);
border-color: var(--danger-color);
}
.main-content { .main-content {
flex: 1; flex: 1;
display: flex; display: flex;
@@ -674,14 +710,14 @@ body {
min-width: 0; min-width: 0;
} }
.recording-details { .transcript-details {
flex: 1; flex: 1;
display: flex; display: flex;
flex-direction: column; flex-direction: column;
gap: 1.5rem; gap: 1.5rem;
} }
.recording-header { .transcript-header {
display: flex; display: flex;
justify-content: space-between; justify-content: space-between;
align-items: center; align-items: center;
@@ -689,12 +725,12 @@ body {
gap: 1rem; gap: 1rem;
} }
.recording-header h2 { .transcript-header h2 {
font-size: 1.25rem; font-size: 1.25rem;
margin: 0; margin: 0;
} }
.recording-actions { .transcript-actions {
display: flex; display: flex;
gap: 0.75rem; gap: 0.75rem;
flex-wrap: wrap; flex-wrap: wrap;
+141 -27
View File
@@ -14,6 +14,21 @@ interface TranscriptSegment {
speaker: string; speaker: string;
} }
// Transcript segment in the shape exchanged with the backend storage commands.
// Field names match the Rust `StoredTranscriptSegment` struct.
interface StoredTranscriptSegment {
// Segment start time (NOTE(review): presumably seconds — confirm).
start: number;
// Segment end time, same unit as `start`.
end: number;
// Transcribed text of the segment.
text: string;
// Speaker label for the segment.
speaker: string;
}
// Recording in the shape exchanged with the backend storage commands.
// Keys are snake_case to match the Rust `StoredRecording` struct's field names.
interface StoredRecording {
// Unique identifier of the recording.
id: string;
timestamp: string; // ISO 8601 string
// Recording length (NOTE(review): presumably seconds — confirm).
duration: number;
// Transcript segments belonging to this recording.
transcript_segments: StoredTranscriptSegment[];
// Generated summary, or null when there is none.
summary: string | null;
}
interface Recording { interface Recording {
id: string; id: string;
timestamp: Date; timestamp: Date;
@@ -51,6 +66,26 @@ function App() {
initializeApp(); initializeApp();
}, []); }, []);
// Map a backend StoredRecording onto the UI's Recording shape: the ISO
// timestamp string becomes a Date, and the UI-only summary flags start idle.
const storedToFrontend = (stored: StoredRecording): Recording => {
  const { id, timestamp, duration, transcript_segments, summary } = stored;
  return {
    id,
    timestamp: new Date(timestamp),
    duration,
    transcriptSegments: transcript_segments,
    summary,
    isGeneratingSummary: false,
    summaryProgress: undefined,
  };
};
// Map a UI Recording onto the backend StoredRecording shape: the Date becomes
// an ISO 8601 string and the UI-only summary flags are left out.
const frontendToStored = (recording: Recording): StoredRecording => {
  const { id, timestamp, duration, transcriptSegments, summary } = recording;
  return {
    id,
    timestamp: timestamp.toISOString(),
    duration,
    transcript_segments: transcriptSegments,
    summary,
  };
};
// Cleanup timers and listeners on unmount // Cleanup timers and listeners on unmount
useEffect(() => { useEffect(() => {
return () => { return () => {
@@ -128,6 +163,17 @@ function App() {
setAppState("ready"); setAppState("ready");
setStatusMessage(""); setStatusMessage("");
setShowLogs(false); setShowLogs(false);
// Load saved recordings
try {
const savedRecordings = await invoke<StoredRecording[]>("load_recordings");
const loadedRecordings = savedRecordings.map(storedToFrontend);
setRecordings(loadedRecordings);
console.log(`Loaded ${loadedRecordings.length} transcripts from storage`);
} catch (loadError) {
console.error("Failed to load transcripts:", loadError);
// Don't fail app init if we can't load transcripts
}
} catch (error) { } catch (error) {
console.error("Initialization failed:", error); console.error("Initialization failed:", error);
setAppState("error"); setAppState("error");
@@ -278,6 +324,17 @@ function App() {
setRecordings(prev => [finalRecording, ...prev]); setRecordings(prev => [finalRecording, ...prev]);
setSelectedRecordingId(finalRecording.id); setSelectedRecordingId(finalRecording.id);
setActiveRecording(null); setActiveRecording(null);
// Save to persistent storage
try {
await invoke("save_recording", {
recording: frontendToStored(finalRecording)
});
console.log("Transcript saved to storage");
} catch (saveError) {
console.error("Failed to save transcript:", saveError);
// Don't fail the whole operation if storage fails
}
} }
// Brief delay to show completion // Brief delay to show completion
@@ -312,18 +369,36 @@ function App() {
const summaryResult = await invoke<string>("summarize", { transcript: fullTranscript }); const summaryResult = await invoke<string>("summarize", { transcript: fullTranscript });
// Update the recording with the summary // Update the recording with the summary
setRecordings(prev => prev.map(r => const updatedRecording = recordings.find(r => r.id === recordingId);
r.id === recordingId if (updatedRecording) {
? { ...r, summary: summaryResult, isGeneratingSummary: false, summaryProgress: 100 } const recordingWithSummary = {
: r ...updatedRecording,
)); summary: summaryResult,
isGeneratingSummary: false,
summaryProgress: 100
};
// Clear progress after a brief delay
setTimeout(() => {
setRecordings(prev => prev.map(r => setRecordings(prev => prev.map(r =>
r.id === recordingId ? { ...r, summaryProgress: undefined } : r r.id === recordingId ? recordingWithSummary : r
)); ));
}, 1000);
// Update in persistent storage
try {
await invoke("update_recording", {
recording: frontendToStored(recordingWithSummary)
});
console.log("Transcript updated with summary");
} catch (updateError) {
console.error("Failed to update transcript:", updateError);
}
// Clear progress after a brief delay
setTimeout(() => {
setRecordings(prev => prev.map(r =>
r.id === recordingId ? { ...r, summaryProgress: undefined } : r
));
}, 1000);
}
} catch (error) { } catch (error) {
console.error("Failed to generate summary:", error); console.error("Failed to generate summary:", error);
setErrorMessage(String(error)); setErrorMessage(String(error));
@@ -344,6 +419,31 @@ function App() {
} }
}; };
// Delete a transcript after user confirmation: remove it from backend storage
// first, then drop it from local state. If the backend call fails, local state
// is left untouched and the error is shown to the user.
const deleteRecording = async (recordingId: string) => {
  // Confirm deletion
  if (!confirm("Are you sure you want to delete this transcript? This action cannot be undone.")) {
    return;
  }

  try {
    // Delete from backend storage
    await invoke("delete_recording", { recordingId });

    // Remove from state
    setRecordings(prev => prev.filter(r => r.id !== recordingId));

    // Clear selection if we deleted the selected recording. A functional update
    // is used because `selectedRecordingId` captured by this closure may be
    // stale by the time the awaited backend call returns.
    setSelectedRecordingId(current => (current === recordingId ? null : current));

    console.log(`Transcript ${recordingId} deleted`);
  } catch (error) {
    console.error("Failed to delete transcript:", error);
    alert(`Failed to delete transcript: ${error}`);
  }
};
const downloadTranscript = (recordingId: string) => { const downloadTranscript = (recordingId: string) => {
const recording = recordings.find(r => r.id === recordingId); const recording = recordings.find(r => r.id === recordingId);
if (!recording) return; if (!recording) return;
@@ -451,7 +551,7 @@ function App() {
<section className="controls-section"> <section className="controls-section">
{appState === "ready" && ( {appState === "ready" && (
<button className="record-button" onClick={startRecording}> <button className="record-button" onClick={startRecording}>
🎙 Start Recording 🎙 Start Transcribing
</button> </button>
)} )}
@@ -465,7 +565,7 @@ function App() {
)} )}
</div> </div>
<button className="stop-button" onClick={stopRecording}> <button className="stop-button" onClick={stopRecording}>
Stop Recording Stop Transcribing
</button> </button>
</div> </div>
)} )}
@@ -483,24 +583,38 @@ function App() {
); );
const renderRecordingsList = () => ( const renderRecordingsList = () => (
<aside className="recordings-list"> <aside className="transcripts-list">
<h3>Recording History</h3> <h3>Transcript History</h3>
{recordings.length === 0 ? ( {recordings.length === 0 ? (
<p className="no-recordings">No recordings yet</p> <p className="no-transcripts">No transcripts yet</p>
) : ( ) : (
<ul className="recordings-items"> <ul className="transcript-items">
{recordings.map(recording => ( {recordings.map(recording => (
<li <li
key={recording.id} key={recording.id}
className={`recording-item ${selectedRecordingId === recording.id ? 'selected' : ''}`} className={`transcript-item ${selectedRecordingId === recording.id ? 'selected' : ''}`}
onClick={() => setSelectedRecordingId(recording.id)}
> >
<div className="recording-time"> <div
{recording.timestamp.toLocaleTimeString()} - {formatDuration(recording.duration)} className="transcript-content"
</div> onClick={() => setSelectedRecordingId(recording.id)}
<div className="recording-status"> >
{recording.summary ? '✓ Summary' : recording.isGeneratingSummary ? '⏳ Summarizing...' : ''} <div className="transcript-time">
{recording.timestamp.toLocaleTimeString()} - {formatDuration(recording.duration)}
</div>
<div className="transcript-status">
{recording.summary ? '✓ Summary' : recording.isGeneratingSummary ? '⏳ Summarizing...' : ''}
</div>
</div> </div>
<button
className="delete-button"
onClick={(e) => {
e.stopPropagation();
deleteRecording(recording.id);
}}
title="Delete transcript"
>
🗑
</button>
</li> </li>
))} ))}
</ul> </ul>
@@ -538,10 +652,10 @@ function App() {
{/* Display selected recording or active recording */} {/* Display selected recording or active recording */}
{displayedRecording && ( {displayedRecording && (
<div className="recording-details"> <div className="transcript-details">
<div className="recording-header"> <div className="transcript-header">
<h2>Recording from {displayedRecording.timestamp.toLocaleString()}</h2> <h2>Transcript from {displayedRecording.timestamp.toLocaleString()}</h2>
<div className="recording-actions"> <div className="transcript-actions">
<button <button
className="secondary-button" className="secondary-button"
onClick={() => downloadTranscript(displayedRecording.id)} onClick={() => downloadTranscript(displayedRecording.id)}
@@ -588,7 +702,7 @@ function App() {
{!displayedRecording && recordings.length === 0 && appState === "ready" && ( {!displayedRecording && recordings.length === 0 && appState === "ready" && (
<div className="empty-state"> <div className="empty-state">
<p>Click "Start Recording" to begin your first meeting transcription!</p> <p>Click "Start Transcribing" to begin transcribing your first meeting!</p>
</div> </div>
)} )}
</div> </div>