feat: handle transcription in real time

2026-01-29 10:31:40 -08:00
parent df8a89e05d
commit e6c19b589e
4 changed files with 166 additions and 5 deletions
@@ -387,6 +387,35 @@ async fn transcribe_chunk(
    Ok(segments)
 }

+/// Tauri command: fetch the audio captured since `last_offset` for live transcription.
+/// Yields the samples plus the offset to send back on the next call, or an error string.
+#[tauri::command]
+async fn get_audio_chunk(
+    state: State<'_, AppState>,
+    last_offset: usize,
+) -> Result<(Vec<f32>, usize), String> {
+    // The guard lives only for this synchronous body; nothing is awaited while it is held.
+    let guard = state.audio_capture.lock();
+    match &*guard {
+        Some(capture) => Ok(capture.extract_chunk(last_offset)),
+        None => Err("No active recording".to_string()),
+    }
+}
+
+/// Tauri command: copy the samples at or after `last_offset`; fails if nothing is recording.
+#[tauri::command]
+async fn get_remaining_audio(
+    state: State<'_, AppState>,
+    last_offset: usize,
+) -> Result<Vec<f32>, String> {
+    // `audio_capture` holds `None` when no recording is active; report that as an error.
+    let guard = state.audio_capture.lock();
+    match *guard {
+        Some(ref capture) => Ok(capture.get_remaining_audio(last_offset)),
+        None => Err(String::from("No active recording")),
+    }
+}
+
 /// Generate a summary from a transcript.
 #[tauri::command]
 async fn summarize(
@@ -449,6 +478,8 @@ pub fn run() {
            start_recording,
            stop_recording,
            transcribe_chunk,
+            get_audio_chunk,
+            get_remaining_audio,
            summarize,
            get_backend_logs,
            check_ready,
@@ -196,6 +196,51 @@ impl AudioCapture {
        let sample_count = self.buffer.lock().len();
        sample_count as f32 / WHISPER_SAMPLE_RATE as f32
    }
+
+    /// Copy out the samples recorded since `from_sample` for real-time processing.
+    ///
+    /// Returns `(chunk, next_offset)`: `chunk` holds only the samples at or after
+    /// `from_sample`, and `next_offset` is the value to pass on the following call.
+    /// Side effect: once the buffer exceeds 30 seconds of audio, its oldest samples
+    /// are dropped and `next_offset` is shifted down to match the shortened buffer.
+    /// NOTE: an offset obtained before such a trim no longer points at the same audio.
+    pub fn extract_chunk(&self, from_sample: usize) -> (Vec<f32>, usize) {
+        // Retention cap: 30 seconds' worth of samples, to bound memory use.
+        const MAX_BUFFER_SECONDS: usize = 30;
+        const MAX_BUFFER_SAMPLES: usize = WHISPER_SAMPLE_RATE as usize * MAX_BUFFER_SECONDS;
+
+        let mut samples = self.buffer.lock();
+        let total = samples.len();
+
+        // Caller is already caught up (or ahead): nothing to copy, nothing to trim.
+        if from_sample >= total {
+            return (Vec::new(), total);
+        }
+
+        // Copy the unseen tail before any trimming, so the chunk is never cut short.
+        let chunk = samples[from_sample..].to_vec();
+
+        // Number of leading samples beyond the cap; zero while the buffer is small.
+        let excess = total.saturating_sub(MAX_BUFFER_SAMPLES);
+        if excess > 0 {
+            samples.drain(..excess);
+        }
+
+        // After trimming, the end of the buffer sits at `total - excess`.
+        (chunk, total - excess)
+    }
+
+    /// Copy every sample at or after `from_sample`, leaving the buffer unchanged.
+    /// An offset at or past the end of the buffer yields an empty vector.
+    pub fn get_remaining_audio(&self, from_sample: usize) -> Vec<f32> {
+        let samples = self.buffer.lock();
+        // Range indexing would panic on an offset beyond the end, so guard it first.
+        if from_sample < samples.len() {
+            samples[from_sample..].to_vec()
+        } else {
+            Vec::new()
+        }
+    }
 }

 impl Default for AudioCapture {