feat: handle transcription in real time

This commit is contained in:
2026-01-29 10:31:40 -08:00
parent df8a89e05d
commit e6c19b589e
4 changed files with 166 additions and 5 deletions
+31
View File
@@ -387,6 +387,35 @@ async fn transcribe_chunk(
Ok(segments)
}
/// Get the next chunk of audio for real-time transcription.
/// Returns the audio chunk and the new offset to use for the next call.
#[tauri::command]
async fn get_audio_chunk(
state: State<'_, AppState>,
last_offset: usize,
) -> Result<(Vec<f32>, usize), String> {
let audio_guard = state.audio_capture.lock();
if let Some(ref capture) = *audio_guard {
Ok(capture.extract_chunk(last_offset))
} else {
Err("No active recording".to_string())
}
}
/// Get remaining audio without modifying the buffer (for final processing).
#[tauri::command]
async fn get_remaining_audio(
state: State<'_, AppState>,
last_offset: usize,
) -> Result<Vec<f32>, String> {
let audio_guard = state.audio_capture.lock();
if let Some(ref capture) = *audio_guard {
Ok(capture.get_remaining_audio(last_offset))
} else {
Err("No active recording".to_string())
}
}
/// Generate a summary from a transcript.
#[tauri::command]
async fn summarize(
@@ -449,6 +478,8 @@ pub fn run() {
start_recording,
stop_recording,
transcribe_chunk,
get_audio_chunk,
get_remaining_audio,
summarize,
get_backend_logs,
check_ready,
+45
View File
@@ -196,6 +196,51 @@ impl AudioCapture {
let sample_count = self.buffer.lock().len();
sample_count as f32 / WHISPER_SAMPLE_RATE as f32
}
/// Copy out the samples recorded since `from_sample` and cap the retained buffer.
///
/// The returned chunk holds exactly the samples at indices `from_sample..` of the
/// buffer as it was when the call started. After copying, the buffer is trimmed from
/// the front so that it never holds more than 30 seconds of audio at
/// `WHISPER_SAMPLE_RATE`, which bounds memory use during long recordings.
///
/// # Returns
/// `(chunk, next_offset)`. `next_offset` is the buffer length after any trimming,
/// i.e. the index at which the next newly captured sample will appear. When
/// `from_sample` is at or past the end of the buffer, the chunk is empty, nothing is
/// trimmed, and `next_offset` is the current buffer length.
pub fn extract_chunk(&self, from_sample: usize) -> (Vec<f32>, usize) {
    // Upper bound on how much audio stays in the buffer after a call.
    const RETAINED_SECONDS: usize = 30;
    const RETAINED_SAMPLES: usize = WHISPER_SAMPLE_RATE as usize * RETAINED_SECONDS;

    let mut samples = self.buffer.lock();
    let total = samples.len();

    // Nothing new since the caller's last offset.
    if total <= from_sample {
        return (Vec::new(), total);
    }

    // Copy the fresh tail before trimming; trimming shifts every index.
    let fresh = samples[from_sample..].to_vec();

    // Number of leading samples beyond the retention cap (zero when under the cap).
    let excess = total.saturating_sub(RETAINED_SAMPLES);
    if excess > 0 {
        samples.drain(..excess);
    }

    // Dropping `excess` samples from the front moves the end of the buffer back by
    // the same amount, so the caller's next offset moves with it.
    (fresh, total - excess)
}
/// Return a copy of every sample from `from_sample` to the end of the buffer.
///
/// The buffer itself is left untouched, which makes this suitable for collecting
/// the final stretch of audio when a recording is being stopped. An offset at or
/// beyond the end of the buffer yields an empty vector.
pub fn get_remaining_audio(&self, from_sample: usize) -> Vec<f32> {
    let samples = self.buffer.lock();
    // Checked slicing: an out-of-range start gives `None` instead of panicking.
    samples
        .get(from_sample..)
        .map(|tail| tail.to_vec())
        .unwrap_or_default()
}
}
impl Default for AudioCapture {