generated from nhcarrigan/template
feat: capture audio and loopback
This commit is contained in:
@@ -52,8 +52,15 @@ windows = { version = "0.62", features = [
|
|||||||
"Win32_System_Com",
|
"Win32_System_Com",
|
||||||
"Win32_Foundation",
|
"Win32_Foundation",
|
||||||
"Win32_Media_Audio",
|
"Win32_Media_Audio",
|
||||||
"Win32_Media_Audio_Endpoints",
|
"Win32_Devices_Properties",
|
||||||
|
"Win32_Media_KernelStreaming",
|
||||||
|
"Win32_System_Com_StructuredStorage",
|
||||||
"Win32_System_Threading",
|
"Win32_System_Threading",
|
||||||
|
"Win32_Security",
|
||||||
|
"Win32_System_SystemServices",
|
||||||
|
"Win32_System_Variant",
|
||||||
|
"Win32_Media_Multimedia",
|
||||||
|
"Win32_UI_Shell_PropertiesSystem",
|
||||||
] }
|
] }
|
||||||
|
|
||||||
[patch.crates-io]
|
[patch.crates-io]
|
||||||
|
|||||||
+159
-54
@@ -2,12 +2,15 @@
|
|||||||
//!
|
//!
|
||||||
//! This module handles audio recording in a thread-safe way by using
|
//! This module handles audio recording in a thread-safe way by using
|
||||||
//! a shared buffer that can be accessed from the Tauri state.
|
//! a shared buffer that can be accessed from the Tauri state.
|
||||||
|
//!
|
||||||
|
//! On Windows, it captures both microphone input AND system audio (loopback)
|
||||||
|
//! to record both sides of a meeting conversation.
|
||||||
|
|
||||||
use parking_lot::Mutex;
|
use parking_lot::Mutex;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use std::thread::{self, JoinHandle};
|
use std::thread::{self, JoinHandle};
|
||||||
use thiserror::Error;
|
use thiserror::Error;
|
||||||
use tracing::{debug, error, info, warn};
|
use tracing::{error, info, warn};
|
||||||
|
|
||||||
/// Sample rate expected by Whisper (16kHz mono)
|
/// Sample rate expected by Whisper (16kHz mono)
|
||||||
pub const WHISPER_SAMPLE_RATE: u32 = 16000;
|
pub const WHISPER_SAMPLE_RATE: u32 = 16000;
|
||||||
@@ -33,6 +36,8 @@ pub struct AudioCapture {
|
|||||||
is_recording: Arc<Mutex<bool>>,
|
is_recording: Arc<Mutex<bool>>,
|
||||||
should_stop: Arc<Mutex<bool>>,
|
should_stop: Arc<Mutex<bool>>,
|
||||||
recording_thread: Option<JoinHandle<()>>,
|
recording_thread: Option<JoinHandle<()>>,
|
||||||
|
#[cfg(target_os = "windows")]
|
||||||
|
loopback_capture: Option<super::wasapi_loopback::WasapiLoopback>,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Implement Send + Sync manually since our struct only contains thread-safe types
|
// Implement Send + Sync manually since our struct only contains thread-safe types
|
||||||
@@ -47,10 +52,13 @@ impl AudioCapture {
|
|||||||
is_recording: Arc::new(Mutex::new(false)),
|
is_recording: Arc::new(Mutex::new(false)),
|
||||||
should_stop: Arc::new(Mutex::new(false)),
|
should_stop: Arc::new(Mutex::new(false)),
|
||||||
recording_thread: None,
|
recording_thread: None,
|
||||||
|
#[cfg(target_os = "windows")]
|
||||||
|
loopback_capture: None,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Start recording audio in a background thread.
|
/// Start recording audio in a background thread.
|
||||||
|
/// Captures both microphone input and system audio (loopback) on Windows.
|
||||||
pub fn start_recording(&mut self) -> Result<(), AudioError> {
|
pub fn start_recording(&mut self) -> Result<(), AudioError> {
|
||||||
if *self.is_recording.lock() {
|
if *self.is_recording.lock() {
|
||||||
warn!("Already recording");
|
warn!("Already recording");
|
||||||
@@ -62,82 +70,83 @@ impl AudioCapture {
|
|||||||
*self.should_stop.lock() = false;
|
*self.should_stop.lock() = false;
|
||||||
*self.is_recording.lock() = true;
|
*self.is_recording.lock() = true;
|
||||||
|
|
||||||
|
// Start WASAPI loopback capture for system audio (Windows only)
|
||||||
|
#[cfg(target_os = "windows")]
|
||||||
|
{
|
||||||
|
let mut loopback = super::wasapi_loopback::WasapiLoopback::new();
|
||||||
|
if let Err(e) = loopback.start_capture(Arc::clone(&self.buffer)) {
|
||||||
|
warn!("Failed to start WASAPI loopback: {}", e);
|
||||||
|
} else {
|
||||||
|
info!("WASAPI loopback capture initialized");
|
||||||
|
}
|
||||||
|
self.loopback_capture = Some(loopback);
|
||||||
|
}
|
||||||
|
|
||||||
let buffer = Arc::clone(&self.buffer);
|
let buffer = Arc::clone(&self.buffer);
|
||||||
let is_recording = Arc::clone(&self.is_recording);
|
let is_recording = Arc::clone(&self.is_recording);
|
||||||
let should_stop = Arc::clone(&self.should_stop);
|
let should_stop = Arc::clone(&self.should_stop);
|
||||||
|
|
||||||
// Spawn a thread to handle audio capture
|
// Spawn a thread to handle microphone capture via cpal
|
||||||
let handle = thread::spawn(move || {
|
let handle = thread::spawn(move || {
|
||||||
use cpal::traits::{DeviceTrait, HostTrait, StreamTrait};
|
use cpal::traits::{DeviceTrait, HostTrait, StreamTrait};
|
||||||
use cpal::{SampleRate, StreamConfig};
|
|
||||||
|
|
||||||
let host = cpal::default_host();
|
let host = cpal::default_host();
|
||||||
|
|
||||||
let device = match host.default_input_device() {
|
// Get microphone (input device)
|
||||||
Some(d) => d,
|
let input_device = host.default_input_device();
|
||||||
None => {
|
|
||||||
error!("No input device available");
|
if input_device.is_none() {
|
||||||
*is_recording.lock() = false;
|
warn!("No microphone available - only system audio will be captured");
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
};
|
|
||||||
|
|
||||||
info!("Using audio input device: {}", device.name().unwrap_or_default());
|
let mut streams: Vec<cpal::Stream> = Vec::new();
|
||||||
|
|
||||||
// Get supported config
|
// Set up microphone capture
|
||||||
let supported_config = match device.default_input_config() {
|
if let Some(ref device) = input_device {
|
||||||
Ok(c) => c,
|
info!("Microphone device: {}", device.name().unwrap_or_default());
|
||||||
Err(e) => {
|
|
||||||
error!("Failed to get input config: {}", e);
|
|
||||||
*is_recording.lock() = false;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
debug!("Supported config: {:?}", supported_config);
|
if let Ok(config) = device.default_input_config() {
|
||||||
|
let sample_rate = config.sample_rate().0;
|
||||||
// We want 16kHz mono for Whisper
|
let channels = config.channels();
|
||||||
let config = StreamConfig {
|
let sample_format = config.sample_format();
|
||||||
channels: 1,
|
info!("Microphone config: {} Hz, {} ch, {:?}", sample_rate, channels, sample_format);
|
||||||
sample_rate: SampleRate(WHISPER_SAMPLE_RATE),
|
|
||||||
buffer_size: cpal::BufferSize::Default,
|
|
||||||
};
|
|
||||||
|
|
||||||
let buffer_clone = Arc::clone(&buffer);
|
let buffer_clone = Arc::clone(&buffer);
|
||||||
let err_fn = |err| error!("Audio stream error: {}", err);
|
let stream_config: cpal::StreamConfig = config.into();
|
||||||
|
|
||||||
// Build the input stream
|
let stream = build_input_stream(
|
||||||
let stream = match device.build_input_stream(
|
device,
|
||||||
&config,
|
&stream_config,
|
||||||
move |data: &[f32], _: &cpal::InputCallbackInfo| {
|
sample_format,
|
||||||
buffer_clone.lock().extend_from_slice(data);
|
sample_rate,
|
||||||
},
|
channels,
|
||||||
err_fn,
|
buffer_clone,
|
||||||
None,
|
"mic",
|
||||||
) {
|
);
|
||||||
Ok(s) => s,
|
|
||||||
Err(e) => {
|
if let Some(s) = stream {
|
||||||
error!("Failed to build input stream: {}", e);
|
if s.play().is_ok() {
|
||||||
*is_recording.lock() = false;
|
info!("Microphone capture started");
|
||||||
return;
|
streams.push(s);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
};
|
|
||||||
|
|
||||||
if let Err(e) = stream.play() {
|
|
||||||
error!("Failed to start stream: {}", e);
|
|
||||||
*is_recording.lock() = false;
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
info!("Audio recording started");
|
// Note: Even if mic fails, WASAPI loopback may still be capturing system audio
|
||||||
|
if streams.is_empty() {
|
||||||
|
warn!("No microphone stream started - relying on WASAPI loopback for system audio");
|
||||||
|
} else {
|
||||||
|
info!("Audio recording started with {} microphone stream(s)", streams.len());
|
||||||
|
}
|
||||||
|
|
||||||
// Keep the stream alive until stop is requested
|
// Keep the streams alive until stop is requested
|
||||||
while !*should_stop.lock() {
|
while !*should_stop.lock() {
|
||||||
thread::sleep(std::time::Duration::from_millis(100));
|
thread::sleep(std::time::Duration::from_millis(100));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Stream is automatically stopped when dropped
|
// Streams are automatically stopped when dropped
|
||||||
drop(stream);
|
drop(streams);
|
||||||
*is_recording.lock() = false;
|
*is_recording.lock() = false;
|
||||||
info!("Audio recording stopped");
|
info!("Audio recording stopped");
|
||||||
});
|
});
|
||||||
@@ -151,7 +160,13 @@ impl AudioCapture {
|
|||||||
// Signal the thread to stop
|
// Signal the thread to stop
|
||||||
*self.should_stop.lock() = true;
|
*self.should_stop.lock() = true;
|
||||||
|
|
||||||
// Wait for the thread to finish
|
// Stop WASAPI loopback capture
|
||||||
|
#[cfg(target_os = "windows")]
|
||||||
|
if let Some(ref mut loopback) = self.loopback_capture {
|
||||||
|
loopback.stop_capture();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wait for the microphone thread to finish
|
||||||
if let Some(handle) = self.recording_thread.take() {
|
if let Some(handle) = self.recording_thread.take() {
|
||||||
let _ = handle.join();
|
let _ = handle.join();
|
||||||
}
|
}
|
||||||
@@ -201,6 +216,96 @@ impl Drop for AudioCapture {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Build an input stream for the given device with automatic format handling.
|
||||||
|
fn build_input_stream(
|
||||||
|
device: &cpal::Device,
|
||||||
|
config: &cpal::StreamConfig,
|
||||||
|
sample_format: cpal::SampleFormat,
|
||||||
|
sample_rate: u32,
|
||||||
|
channels: u16,
|
||||||
|
buffer: Arc<Mutex<Vec<f32>>>,
|
||||||
|
source_name: &'static str,
|
||||||
|
) -> Option<cpal::Stream> {
|
||||||
|
use cpal::traits::DeviceTrait;
|
||||||
|
|
||||||
|
let err_fn = move |err| error!("Audio stream error ({}): {}", source_name, err);
|
||||||
|
|
||||||
|
// Create a processing closure that handles mono conversion and resampling
|
||||||
|
let make_processor = move || {
|
||||||
|
let buffer = Arc::clone(&buffer);
|
||||||
|
move |samples: Vec<f32>| {
|
||||||
|
// Convert to mono if stereo
|
||||||
|
let mono_samples: Vec<f32> = if channels > 1 {
|
||||||
|
samples
|
||||||
|
.chunks(channels as usize)
|
||||||
|
.map(|chunk| chunk.iter().sum::<f32>() / channels as f32)
|
||||||
|
.collect()
|
||||||
|
} else {
|
||||||
|
samples
|
||||||
|
};
|
||||||
|
|
||||||
|
// Resample to 16kHz if needed
|
||||||
|
let resampled = if sample_rate != WHISPER_SAMPLE_RATE {
|
||||||
|
resample(&mono_samples, sample_rate, WHISPER_SAMPLE_RATE)
|
||||||
|
} else {
|
||||||
|
mono_samples
|
||||||
|
};
|
||||||
|
|
||||||
|
buffer.lock().extend_from_slice(&resampled);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let stream = match sample_format {
|
||||||
|
cpal::SampleFormat::F32 => {
|
||||||
|
let process = make_processor();
|
||||||
|
device.build_input_stream(
|
||||||
|
config,
|
||||||
|
move |data: &[f32], _: &cpal::InputCallbackInfo| {
|
||||||
|
process(data.to_vec());
|
||||||
|
},
|
||||||
|
err_fn,
|
||||||
|
None,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
cpal::SampleFormat::I16 => {
|
||||||
|
let process = make_processor();
|
||||||
|
device.build_input_stream(
|
||||||
|
config,
|
||||||
|
move |data: &[i16], _: &cpal::InputCallbackInfo| {
|
||||||
|
let samples: Vec<f32> = data.iter().map(|&s| s as f32 / 32768.0).collect();
|
||||||
|
process(samples);
|
||||||
|
},
|
||||||
|
err_fn,
|
||||||
|
None,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
cpal::SampleFormat::I32 => {
|
||||||
|
let process = make_processor();
|
||||||
|
device.build_input_stream(
|
||||||
|
config,
|
||||||
|
move |data: &[i32], _: &cpal::InputCallbackInfo| {
|
||||||
|
let samples: Vec<f32> = data.iter().map(|&s| s as f32 / 2147483648.0).collect();
|
||||||
|
process(samples);
|
||||||
|
},
|
||||||
|
err_fn,
|
||||||
|
None,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
format => {
|
||||||
|
error!("Unsupported sample format for {}: {:?}", source_name, format);
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
match stream {
|
||||||
|
Ok(s) => Some(s),
|
||||||
|
Err(e) => {
|
||||||
|
warn!("Failed to build {} stream: {}", source_name, e);
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Convert audio samples from i16 to f32 format.
|
/// Convert audio samples from i16 to f32 format.
|
||||||
pub fn i16_to_f32(samples: &[i16]) -> Vec<f32> {
|
pub fn i16_to_f32(samples: &[i16]) -> Vec<f32> {
|
||||||
samples.iter().map(|&s| s as f32 / 32768.0).collect()
|
samples.iter().map(|&s| s as f32 / 32768.0).collect()
|
||||||
|
|||||||
@@ -11,6 +11,9 @@ pub mod transcriber;
|
|||||||
pub mod summarizer;
|
pub mod summarizer;
|
||||||
pub mod vad;
|
pub mod vad;
|
||||||
|
|
||||||
|
#[cfg(target_os = "windows")]
|
||||||
|
pub mod wasapi_loopback;
|
||||||
|
|
||||||
pub use audio::AudioCapture;
|
pub use audio::AudioCapture;
|
||||||
pub use transcriber::WhisperTranscriber;
|
pub use transcriber::WhisperTranscriber;
|
||||||
pub use summarizer::LlamaSummarizer;
|
pub use summarizer::LlamaSummarizer;
|
||||||
|
|||||||
@@ -0,0 +1,311 @@
|
|||||||
|
//! WASAPI loopback capture for Windows.
|
||||||
|
//!
|
||||||
|
//! This module captures system audio (what's playing through speakers)
|
||||||
|
//! using Windows Audio Session API (WASAPI) in loopback mode.
|
||||||
|
|
||||||
|
use parking_lot::Mutex;
|
||||||
|
use std::sync::Arc;
|
||||||
|
use std::thread::{self, JoinHandle};
|
||||||
|
use tracing::{error, info, warn};
|
||||||
|
use windows::Win32::Media::Audio;
|
||||||
|
use windows::Win32::System::Com;
|
||||||
|
|
||||||
|
/// WASAPI loopback capture state.
|
||||||
|
pub struct WasapiLoopback {
|
||||||
|
is_capturing: Arc<Mutex<bool>>,
|
||||||
|
should_stop: Arc<Mutex<bool>>,
|
||||||
|
capture_thread: Option<JoinHandle<()>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl WasapiLoopback {
|
||||||
|
/// Create a new WASAPI loopback capture instance.
|
||||||
|
pub fn new() -> Self {
|
||||||
|
Self {
|
||||||
|
is_capturing: Arc::new(Mutex::new(false)),
|
||||||
|
should_stop: Arc::new(Mutex::new(false)),
|
||||||
|
capture_thread: None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Start capturing system audio in loopback mode.
|
||||||
|
pub fn start_capture(&mut self, output_buffer: Arc<Mutex<Vec<f32>>>) -> Result<(), String> {
|
||||||
|
if *self.is_capturing.lock() {
|
||||||
|
warn!("WASAPI loopback already capturing");
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
*self.should_stop.lock() = false;
|
||||||
|
*self.is_capturing.lock() = true;
|
||||||
|
|
||||||
|
let is_capturing = Arc::clone(&self.is_capturing);
|
||||||
|
let should_stop = Arc::clone(&self.should_stop);
|
||||||
|
|
||||||
|
let handle = thread::spawn(move || {
|
||||||
|
if let Err(e) = capture_loopback_audio(output_buffer, should_stop.clone()) {
|
||||||
|
error!("WASAPI loopback capture error: {}", e);
|
||||||
|
}
|
||||||
|
*is_capturing.lock() = false;
|
||||||
|
info!("WASAPI loopback capture stopped");
|
||||||
|
});
|
||||||
|
|
||||||
|
self.capture_thread = Some(handle);
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Stop capturing.
|
||||||
|
pub fn stop_capture(&mut self) {
|
||||||
|
*self.should_stop.lock() = true;
|
||||||
|
if let Some(handle) = self.capture_thread.take() {
|
||||||
|
let _ = handle.join();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Check if currently capturing.
|
||||||
|
pub fn is_capturing(&self) -> bool {
|
||||||
|
*self.is_capturing.lock()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Drop for WasapiLoopback {
|
||||||
|
fn drop(&mut self) {
|
||||||
|
self.stop_capture();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Sample rate, in Hz, that captured audio is resampled to for Whisper (16 kHz).
const TARGET_SAMPLE_RATE: u32 = 16_000;
|
||||||
|
|
||||||
|
/// Capture loopback audio from the default render device.
|
||||||
|
fn capture_loopback_audio(
|
||||||
|
buffer: Arc<Mutex<Vec<f32>>>,
|
||||||
|
should_stop: Arc<Mutex<bool>>,
|
||||||
|
) -> Result<(), String> {
|
||||||
|
unsafe {
|
||||||
|
// Initialize COM
|
||||||
|
Com::CoInitializeEx(None, Com::COINIT_MULTITHREADED)
|
||||||
|
.ok()
|
||||||
|
.map_err(|e| format!("Failed to initialize COM: {}", e))?;
|
||||||
|
|
||||||
|
// Create device enumerator
|
||||||
|
let enumerator: Audio::IMMDeviceEnumerator =
|
||||||
|
Com::CoCreateInstance(&Audio::MMDeviceEnumerator, None, Com::CLSCTX_ALL)
|
||||||
|
.map_err(|e| format!("Failed to create device enumerator: {}", e))?;
|
||||||
|
|
||||||
|
// Get default render (output) device - this is key for loopback!
|
||||||
|
let device = enumerator
|
||||||
|
.GetDefaultAudioEndpoint(Audio::eRender, Audio::eConsole)
|
||||||
|
.map_err(|e| format!("Failed to get default render device: {}", e))?;
|
||||||
|
|
||||||
|
// Get device name for logging
|
||||||
|
if let Ok(id) = device.GetId() {
|
||||||
|
info!("WASAPI loopback device: {:?}", id.to_string());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Activate audio client
|
||||||
|
let audio_client: Audio::IAudioClient = device
|
||||||
|
.Activate(Com::CLSCTX_ALL, None)
|
||||||
|
.map_err(|e| format!("Failed to activate audio client: {}", e))?;
|
||||||
|
|
||||||
|
// Get the mix format (what the device is actually using)
|
||||||
|
let mix_format = audio_client
|
||||||
|
.GetMixFormat()
|
||||||
|
.map_err(|e| format!("Failed to get mix format: {}", e))?;
|
||||||
|
|
||||||
|
let format = &*mix_format;
|
||||||
|
let sample_rate = format.nSamplesPerSec;
|
||||||
|
let channels = format.nChannels;
|
||||||
|
let bits_per_sample = format.wBitsPerSample;
|
||||||
|
let block_align = format.nBlockAlign;
|
||||||
|
|
||||||
|
info!(
|
||||||
|
"WASAPI loopback format: {} Hz, {} ch, {} bits",
|
||||||
|
sample_rate, channels, bits_per_sample
|
||||||
|
);
|
||||||
|
|
||||||
|
// Initialize audio client in loopback mode
|
||||||
|
// Key flags: AUDCLNT_STREAMFLAGS_LOOPBACK for capturing output
|
||||||
|
// Must use shared mode (not exclusive) for loopback
|
||||||
|
let buffer_duration = 10_000_000i64; // 1 second in 100-nanosecond units
|
||||||
|
|
||||||
|
audio_client
|
||||||
|
.Initialize(
|
||||||
|
Audio::AUDCLNT_SHAREMODE_SHARED,
|
||||||
|
Audio::AUDCLNT_STREAMFLAGS_LOOPBACK,
|
||||||
|
buffer_duration,
|
||||||
|
0,
|
||||||
|
mix_format,
|
||||||
|
None,
|
||||||
|
)
|
||||||
|
.map_err(|e| format!("Failed to initialize audio client: {}", e))?;
|
||||||
|
|
||||||
|
// Get capture client
|
||||||
|
let capture_client: Audio::IAudioCaptureClient = audio_client
|
||||||
|
.GetService()
|
||||||
|
.map_err(|e| format!("Failed to get capture client: {}", e))?;
|
||||||
|
|
||||||
|
// Start capturing
|
||||||
|
audio_client
|
||||||
|
.Start()
|
||||||
|
.map_err(|e| format!("Failed to start audio client: {}", e))?;
|
||||||
|
|
||||||
|
info!("WASAPI loopback capture started");
|
||||||
|
|
||||||
|
// Capture loop - use polling since event mode doesn't work for loopback
|
||||||
|
while !*should_stop.lock() {
|
||||||
|
// Sleep a bit to avoid busy-waiting (10ms = 100Hz polling)
|
||||||
|
thread::sleep(std::time::Duration::from_millis(10));
|
||||||
|
|
||||||
|
// Get available frames
|
||||||
|
let frames_available = match capture_client.GetNextPacketSize() {
|
||||||
|
Ok(frames) => frames,
|
||||||
|
Err(e) => {
|
||||||
|
warn!("Failed to get packet size: {}", e);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
if frames_available == 0 {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get buffer
|
||||||
|
let mut data_ptr: *mut u8 = std::ptr::null_mut();
|
||||||
|
let mut num_frames: u32 = 0;
|
||||||
|
let mut flags: u32 = 0;
|
||||||
|
|
||||||
|
if let Err(e) = capture_client.GetBuffer(
|
||||||
|
&mut data_ptr,
|
||||||
|
&mut num_frames,
|
||||||
|
&mut flags,
|
||||||
|
None,
|
||||||
|
None,
|
||||||
|
) {
|
||||||
|
warn!("Failed to get buffer: {}", e);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if num_frames > 0 && !data_ptr.is_null() {
|
||||||
|
// Convert to f32 samples
|
||||||
|
let samples = convert_to_f32(
|
||||||
|
data_ptr,
|
||||||
|
num_frames as usize,
|
||||||
|
channels as usize,
|
||||||
|
bits_per_sample,
|
||||||
|
block_align as usize,
|
||||||
|
);
|
||||||
|
|
||||||
|
// Convert to mono
|
||||||
|
let mono_samples = to_mono(&samples, channels as usize);
|
||||||
|
|
||||||
|
// Resample to 16kHz if needed
|
||||||
|
let resampled = if sample_rate != TARGET_SAMPLE_RATE {
|
||||||
|
resample(&mono_samples, sample_rate, TARGET_SAMPLE_RATE)
|
||||||
|
} else {
|
||||||
|
mono_samples
|
||||||
|
};
|
||||||
|
|
||||||
|
// Add to buffer
|
||||||
|
buffer.lock().extend_from_slice(&resampled);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Release buffer
|
||||||
|
if let Err(e) = capture_client.ReleaseBuffer(num_frames) {
|
||||||
|
warn!("Failed to release buffer: {}", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Stop and cleanup
|
||||||
|
let _ = audio_client.Stop();
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Convert raw audio bytes to f32 samples.
|
||||||
|
fn convert_to_f32(
|
||||||
|
data: *mut u8,
|
||||||
|
num_frames: usize,
|
||||||
|
channels: usize,
|
||||||
|
bits_per_sample: u16,
|
||||||
|
block_align: usize,
|
||||||
|
) -> Vec<f32> {
|
||||||
|
let total_samples = num_frames * channels;
|
||||||
|
let mut samples = Vec::with_capacity(total_samples);
|
||||||
|
|
||||||
|
unsafe {
|
||||||
|
match bits_per_sample {
|
||||||
|
16 => {
|
||||||
|
let ptr = data as *const i16;
|
||||||
|
for i in 0..total_samples {
|
||||||
|
let sample = *ptr.add(i);
|
||||||
|
samples.push(sample as f32 / 32768.0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
32 => {
|
||||||
|
// Could be f32 or i32 - WASAPI mix format is usually f32
|
||||||
|
let ptr = data as *const f32;
|
||||||
|
for i in 0..total_samples {
|
||||||
|
samples.push(*ptr.add(i));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
24 => {
|
||||||
|
// 24-bit samples packed in 3 bytes
|
||||||
|
for i in 0..total_samples {
|
||||||
|
let offset = (i / channels) * block_align + (i % channels) * 3;
|
||||||
|
let b0 = *data.add(offset) as i32;
|
||||||
|
let b1 = *data.add(offset + 1) as i32;
|
||||||
|
let b2 = *data.add(offset + 2) as i32;
|
||||||
|
let sample = (b2 << 16) | (b1 << 8) | b0;
|
||||||
|
// Sign extend from 24 to 32 bits
|
||||||
|
let sample = if sample & 0x800000 != 0 {
|
||||||
|
sample | 0xFF000000u32 as i32
|
||||||
|
} else {
|
||||||
|
sample
|
||||||
|
};
|
||||||
|
samples.push(sample as f32 / 8388608.0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
warn!("Unsupported bits per sample: {}", bits_per_sample);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
samples
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Convert interleaved multi-channel audio to mono by averaging each frame.
///
/// # Parameters
/// * `samples` - interleaved samples, `channels` values per frame.
/// * `channels` - number of interleaved channels.
///
/// # Returns
/// One sample per frame. With `channels` of 0 or 1 there is nothing to
/// average and the input is returned unchanged (a 0 previously panicked in
/// `chunks`). A trailing partial frame is still divided by `channels`, i.e.
/// its missing samples count as silence.
fn to_mono(samples: &[f32], channels: usize) -> Vec<f32> {
    if channels <= 1 {
        return samples.to_vec();
    }

    let divisor = channels as f32;
    samples
        .chunks(channels)
        .map(|frame| frame.iter().sum::<f32>() / divisor)
        .collect()
}
|
||||||
|
|
||||||
|
/// Simple linear-interpolation resampling.
///
/// # Parameters
/// * `samples` - mono input samples.
/// * `from_rate` - sample rate of `samples`, in Hz.
/// * `to_rate` - desired sample rate, in Hz.
///
/// # Returns
/// `floor(samples.len() * to_rate / from_rate)` samples. The input is copied
/// unchanged when the rates are equal or the input is empty. A rate of zero
/// (which previously led to an infinite ratio and an allocation panic for
/// `from_rate == 0`) yields an empty vector.
///
/// No low-pass filtering is applied, so downsampling can alias.
fn resample(samples: &[f32], from_rate: u32, to_rate: u32) -> Vec<f32> {
    if from_rate == to_rate || samples.is_empty() {
        return samples.to_vec();
    }
    if from_rate == 0 || to_rate == 0 {
        return Vec::new();
    }

    let ratio = f64::from(to_rate) / f64::from(from_rate);
    let new_len = (samples.len() as f64 * ratio) as usize;
    let last = samples.len() - 1;

    (0..new_len)
        .map(|i| {
            let src_pos = i as f64 / ratio;
            // `src_pos` is non-negative, so the cast truncates like `floor`;
            // the clamp guards against floating-point rounding at the end.
            let lo = (src_pos as usize).min(last);
            let hi = (lo + 1).min(last);
            let frac = src_pos - lo as f64;

            (f64::from(samples[lo]) * (1.0 - frac) + f64::from(samples[hi]) * frac) as f32
        })
        .collect()
}
|
||||||
Reference in New Issue
Block a user