feat: handle transcription in real time

2026-01-29 10:31:40 -08:00
parent df8a89e05d
commit e6c19b589e
4 changed files with 166 additions and 5 deletions
@@ -175,6 +175,12 @@ body {
  animation: pulse 1.5s infinite;
 }

+.real-time-indicator { /* label shown inside .recording-indicator once transcript segments exist */
+  font-size: 0.875rem;
+  color: var(--secondary-color);
+  font-style: italic;
+}
+
 .recording-duration {
  font-weight: 600;
  font-variant-numeric: tabular-nums;
@@ -25,6 +25,9 @@ function App() {
  const [recordingDuration, setRecordingDuration] = useState(0);
  const initStarted = useRef(false);
  const recordingTimer = useRef<number | null>(null);
+  const transcriptionTimer = useRef<number | null>(null);
+  const audioOffset = useRef(0);
+  const totalProcessedSamples = useRef(0);

  useEffect(() => {
    if (initStarted.current) return;
@@ -33,12 +36,15 @@ function App() {
    initializeApp();
  }, []);

-  // Cleanup timer on unmount
+  // Cleanup timers on unmount
  useEffect(() => {
    return () => {
      if (recordingTimer.current) {
        clearInterval(recordingTimer.current);
      }
+      if (transcriptionTimer.current) {
+        clearInterval(transcriptionTimer.current);
+      }
    };
  }, []);

@@ -93,12 +99,51 @@ function App() {
    initializeApp();
  };

+  // Runs on every transcription tick: fetches audio recorded past `audioOffset` and, once at
+  // least 5 s (at 16 kHz) is buffered, transcribes it and appends the time-shifted segments.
+  // Ticks can overlap a slow transcription or stopRecording, so stale results are discarded.
+  const processAudioChunk = async () => {
+    const requestedOffset = audioOffset.current;
+    // Stale = another tick already consumed this audio, or stopRecording cleared the timer.
+    const isStale = () =>
+      transcriptionTimer.current === null || audioOffset.current !== requestedOffset;
+    try {
+      const [audioChunk, newOffset] = await invoke<[number[], number]>("get_audio_chunk", {
+        lastOffset: requestedOffset
+      });
+      // Wait for at least 5 seconds worth of audio at 16kHz before transcribing
+      if (audioChunk.length < 5 * 16000 || isStale()) return;
+      const newSegments = await invoke<TranscriptSegment[]>("transcribe_chunk", {
+        audioData: audioChunk
+      });
+      if (isStale()) return;
+
+      // Claim the chunk synchronously (no await below) so no overlapping tick can append it again
+      const baseTime = totalProcessedSamples.current / 16000;
+      totalProcessedSamples.current += audioChunk.length;
+      audioOffset.current = newOffset;
+
+      if (newSegments.length > 0) {
+        const adjustedSegments = newSegments.map(seg => ({
+          ...seg,
+          start: seg.start + baseTime,
+          end: seg.end + baseTime,
+        }));
+        setTranscriptSegments(prev => [...prev, ...adjustedSegments]);
+      }
+    } catch (error) {
+      console.error("Failed to process audio chunk:", error);
+    }
+  };
+
  const startRecording = async () => {
    try {
      setAppState("recording");
      setRecordingDuration(0);
      setTranscriptSegments([]);
      setSummary(null);
+      audioOffset.current = 0;
+      totalProcessedSamples.current = 0;

      await invoke("start_recording");

@@ -106,6 +151,11 @@ function App() {
      recordingTimer.current = window.setInterval(() => {
        setRecordingDuration(d => d + 1);
      }, 1000);
+
+      // Start real-time transcription timer (every 5 seconds)
+      transcriptionTimer.current = window.setInterval(() => {
+        processAudioChunk();
+      }, 5000);
    } catch (error) {
      console.error("Failed to start recording:", error);
      setAppState("ready");
@@ -115,17 +165,43 @@ function App() {

  const stopRecording = async () => {
    try {
-      // Stop the timer
+      // Stop the timers
      if (recordingTimer.current) {
        clearInterval(recordingTimer.current);
        recordingTimer.current = null;
      }
+      if (transcriptionTimer.current) {
+        clearInterval(transcriptionTimer.current);
+        transcriptionTimer.current = null;
+      }

      setAppState("transcribing");
-      setStatusMessage("Transcribing audio...");
+      setStatusMessage("Processing final audio...");

-      const segments = await invoke<TranscriptSegment[]>("stop_recording");
-      setTranscriptSegments(segments);
+      // Process any remaining audio
+      const finalChunk = await invoke<number[]>("get_remaining_audio", {
+        lastOffset: audioOffset.current
+      });
+
+      if (finalChunk.length > 0) {
+        const finalSegments = await invoke<TranscriptSegment[]>("transcribe_chunk", {
+          audioData: finalChunk
+        });
+
+        if (finalSegments.length > 0) {
+          const baseTime = totalProcessedSamples.current / 16000;
+          const adjustedSegments = finalSegments.map(seg => ({
+            ...seg,
+            start: seg.start + baseTime,
+            end: seg.end + baseTime,
+          }));
+
+          setTranscriptSegments(prev => [...prev, ...adjustedSegments]);
+        }
+      }
+
+      // Stop the recording
+      await invoke("stop_recording");

      setAppState("ready");
      setStatusMessage("");
@@ -264,6 +340,9 @@ function App() {
          <div className="recording-indicator">
            <span className="recording-dot" />
            Recording: {formatDuration(recordingDuration)}
+            {transcriptSegments.length > 0 && (
+              <span className="real-time-indicator"> (Real-time transcription active)</span>
+            )}
          </div>
          <button className="stop-button" onClick={stopRecording}>
            ⏹️ Stop Recording