generated from nhcarrigan/template
feat: add meeting transcription app scaffolding
- Add Python backend structure with FastAPI for transcription/summarization - Add React UI with audio recording, transcript, and summary views - Configure Tauri to manage Python backend lifecycle - Set up Windows cross-compilation with cargo-xwin - Add Gitea CI workflow for lint, test, and multi-platform builds - Configure ESLint, Prettier, and Vitest for code quality Note: App scaffolding only - Python env and models not yet set up
This commit is contained in:
+304
-76
@@ -1,116 +1,344 @@
|
||||
.logo.vite:hover {
|
||||
filter: drop-shadow(0 0 2em #747bff);
|
||||
}
|
||||
|
||||
.logo.react:hover {
|
||||
filter: drop-shadow(0 0 2em #61dafb);
|
||||
}
|
||||
:root {
|
||||
font-family: Inter, Avenir, Helvetica, Arial, sans-serif;
|
||||
font-size: 16px;
|
||||
line-height: 24px;
|
||||
line-height: 1.5;
|
||||
font-weight: 400;
|
||||
|
||||
color: #0f0f0f;
|
||||
background-color: #f6f6f6;
|
||||
--primary-color: #3b82f6;
|
||||
--primary-hover: #2563eb;
|
||||
--secondary-color: #10b981;
|
||||
--danger-color: #ef4444;
|
||||
--bg-color: #ffffff;
|
||||
--surface-color: #f9fafb;
|
||||
--text-color: #111827;
|
||||
--text-secondary: #6b7280;
|
||||
--border-color: #e5e7eb;
|
||||
--shadow: 0 1px 3px 0 rgba(0, 0, 0, 0.1), 0 1px 2px 0 rgba(0, 0, 0, 0.06);
|
||||
|
||||
color: var(--text-color);
|
||||
background-color: var(--bg-color);
|
||||
|
||||
font-synthesis: none;
|
||||
text-rendering: optimizeLegibility;
|
||||
-webkit-font-smoothing: antialiased;
|
||||
-moz-osx-font-smoothing: grayscale;
|
||||
-webkit-text-size-adjust: 100%;
|
||||
}
|
||||
|
||||
* {
|
||||
box-sizing: border-box;
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
body {
|
||||
margin: 0;
|
||||
min-height: 100vh;
|
||||
}
|
||||
|
||||
.container {
|
||||
margin: 0;
|
||||
padding-top: 10vh;
|
||||
max-width: 1200px;
|
||||
margin: 0 auto;
|
||||
padding: 2rem;
|
||||
min-height: 100vh;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
justify-content: center;
|
||||
}
|
||||
|
||||
/* Header */
|
||||
.app-header {
|
||||
text-align: center;
|
||||
margin-bottom: 2rem;
|
||||
}
|
||||
|
||||
.app-header h1 {
|
||||
font-size: 2.5rem;
|
||||
font-weight: 700;
|
||||
margin-bottom: 0.5rem;
|
||||
}
|
||||
|
||||
.app-header p {
|
||||
color: var(--text-secondary);
|
||||
font-size: 1.125rem;
|
||||
}
|
||||
|
||||
/* Warning Banner */
|
||||
.warning-banner {
|
||||
background-color: #fef3c7;
|
||||
color: #92400e;
|
||||
padding: 1rem;
|
||||
border-radius: 0.5rem;
|
||||
margin-bottom: 2rem;
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
.logo {
|
||||
height: 6em;
|
||||
padding: 1.5em;
|
||||
will-change: filter;
|
||||
transition: 0.75s;
|
||||
}
|
||||
|
||||
.logo.tauri:hover {
|
||||
filter: drop-shadow(0 0 2em #24c8db);
|
||||
}
|
||||
|
||||
.row {
|
||||
/* App Content */
|
||||
.app-content {
|
||||
flex: 1;
|
||||
display: flex;
|
||||
justify-content: center;
|
||||
flex-direction: column;
|
||||
gap: 2rem;
|
||||
}
|
||||
|
||||
a {
|
||||
font-weight: 500;
|
||||
color: #646cff;
|
||||
text-decoration: inherit;
|
||||
/* Controls Section */
|
||||
.controls-section {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
align-items: center;
|
||||
gap: 1.5rem;
|
||||
}
|
||||
|
||||
a:hover {
|
||||
color: #535bf2;
|
||||
/* Audio Recorder */
|
||||
.audio-recorder {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
align-items: center;
|
||||
gap: 1rem;
|
||||
}
|
||||
|
||||
h1 {
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
input,
|
||||
button {
|
||||
border-radius: 8px;
|
||||
border: 1px solid transparent;
|
||||
padding: 0.6em 1.2em;
|
||||
font-size: 1em;
|
||||
font-weight: 500;
|
||||
font-family: inherit;
|
||||
color: #0f0f0f;
|
||||
background-color: #ffffff;
|
||||
transition: border-color 0.25s;
|
||||
box-shadow: 0 2px 2px rgba(0, 0, 0, 0.2);
|
||||
}
|
||||
|
||||
button {
|
||||
.record-button {
|
||||
font-size: 1.25rem;
|
||||
padding: 1rem 2rem;
|
||||
border-radius: 0.75rem;
|
||||
border: 2px solid transparent;
|
||||
background-color: var(--primary-color);
|
||||
color: white;
|
||||
font-weight: 600;
|
||||
cursor: pointer;
|
||||
transition: all 0.2s;
|
||||
box-shadow: var(--shadow);
|
||||
}
|
||||
|
||||
button:hover {
|
||||
border-color: #396cd8;
|
||||
}
|
||||
button:active {
|
||||
border-color: #396cd8;
|
||||
background-color: #e8e8e8;
|
||||
.record-button:hover {
|
||||
background-color: var(--primary-hover);
|
||||
transform: translateY(-2px);
|
||||
box-shadow:
|
||||
0 4px 6px -1px rgba(0, 0, 0, 0.1),
|
||||
0 2px 4px -1px rgba(0, 0, 0, 0.06);
|
||||
}
|
||||
|
||||
input,
|
||||
button {
|
||||
outline: none;
|
||||
.record-button.recording {
|
||||
background-color: var(--danger-color);
|
||||
}
|
||||
|
||||
#greet-input {
|
||||
margin-right: 5px;
|
||||
.record-button.recording:hover {
|
||||
background-color: #dc2626;
|
||||
}
|
||||
|
||||
.recording-indicator {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.5rem;
|
||||
color: var(--danger-color);
|
||||
font-weight: 500;
|
||||
}
|
||||
|
||||
.pulse {
|
||||
width: 0.75rem;
|
||||
height: 0.75rem;
|
||||
background-color: var(--danger-color);
|
||||
border-radius: 50%;
|
||||
animation: pulse 1.5s infinite;
|
||||
}
|
||||
|
||||
@keyframes pulse {
|
||||
0% {
|
||||
opacity: 1;
|
||||
transform: scale(1);
|
||||
}
|
||||
50% {
|
||||
opacity: 0.5;
|
||||
transform: scale(1.2);
|
||||
}
|
||||
100% {
|
||||
opacity: 1;
|
||||
transform: scale(1);
|
||||
}
|
||||
}
|
||||
|
||||
/* Action Buttons */
|
||||
.action-buttons {
|
||||
display: flex;
|
||||
gap: 1rem;
|
||||
}
|
||||
|
||||
.primary-button,
|
||||
.secondary-button {
|
||||
padding: 0.75rem 1.5rem;
|
||||
border-radius: 0.5rem;
|
||||
border: none;
|
||||
font-size: 1rem;
|
||||
font-weight: 500;
|
||||
cursor: pointer;
|
||||
transition: all 0.2s;
|
||||
box-shadow: var(--shadow);
|
||||
}
|
||||
|
||||
.primary-button {
|
||||
background-color: var(--secondary-color);
|
||||
color: white;
|
||||
}
|
||||
|
||||
.primary-button:hover:not(:disabled) {
|
||||
background-color: #059669;
|
||||
transform: translateY(-1px);
|
||||
}
|
||||
|
||||
.primary-button:disabled {
|
||||
opacity: 0.5;
|
||||
cursor: not-allowed;
|
||||
}
|
||||
|
||||
.secondary-button {
|
||||
background-color: var(--surface-color);
|
||||
color: var(--text-color);
|
||||
border: 1px solid var(--border-color);
|
||||
}
|
||||
|
||||
.secondary-button:hover {
|
||||
background-color: var(--border-color);
|
||||
}
|
||||
|
||||
/* Content Grid */
|
||||
.content-grid {
|
||||
display: grid;
|
||||
grid-template-columns: 1fr 1fr;
|
||||
gap: 2rem;
|
||||
flex: 1;
|
||||
}
|
||||
|
||||
@media (max-width: 768px) {
|
||||
.content-grid {
|
||||
grid-template-columns: 1fr;
|
||||
}
|
||||
}
|
||||
|
||||
/* Transcript Display */
|
||||
.transcript-display,
|
||||
.summary-display {
|
||||
background-color: var(--surface-color);
|
||||
border: 1px solid var(--border-color);
|
||||
border-radius: 0.75rem;
|
||||
padding: 1.5rem;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
max-height: 600px;
|
||||
}
|
||||
|
||||
.transcript-display h2,
|
||||
.summary-display h2 {
|
||||
font-size: 1.25rem;
|
||||
font-weight: 600;
|
||||
margin-bottom: 1rem;
|
||||
}
|
||||
|
||||
.transcript-segments,
|
||||
.summary-content {
|
||||
flex: 1;
|
||||
overflow-y: auto;
|
||||
}
|
||||
|
||||
.empty-state {
|
||||
color: var(--text-secondary);
|
||||
text-align: center;
|
||||
padding: 2rem;
|
||||
}
|
||||
|
||||
.segment {
|
||||
margin-bottom: 1rem;
|
||||
padding-bottom: 1rem;
|
||||
border-bottom: 1px solid var(--border-color);
|
||||
}
|
||||
|
||||
.segment:last-child {
|
||||
border-bottom: none;
|
||||
}
|
||||
|
||||
.segment-header {
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
align-items: center;
|
||||
margin-bottom: 0.5rem;
|
||||
}
|
||||
|
||||
.speaker {
|
||||
font-weight: 600;
|
||||
font-size: 0.875rem;
|
||||
}
|
||||
|
||||
.timestamp {
|
||||
font-size: 0.75rem;
|
||||
color: var(--text-secondary);
|
||||
}
|
||||
|
||||
.segment-text {
|
||||
line-height: 1.6;
|
||||
}
|
||||
|
||||
/* Summary Display */
|
||||
.summary-header {
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
align-items: center;
|
||||
margin-bottom: 1rem;
|
||||
}
|
||||
|
||||
.download-button {
|
||||
padding: 0.5rem 1rem;
|
||||
border-radius: 0.375rem;
|
||||
border: 1px solid var(--border-color);
|
||||
background-color: var(--bg-color);
|
||||
font-size: 0.875rem;
|
||||
cursor: pointer;
|
||||
transition: all 0.2s;
|
||||
}
|
||||
|
||||
.download-button:hover {
|
||||
background-color: var(--surface-color);
|
||||
}
|
||||
|
||||
.summary-text {
|
||||
white-space: pre-wrap;
|
||||
line-height: 1.6;
|
||||
}
|
||||
|
||||
/* Loading */
|
||||
.loading {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
padding: 3rem;
|
||||
gap: 1rem;
|
||||
}
|
||||
|
||||
.spinner {
|
||||
width: 3rem;
|
||||
height: 3rem;
|
||||
border: 3px solid var(--border-color);
|
||||
border-top-color: var(--primary-color);
|
||||
border-radius: 50%;
|
||||
animation: spin 1s linear infinite;
|
||||
}
|
||||
|
||||
@keyframes spin {
|
||||
to {
|
||||
transform: rotate(360deg);
|
||||
}
|
||||
}
|
||||
|
||||
/* Dark Mode */
|
||||
@media (prefers-color-scheme: dark) {
|
||||
:root {
|
||||
color: #f6f6f6;
|
||||
background-color: #2f2f2f;
|
||||
--bg-color: #111827;
|
||||
--surface-color: #1f2937;
|
||||
--text-color: #f3f4f6;
|
||||
--text-secondary: #9ca3af;
|
||||
--border-color: #374151;
|
||||
--shadow: 0 1px 3px 0 rgba(0, 0, 0, 0.3), 0 1px 2px 0 rgba(0, 0, 0, 0.2);
|
||||
}
|
||||
|
||||
a:hover {
|
||||
color: #24c8db;
|
||||
}
|
||||
|
||||
input,
|
||||
button {
|
||||
color: #ffffff;
|
||||
background-color: #0f0f0f98;
|
||||
}
|
||||
button:active {
|
||||
background-color: #0f0f0f69;
|
||||
.warning-banner {
|
||||
background-color: #451a03;
|
||||
color: #fbbf24;
|
||||
}
|
||||
}
|
||||
|
||||
+174
-36
@@ -1,49 +1,187 @@
|
||||
import { useState } from "react";
|
||||
import reactLogo from "./assets/react.svg";
|
||||
import { useState, useEffect, useRef } from "react";
|
||||
import { invoke } from "@tauri-apps/api/core";
|
||||
import "./App.css";
|
||||
import { AudioRecorder } from "./components/AudioRecorder";
|
||||
import { TranscriptDisplay } from "./components/TranscriptDisplay";
|
||||
import { SummaryDisplay } from "./components/SummaryDisplay";
|
||||
|
||||
/** One transcribed utterance as appended from the backend's "transcription" messages. */
interface TranscriptSegment {
  /** Start offset; formatted as seconds (m:ss) when the transcript is downloaded. */
  start: number;
  /** End offset — presumably in the same unit as `start`; not read in this file. */
  end: number;
  /** Transcribed text of the utterance. */
  text: string;
  /** Speaker label, prefixed to the text in the downloaded transcript and the summary input. */
  speaker: string;
}
|
||||
|
||||
function App() {
|
||||
const [greetMsg, setGreetMsg] = useState("");
|
||||
const [name, setName] = useState("");
|
||||
const [isRecording, setIsRecording] = useState(false);
|
||||
const [transcriptSegments, setTranscriptSegments] = useState<TranscriptSegment[]>([]);
|
||||
const [summary, setSummary] = useState<string | null>(null);
|
||||
const [isGeneratingSummary, setIsGeneratingSummary] = useState(false);
|
||||
const [backendReady, setBackendReady] = useState(false);
|
||||
const wsRef = useRef<WebSocket | null>(null);
|
||||
|
||||
async function greet() {
|
||||
// Learn more about Tauri commands at https://tauri.app/develop/calling-rust/
|
||||
setGreetMsg(await invoke("greet", { name }));
|
||||
}
|
||||
useEffect(() => {
|
||||
// Start Python backend through Tauri
|
||||
startPythonBackend();
|
||||
}, []);
|
||||
|
||||
/**
 * Asks Tauri to launch the Python backend, then probes its health endpoint.
 *
 * The health probe is scheduled even when the `start_backend` command rejects:
 * a backend that is already running (for example one started by hand during
 * development) must still be able to clear the "starting up" banner.
 */
const startPythonBackend = async () => {
  try {
    // Start backend through Tauri command
    await invoke("start_backend");
  } catch (error) {
    console.error("Failed to start backend:", error);
  } finally {
    // Give backend time to start up before the first probe
    setTimeout(() => {
      checkBackendHealth();
    }, 2000);
  }
};
|
||||
|
||||
/**
 * Polls the backend health endpoint until it answers with a 2xx status, then
 * marks the backend as ready.
 *
 * A single probe is not enough: if the backend needs longer than the initial
 * delay to come up, the readiness flag would never flip.
 *
 * @param maxAttempts - How many probes to make before giving up.
 * @param retryDelayMs - Pause between two consecutive probes, in milliseconds.
 */
const checkBackendHealth = async (maxAttempts = 30, retryDelayMs = 1000) => {
  let lastError: unknown = null;

  for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
    try {
      const response = await fetch("http://localhost:8000/health");
      if (response.ok) {
        setBackendReady(true);
        return;
      }
      lastError = new Error(`Health check returned status ${response.status}`);
    } catch (error) {
      // Connection refused while the server is still booting ends up here.
      lastError = error;
    }

    if (attempt < maxAttempts) {
      await new Promise<void>((resolve) => setTimeout(resolve, retryDelayMs));
    }
  }

  // In production, Tauri will start the backend automatically
  console.error("Backend not ready:", lastError);
};
|
||||
|
||||
/**
 * Sends a recorded audio buffer to the transcription WebSocket, opening the
 * connection on first use and reusing it afterwards.
 *
 * - OPEN socket: the buffer is sent immediately.
 * - CONNECTING socket: the buffer is sent once that socket opens, instead of
 *   replacing (and orphaning) the pending connection.
 * - Otherwise a new socket is created and the buffer is sent from `onopen`.
 *
 * Incoming messages of type "transcription" append their segments to state;
 * malformed messages are logged and ignored.
 *
 * @param audioData - Raw bytes of the recording to transcribe.
 */
const handleAudioData = (audioData: ArrayBuffer) => {
  const existing = wsRef.current;

  if (existing && existing.readyState === WebSocket.OPEN) {
    // Send audio data through existing connection
    existing.send(audioData);
    return;
  }

  if (existing && existing.readyState === WebSocket.CONNECTING) {
    existing.addEventListener("open", () => existing.send(audioData), { once: true });
    return;
  }

  // Create WebSocket connection
  const socket = new WebSocket("ws://localhost:8000/ws/transcribe");
  wsRef.current = socket;

  socket.onopen = () => {
    console.log("WebSocket connected");
    // Send the audio data
    socket.send(audioData);
  };

  socket.onmessage = (event) => {
    let message: { type?: unknown; data?: { segments?: unknown } } | null;
    try {
      message = JSON.parse(event.data);
    } catch (error) {
      console.error("Ignoring malformed WebSocket message:", error);
      return;
    }

    const segments = message?.data?.segments;
    if (message?.type === "transcription" && Array.isArray(segments)) {
      setTranscriptSegments((prev) => [...prev, ...(segments as TranscriptSegment[])]);
    }
  };

  socket.onerror = (event) => {
    console.error("WebSocket error:", event);
  };

  socket.onclose = () => {
    console.log("WebSocket disconnected");
  };
};
|
||||
|
||||
/**
 * Requests a summary of the current transcript from the backend and stores it.
 *
 * Does nothing when there are no segments. The loading flag is raised for the
 * duration of the request and always lowered afterwards. A non-2xx response is
 * treated as a failure, and a response without a string `summary` clears the
 * stored summary instead of putting a non-string value into state.
 */
const generateSummary = async () => {
  if (transcriptSegments.length === 0) return;

  setIsGeneratingSummary(true);

  // Combine all transcript segments into text
  const fullTranscript = transcriptSegments
    .map((seg) => `${seg.speaker}: ${seg.text}`)
    .join("\n");

  try {
    const response = await fetch("http://localhost:8000/summarize", {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify({ transcript: fullTranscript }),
    });

    if (!response.ok) {
      throw new Error(`Summarize request failed with status ${response.status}`);
    }

    const data: { summary?: unknown } = await response.json();
    if (typeof data.summary === "string") {
      setSummary(data.summary);
    } else {
      // The backend may answer with a null summary (e.g. model unavailable).
      console.error("Backend returned no summary");
      setSummary(null);
    }
  } catch (error) {
    console.error("Failed to generate summary:", error);
  } finally {
    setIsGeneratingSummary(false);
  }
};
|
||||
|
||||
/**
 * Saves the transcript as a dated plain-text file, one
 * "[m:ss] speaker: text" line per segment, via a temporary anchor element.
 */
const downloadTranscript = () => {
  const lines = transcriptSegments.map(
    ({ start, speaker, text }) => `[${formatTime(start)}] ${speaker}: ${text}`,
  );

  const objectUrl = URL.createObjectURL(new Blob([lines.join("\n")], { type: "text/plain" }));
  const link = document.createElement("a");
  const datePart = new Date().toISOString().split("T")[0];

  link.href = objectUrl;
  link.download = `meeting-transcript-${datePart}.txt`;
  link.click();

  // The object URL is only needed for the click above.
  URL.revokeObjectURL(objectUrl);
};
|
||||
|
||||
/**
 * Saves the current summary as a dated plain-text file via a temporary anchor
 * element. Does nothing while no summary is available.
 */
const downloadSummary = () => {
  if (!summary) {
    return;
  }

  const objectUrl = URL.createObjectURL(new Blob([summary], { type: "text/plain" }));
  const link = document.createElement("a");
  const datePart = new Date().toISOString().split("T")[0];

  link.href = objectUrl;
  link.download = `meeting-summary-${datePart}.txt`;
  link.click();

  // The object URL is only needed for the click above.
  URL.revokeObjectURL(objectUrl);
};
|
||||
|
||||
/**
 * Formats a duration in seconds as `m:ss` (minutes are not capped at 59).
 *
 * @param seconds - Duration in seconds; fractional parts are truncated.
 * @returns Whole minutes, a colon, and zero-padded whole seconds.
 */
const formatTime = (seconds: number) => {
  const wholeMinutes = Math.floor(seconds / 60);
  const paddedSeconds = String(Math.floor(seconds % 60)).padStart(2, "0");
  return `${wholeMinutes}:${paddedSeconds}`;
};
|
||||
|
||||
return (
|
||||
<main className="container">
|
||||
<h1>Welcome to Tauri + React</h1>
|
||||
<header className="app-header">
|
||||
<h1>🎙️ Chronara</h1>
|
||||
<p>Local Meeting Transcription & Summarization</p>
|
||||
</header>
|
||||
|
||||
<div className="row">
|
||||
<a href="https://vite.dev" target="_blank">
|
||||
<img src="/vite.svg" className="logo vite" alt="Vite logo" />
|
||||
</a>
|
||||
<a href="https://tauri.app" target="_blank">
|
||||
<img src="/tauri.svg" className="logo tauri" alt="Tauri logo" />
|
||||
</a>
|
||||
<a href="https://react.dev" target="_blank">
|
||||
<img src={reactLogo} className="logo react" alt="React logo" />
|
||||
</a>
|
||||
{!backendReady && (
|
||||
<div className="warning-banner">⚠️ Backend is starting up. This may take a moment...</div>
|
||||
)}
|
||||
|
||||
<div className="app-content">
|
||||
<section className="controls-section">
|
||||
<AudioRecorder
|
||||
onAudioData={handleAudioData}
|
||||
isRecording={isRecording}
|
||||
setIsRecording={setIsRecording}
|
||||
/>
|
||||
|
||||
{!isRecording && transcriptSegments.length > 0 && (
|
||||
<div className="action-buttons">
|
||||
<button className="secondary-button" onClick={downloadTranscript}>
|
||||
📄 Download Transcript
|
||||
</button>
|
||||
<button
|
||||
className="primary-button"
|
||||
onClick={generateSummary}
|
||||
disabled={isGeneratingSummary}
|
||||
>
|
||||
✨ Generate Summary
|
||||
</button>
|
||||
</div>
|
||||
)}
|
||||
</section>
|
||||
|
||||
<div className="content-grid">
|
||||
<TranscriptDisplay segments={transcriptSegments} />
|
||||
<SummaryDisplay
|
||||
summary={summary}
|
||||
isLoading={isGeneratingSummary}
|
||||
onDownload={downloadSummary}
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
<p>Click on the Tauri, Vite, and React logos to learn more.</p>
|
||||
|
||||
<form
|
||||
className="row"
|
||||
onSubmit={(e) => {
|
||||
e.preventDefault();
|
||||
greet();
|
||||
}}
|
||||
>
|
||||
<input
|
||||
id="greet-input"
|
||||
onChange={(e) => setName(e.currentTarget.value)}
|
||||
placeholder="Enter a name..."
|
||||
/>
|
||||
<button type="submit">Greet</button>
|
||||
</form>
|
||||
<p>{greetMsg}</p>
|
||||
</main>
|
||||
);
|
||||
}
|
||||
|
||||
@@ -0,0 +1,7 @@
|
||||
import { describe, it, expect } from "vitest";
|
||||
|
||||
// Placeholder suite: it asserts a constant and does not exercise any
// application code.
describe("App", () => {
  it("placeholder test", () => {
    expect(true).toBe(true);
  });
});
|
||||
@@ -0,0 +1 @@
|
||||
"""Chronara backend - Local meeting transcription and summarization."""
|
||||
@@ -0,0 +1,74 @@
|
||||
"""Main FastAPI application for Chronara."""
|
||||
|
||||
import os
from pathlib import Path

from fastapi import Body, FastAPI, WebSocket, WebSocketDisconnect
from fastapi.middleware.cors import CORSMiddleware

from .models.audio import AudioProcessor
from .models.llm import LlamaSummarizer
from .models.transcriber import WhisperXTranscriber
|
||||
|
||||
app = FastAPI(title="Chronara API", version="0.1.0")

# Enable CORS for the Tauri frontend.
# Entries in allow_origins are compared literally, so a pattern such as
# "http://localhost:*" never matches a real Origin header. Any-port localhost
# (the dev server) is therefore expressed through allow_origin_regex instead.
# NOTE(review): the tauri.localhost origins are what Windows webviews are
# expected to report — confirm against the Tauri version in use.
app.add_middleware(
    CORSMiddleware,
    allow_origins=[
        "tauri://localhost",
        "http://tauri.localhost",
        "https://tauri.localhost",
    ],
    allow_origin_regex=r"http://localhost(:\d+)?",
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Initialize models.
# These are constructed once, at import time, and shared by every request.
MODEL_DIR = Path(__file__).parent.parent.parent / "models"
transcriber = WhisperXTranscriber(model_dir=MODEL_DIR)
summarizer = LlamaSummarizer(model_dir=MODEL_DIR)
audio_processor = AudioProcessor()
|
||||
|
||||
|
||||
@app.get("/health")
async def health_check():
    """Report liveness plus the load state of each model.

    Returns:
        A dict with a constant ``"healthy"`` status and, under ``"models"``,
        the ``is_loaded`` flags of the transcriber and the summarizer.
    """
    model_status = {
        "whisper": transcriber.is_loaded,
        "llama": summarizer.is_loaded,
    }
    return {"status": "healthy", "models": model_status}
|
||||
|
||||
|
||||
@app.websocket("/ws/transcribe")
async def transcribe_audio(websocket: WebSocket):
    """WebSocket endpoint for real-time audio transcription.

    Accepts the connection, then loops: receive a binary audio message,
    convert it with ``audio_processor.process_chunk``, and, when the chunk
    passes the energy check, transcribe it and send the result back as
    ``{"type": "transcription", "data": result}``.

    A client disconnect ends the loop quietly. Any other error closes the
    socket with code 1011 (internal error) and a truncated reason.
    """
    await websocket.accept()

    try:
        while True:
            # Receive audio chunk
            audio_data = await websocket.receive_bytes()

            # Process audio
            audio_chunk = audio_processor.process_chunk(audio_data)

            # Skip chunks without enough energy to contain speech
            if not audio_processor.has_speech(audio_chunk):
                continue

            result = await transcriber.transcribe_chunk(audio_chunk)
            if result:
                await websocket.send_json({
                    "type": "transcription",
                    "data": result,
                })

    except WebSocketDisconnect:
        # The peer already closed the connection; there is nothing left to close.
        return
    except Exception as e:
        # A close frame carries at most 123 bytes of reason text, so cut the
        # message at that size and drop any partially-cut UTF-8 sequence.
        reason = str(e).encode("utf-8")[:123].decode("utf-8", "ignore")
        try:
            await websocket.close(code=1011, reason=reason)
        except RuntimeError:
            # The socket was already closed while the error was being handled.
            pass
|
||||
|
||||
|
||||
@app.post("/summarize")
async def summarize_transcript(transcript: str = Body(..., embed=True)):
    """Summarize a meeting transcript.

    The transcript is read from a JSON request body of the form
    ``{"transcript": "..."}``, which is what the frontend sends. Without
    ``Body(embed=True)`` a bare ``str`` parameter is taken from the query
    string, so the frontend's JSON POST would be rejected.

    Args:
        transcript: Full transcript text to summarize.

    Returns:
        ``{"summary": <text>}``; the value is ``None`` when the summarizer
        could not produce a summary.
    """
    summary = await summarizer.summarize(transcript)
    return {"summary": summary}
|
||||
@@ -0,0 +1 @@
|
||||
"""Model modules for Chronara."""
|
||||
@@ -0,0 +1,73 @@
|
||||
"""Audio processing utilities."""
|
||||
|
||||
import io
|
||||
import wave
|
||||
from typing import Optional
|
||||
|
||||
import numpy as np
|
||||
import pyaudio
|
||||
|
||||
|
||||
class AudioProcessor:
    """Handles audio capture and processing.

    Wraps a PyAudio instance for microphone capture and WAV export, and offers
    helpers that turn raw 16-bit PCM bytes into normalized float samples and
    run a simple energy-based speech check on them.
    """

    def __init__(self, sample_rate: int = 16000, channels: int = 1):
        """Initialize audio processor.

        Args:
            sample_rate: Sampling rate in Hz used for capture and WAV export.
            channels: Number of audio channels used for capture and WAV export.
        """
        self.sample_rate = sample_rate
        self.channels = channels
        self.chunk_size = 1024
        self.format = pyaudio.paInt16

        # Initialize PyAudio
        self.audio = pyaudio.PyAudio()

        # Audio buffer for accumulating chunks (not read by any method here)
        self.buffer = []
        self.min_speech_duration = 0.5  # seconds (not read by any method here)

    # The pyaudio annotations below are quoted so they are not evaluated when
    # the class is defined.
    def start_recording(self) -> "pyaudio.Stream":
        """Open and return an input stream using the configured format."""
        stream = self.audio.open(
            format=self.format,
            channels=self.channels,
            rate=self.sample_rate,
            input=True,
            frames_per_buffer=self.chunk_size,
        )
        return stream

    def stop_recording(self, stream: "pyaudio.Stream") -> None:
        """Stop and close a stream returned by ``start_recording``."""
        stream.stop_stream()
        stream.close()

    def process_chunk(self, audio_bytes: bytes) -> np.ndarray:
        """Convert 16-bit PCM bytes to a float32 array normalized to [-1, 1).

        A trailing odd byte (half a sample) is dropped instead of raising, and
        an empty payload yields an empty array.

        Args:
            audio_bytes: Raw samples, two bytes each, in native byte order.

        Returns:
            One float32 value per complete input sample.
        """
        usable = len(audio_bytes) - (len(audio_bytes) % 2)
        if usable == 0:
            return np.zeros(0, dtype=np.float32)

        # Convert bytes to numpy array
        audio_array = np.frombuffer(audio_bytes[:usable], dtype=np.int16)

        # Normalize to [-1, 1]
        return audio_array.astype(np.float32) / 32768.0

    def has_speech(self, audio_chunk: np.ndarray, energy_threshold: float = 0.01) -> bool:
        """Simple voice activity detection based on RMS energy.

        Args:
            audio_chunk: Normalized samples, as produced by ``process_chunk``.
            energy_threshold: RMS level the chunk must exceed to count as speech.

        Returns:
            True when the chunk's RMS energy exceeds the threshold; False for
            an empty chunk.
        """
        if audio_chunk.size == 0:
            # The mean of an empty array is NaN (with a warning); treat as silence.
            return False

        # Calculate RMS energy
        energy = np.sqrt(np.mean(audio_chunk**2))

        # Convert the numpy boolean to a plain bool to match the annotation
        return bool(energy > energy_threshold)

    def save_audio(self, audio_data: bytes, filepath: str) -> None:
        """Write raw PCM frames to a WAV file using the configured format.

        Args:
            audio_data: Raw PCM frames to store.
            filepath: Destination path of the WAV file.
        """
        with wave.open(filepath, "wb") as wf:
            wf.setnchannels(self.channels)
            wf.setsampwidth(self.audio.get_sample_size(self.format))
            wf.setframerate(self.sample_rate)
            wf.writeframes(audio_data)

    def __del__(self):
        """Release the PyAudio instance if construction got far enough to create it."""
        audio = getattr(self, "audio", None)
        if audio is not None:
            audio.terminate()
|
||||
@@ -0,0 +1,67 @@
|
||||
"""Local LLM for meeting summarization using Llama."""
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from llama_cpp import Llama
|
||||
|
||||
|
||||
class LlamaSummarizer:
    """Handles meeting summarization using local Llama model.

    Attributes are set in ``__init__``: ``is_loaded`` tells whether the model
    could be loaded, and ``llm`` holds the model only when it could.
    """

    def __init__(self, model_dir: Path, model_size: str = "1B"):
        """Load the quantized Llama model from ``model_dir``.

        A load failure is printed and recorded in ``is_loaded`` instead of
        being raised, so constructing the summarizer never fails.

        Args:
            model_dir: Directory that contains the GGUF model file.
            model_size: Size tag inserted into the expected file name.
        """
        self.model_dir = model_dir
        self.is_loaded = False

        model_path = model_dir / f"llama-3.2-{model_size}-instruct-Q4_K_M.gguf"

        try:
            self.llm = Llama(
                model_path=str(model_path),
                n_ctx=8192,  # Context window
                n_threads=4,  # CPU threads
                n_gpu_layers=-1,  # Use GPU if available
                verbose=False,
            )
            self.is_loaded = True
        except Exception as e:
            print(f"Failed to load Llama model: {e}")
            self.is_loaded = False

    async def summarize(self, transcript: str) -> Optional[str]:
        """Generate a meeting summary from transcript.

        Args:
            transcript: Full transcript text.

        Returns:
            The stripped summary text, or ``None`` when the model is not
            loaded, the transcript is empty or whitespace-only, or generation
            raises.
        """
        if not self.is_loaded:
            return None

        # Nothing to summarize; skip the model call entirely.
        if not transcript or not transcript.strip():
            return None

        # NOTE(review): the prompt spells out <|begin_of_text|> itself; confirm
        # that the model wrapper does not prepend a second BOS token.
        prompt = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>

You are a helpful assistant that creates concise meeting summaries. Focus on:
- Key decisions made
- Action items and who owns them
- Important discussions and their outcomes
- Next steps

Keep the summary structured and easy to scan.<|eot_id|><|start_header_id|>user<|end_header_id|>

Please summarize this meeting transcript:

{transcript}<|eot_id|><|start_header_id|>assistant<|end_header_id|>

Meeting Summary:
"""

        try:
            response = self.llm(
                prompt,
                max_tokens=1024,
                temperature=0.7,
                top_p=0.9,
                stop=["<|eot_id|>", "<|end_of_text|>"],
            )

            return response["choices"][0]["text"].strip()

        except Exception as e:
            print(f"Summarization error: {e}")
            return None
|
||||
@@ -0,0 +1,88 @@
|
||||
"""WhisperX transcription with speaker diarization."""
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Any, Optional
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
import whisperx
|
||||
|
||||
|
||||
class WhisperXTranscriber:
    """Transcribes audio and labels speakers using WhisperX."""

    def __init__(self, model_dir: Path, model_size: str = "base"):
        """Load the ASR, alignment and diarization models from ``model_dir``.

        Any load failure is printed and recorded in ``is_loaded`` rather than
        raised.

        Args:
            model_dir: Root directory holding the "whisper", "alignment" and
                "diarization" sub-directories.
            model_size: Whisper model size passed to ``whisperx.load_model``.
        """
        self.model_dir = model_dir
        cuda_available = torch.cuda.is_available()
        self.device = "cuda" if cuda_available else "cpu"
        self.compute_type = "float16" if self.device == "cuda" else "int8"
        self.is_loaded = False

        try:
            # ASR model
            self.model = whisperx.load_model(
                model_size,
                self.device,
                compute_type=self.compute_type,
                download_root=str(model_dir / "whisper"),
            )

            # Alignment model and its metadata
            alignment = whisperx.load_align_model(
                language_code="en",
                device=self.device,
                model_dir=str(model_dir / "alignment"),
            )
            self.align_model, self.align_metadata = alignment

            # Diarization pipeline
            self.diarize_model = whisperx.DiarizationPipeline(
                device=self.device,
                model_name=str(model_dir / "diarization"),
            )
        except Exception as e:
            print(f"Failed to load WhisperX models: {e}")
            self.is_loaded = False
        else:
            self.is_loaded = True

    async def transcribe_chunk(self, audio_chunk: np.ndarray) -> Optional[dict[str, Any]]:
        """Transcribe one audio chunk and attach speaker labels.

        Args:
            audio_chunk: Audio samples handed to the WhisperX models.

        Returns:
            ``{"segments": [...]}`` where each entry has ``start``, ``end``,
            ``text`` and ``speaker`` ("Unknown" when no speaker was assigned),
            or ``None`` when the models are not loaded or any step raises.
        """
        if not self.is_loaded:
            return None

        try:
            # Step 1: raw transcription
            transcription = self.model.transcribe(audio_chunk, batch_size=16)

            # Step 2: align the whisper output against the audio
            aligned = whisperx.align(
                transcription["segments"],
                self.align_model,
                self.align_metadata,
                audio_chunk,
                self.device,
            )

            # Step 3: diarize and attach speakers
            speaker_turns = self.diarize_model(audio_chunk)
            labelled = whisperx.assign_word_speakers(speaker_turns, aligned)

            # Step 4: keep only the fields the client consumes
            segments = [
                {
                    "start": entry["start"],
                    "end": entry["end"],
                    "text": entry["text"],
                    "speaker": entry.get("speaker", "Unknown"),
                }
                for entry in labelled["segments"]
            ]
            return {"segments": segments}

        except Exception as e:
            print(f"Transcription error: {e}")
            return None
|
||||
@@ -0,0 +1,83 @@
|
||||
import { useRef, useEffect } from "react";
|
||||
|
||||
/** Props accepted by the AudioRecorder component. */
interface AudioRecorderProps {
  /** Called with the recorded audio bytes once the recorder has stopped. */
  onAudioData: (data: ArrayBuffer) => void;
  /** Whether a recording is in progress; selects the button label, style and action. */
  isRecording: boolean;
  /** Called with `true` after recording starts and `false` when it is stopped. */
  setIsRecording: (recording: boolean) => void;
}
|
||||
|
||||
/**
 * Record button that captures microphone audio and hands the finished
 * recording to `onAudioData` as a single ArrayBuffer.
 *
 * The microphone stream is released on stop, on unmount, and when the
 * recorder cannot be created, so the device is never left open. The
 * "audio/webm" container is only requested when the webview supports it;
 * otherwise the recorder's default format is used.
 */
export function AudioRecorder({ onAudioData, isRecording, setIsRecording }: AudioRecorderProps) {
  const mediaRecorderRef = useRef<MediaRecorder | null>(null);
  const streamRef = useRef<MediaStream | null>(null);

  /** Stops every track of the active stream and forgets it. */
  const releaseStream = () => {
    streamRef.current?.getTracks().forEach((track) => track.stop());
    streamRef.current = null;
  };

  useEffect(() => {
    return () => {
      // Cleanup on unmount
      releaseStream();
    };
  }, []);

  const startRecording = async () => {
    let stream: MediaStream;
    try {
      stream = await navigator.mediaDevices.getUserMedia({ audio: true });
    } catch (error) {
      console.error("Error starting recording:", error);
      alert("Failed to access microphone");
      return;
    }
    streamRef.current = stream;

    try {
      const preferredType = "audio/webm";
      const options = MediaRecorder.isTypeSupported(preferredType)
        ? { mimeType: preferredType }
        : undefined;
      const mediaRecorder = new MediaRecorder(stream, options);
      mediaRecorderRef.current = mediaRecorder;

      const chunks: Blob[] = [];

      mediaRecorder.ondataavailable = (event) => {
        if (event.data.size > 0) {
          chunks.push(event.data);
        }
      };

      mediaRecorder.onstop = async () => {
        // Label the blob with the format the recorder actually produced.
        const blob = new Blob(chunks, { type: mediaRecorder.mimeType || preferredType });
        chunks.length = 0;
        const arrayBuffer = await blob.arrayBuffer();
        onAudioData(arrayBuffer);
      };

      // A chunk is produced every second, but the chunks are only collected
      // here; the combined recording is forwarded once, when recording stops.
      mediaRecorder.start(1000);
      setIsRecording(true);
    } catch (error) {
      // The microphone was granted but the recorder could not be started:
      // release the device instead of leaving it open.
      console.error("Error starting recording:", error);
      mediaRecorderRef.current = null;
      releaseStream();
      alert("Failed to start recording");
    }
  };

  const stopRecording = () => {
    const recorder = mediaRecorderRef.current;
    if (recorder && recorder.state !== "inactive") {
      recorder.stop();
    }
    mediaRecorderRef.current = null;
    releaseStream();
    setIsRecording(false);
  };

  return (
    <div className="audio-recorder">
      <button
        className={`record-button ${isRecording ? "recording" : ""}`}
        onClick={isRecording ? stopRecording : startRecording}
      >
        {isRecording ? "⏹ Stop Recording" : "🎙️ Start Recording"}
      </button>
      {isRecording && (
        <div className="recording-indicator">
          <span className="pulse"></span> Recording...
        </div>
      )}
    </div>
  );
}
|
||||
@@ -0,0 +1,32 @@
|
||||
/** Props accepted by the SummaryDisplay component. */
interface SummaryDisplayProps {
  /** Summary text to show; when null (or empty) a placeholder is shown instead. */
  summary: string | null;
  /** While true, a spinner replaces the summary or placeholder. */
  isLoading: boolean;
  /** Click handler of the download button, which is shown only when a summary exists. */
  onDownload: () => void;
}
|
||||
|
||||
/**
 * Panel showing the meeting summary: a spinner while loading, the summary
 * text when one exists, and a placeholder otherwise. The download button is
 * shown only when a summary exists.
 */
export function SummaryDisplay({ summary, isLoading, onDownload }: SummaryDisplayProps) {
  const renderContent = () => {
    if (isLoading) {
      return (
        <div className="loading">
          <div className="spinner"></div>
          <p>Generating summary...</p>
        </div>
      );
    }

    if (summary) {
      return <div className="summary-text">{summary}</div>;
    }

    return <p className="empty-state">Summary will appear here after recording is complete.</p>;
  };

  return (
    <div className="summary-display">
      <div className="summary-header">
        <h2>Meeting Summary</h2>
        {summary && (
          <button className="download-button" onClick={onDownload}>
            📥 Download
          </button>
        )}
      </div>
      <div className="summary-content">{renderContent()}</div>
    </div>
  );
}
|
||||
@@ -0,0 +1,57 @@
|
||||
/** One transcribed utterance rendered as a row of the transcript. */
interface TranscriptSegment {
  /** Start offset in seconds; rendered as m:ss. */
  start: number;
  /** End offset in seconds; rendered as m:ss. */
  end: number;
  /** Transcribed text of the utterance. */
  text: string;
  /** Speaker label; the part after the first "_" is parsed as a number to pick the label color. */
  speaker: string;
}

/** Props accepted by the TranscriptDisplay component. */
interface TranscriptDisplayProps {
  /** Segments to render in order; an empty list shows a placeholder. */
  segments: TranscriptSegment[];
}
|
||||
|
||||
/**
 * Panel listing transcript segments with a colored speaker label and an
 * m:ss time range per segment, or a placeholder when there are none.
 */
export function TranscriptDisplay({ segments }: TranscriptDisplayProps) {
  /** Formats a duration in seconds as `m:ss`. */
  const formatTime = (seconds: number) => {
    const mins = Math.floor(seconds / 60);
    const secs = Math.floor(seconds % 60);
    return `${mins}:${secs.toString().padStart(2, "0")}`;
  };

  /**
   * Picks a label color from the number after the first "_" in the speaker
   * label. Labels without a usable number (no "_", non-numeric suffix) fall
   * back to the first color instead of producing an undefined color.
   */
  const getSpeakerColor = (speaker: string) => {
    const colors = [
      "#3b82f6", // blue
      "#10b981", // green
      "#f59e0b", // amber
      "#ef4444", // red
      "#8b5cf6", // purple
      "#14b8a6", // teal
    ];
    const parsed = Number.parseInt(speaker.split("_")[1] ?? "", 10);
    // Math.abs keeps a negative suffix from yielding a negative index.
    const index = Number.isNaN(parsed) ? 0 : Math.abs(parsed) % colors.length;
    return colors[index];
  };

  return (
    <div className="transcript-display">
      <h2>Transcript</h2>
      <div className="transcript-segments">
        {segments.length === 0 ? (
          <p className="empty-state">No transcript yet. Start recording to begin.</p>
        ) : (
          segments.map((segment, index) => (
            <div key={index} className="segment">
              <div className="segment-header">
                <span className="speaker" style={{ color: getSpeakerColor(segment.speaker) }}>
                  {segment.speaker}
                </span>
                <span className="timestamp">
                  {formatTime(segment.start)} - {formatTime(segment.end)}
                </span>
              </div>
              <p className="segment-text">{segment.text}</p>
            </div>
          ))
        )}
      </div>
    </div>
  );
}
|
||||
+1
-1
@@ -5,5 +5,5 @@ import App from "./App";
|
||||
ReactDOM.createRoot(document.getElementById("root") as HTMLElement).render(
|
||||
<React.StrictMode>
|
||||
<App />
|
||||
</React.StrictMode>,
|
||||
</React.StrictMode>
|
||||
);
|
||||
|
||||
Reference in New Issue
Block a user