feat: use timestamps in file names

This allows us to run multiple recordings at once.
2025-08-05 11:16:08 -07:00
parent 77bd353a21
commit 53f30437d9
3 changed files with 91 additions and 14 deletions
@@ -0,0 +1,3 @@
 *.json
 *.txt
 *.wav
@@ -1,20 +1,10 @@
-# New Repository Template
+# Meeting Minutes
-This template contains all of our basic files for a new GitHub repository. There is also a handy workflow that will create an issue on a new repository made from this template, with a checklist for the steps we usually take in setting up a new repository.
+This is a local recording, transcription, and summarisation script that listens to your meeting and generates a summary. Everything runs completely offline, to protect your data privacy.
 If you're starting a Node.JS project with TypeScript, we have a [specific template](https://github.com/naomi-lgbt/nodejs-typescript-template) for that purpose.
 ## Readme
 Delete all of the above text (including this line), and uncomment the below text to use our standard readme template.
 <!-- # Project Name
 Project Description
 ## Live Version
-This page is currently deployed. [View the live website.]
+Gotta run it locally. Sorry!
 ## Feedback and Bugs
@@ -36,4 +26,4 @@ Copyright held by Naomi Carrigan.
 ## Contact
-We may be contacted through our [Chat Server](http://chat.nhcarrigan.com) or via email at `contact@nhcarrigan.com`. -->
+We may be contacted through our [Chat Server](http://chat.nhcarrigan.com) or via email at `contact@nhcarrigan.com`.
@@ -0,0 +1,84 @@
 import subprocess
 import json
 import datetime
 # === CONFIG ===
 # A single timestamp is taken when the module loads and reused in every file
 # name below, so the audio, transcript and summary of one run share a suffix
 # and runs started at different seconds do not overwrite each other's files.
 TIMESTAMP = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
 WHISPER_MODEL = "small.en"
 DURATION = 10 * 60 * 60  # recording cap in seconds (10 hours)
 MEETING_FILE = "meeting_audio_" + TIMESTAMP + ".wav"
 # Same stem as MEETING_FILE, with a .json extension instead of .wav.
 OUTPUT_TRANSCRIPT = "meeting_audio_" + TIMESTAMP + ".json"
 OUTPUT_SUMMARY = "summary_" + TIMESTAMP + ".txt"
 def record_audio():
     """Record meeting audio into ``MEETING_FILE`` by running ffmpeg.

     Blocks until ffmpeg exits, which happens when ``DURATION`` seconds have
     been captured or the recording is stopped early (the banner tells the
     user to press ``q``).

     Raises:
         subprocess.CalledProcessError: if ffmpeg exits with a non-zero status.
     """
     print("🎙 Recording meeting audio... (q to stop early)")

     ffmpeg_cmd = [
         "ffmpeg",
         # PulseAudio capture is Linux-specific; change for Windows/macOS.
         "-f", "pulse",
         # The PulseAudio "default" device. NOTE(review): whether this carries
         # both input and output audio depends on the local PulseAudio setup.
         "-i", "default",
         # Hard upper bound on the recording length, in seconds.
         "-t", str(DURATION),
         # Two channels at a 44.1 kHz sample rate.
         "-ac", "2",
         "-ar", "44100",
         MEETING_FILE,
     ]
     subprocess.run(ffmpeg_cmd, check=True)
 def transcribe_audio():
     """Transcribe ``MEETING_FILE`` to JSON with the ``whisper`` command.

     The banner mentions WhisperX, but the command actually run is plain
     ``whisper``. The ``pipenv run python3 -m whisperx`` invocation and the
     ``--diarize`` flag are disabled until whisperX works and should be
     restored then.

     The JSON is written to the current directory. NOTE(review): this assumes
     whisper names its output after the audio file's base name, which is what
     ``OUTPUT_TRANSCRIPT`` expects -- confirm against the whisper CLI.

     Raises:
         subprocess.CalledProcessError: if whisper exits with a non-zero status.
     """
     print("🔠 Transcribing with WhisperX...")

     whisper_cmd = ["whisper", MEETING_FILE]
     whisper_cmd += ["--device", "cpu"]  # run on the CPU
     whisper_cmd += ["--language", "en"]
     whisper_cmd += ["--model", WHISPER_MODEL]
     whisper_cmd += ["--output_format", "json"]
     whisper_cmd += ["--output_dir", "."]

     subprocess.run(whisper_cmd, check=True)
 def summarize_with_local_model():
     """Summarise the transcript with a local Ollama model.

     Reads ``OUTPUT_TRANSCRIPT``, joins the ``"text"`` of every entry under
     its ``"segments"`` key with newlines, sends the resulting prompt to
     ``ollama run llama3:8b`` on stdin, and writes whatever the model prints
     on stdout to ``OUTPUT_SUMMARY``.

     Raises:
         subprocess.CalledProcessError: if ollama exits with a non-zero status.
         KeyError: if the transcript has no ``"segments"`` key or a segment
             has no ``"text"`` key.
     """
     print("🧾 Summarizing locally with Ollama...")

     with open(OUTPUT_TRANSCRIPT, "r", encoding="utf-8") as transcript_file:
         transcript = json.load(transcript_file)

     segment_texts = [segment["text"] for segment in transcript["segments"]]
     text = "\n".join(segment_texts)

     # The continuation lines of this literal are part of the prompt sent to
     # the model, including their leading spaces.
     prompt = f"""Summarize this meeting transcript into clear bullet points with:
    - Key decisions
    - Action items
    - Notable discussion points
    Transcript:
    {text}
    """

     # ollama takes the prompt as bytes on stdin and replies on stdout.
     ollama_cmd = ["ollama", "run", "llama3:8b"]
     completed = subprocess.run(
         ollama_cmd,
         input=prompt.encode(),
         capture_output=True,
         check=True,
     )

     with open(OUTPUT_SUMMARY, "w", encoding="utf-8") as summary_file:
         summary_file.write(completed.stdout.decode())

     print("✅ Summary written to", OUTPUT_SUMMARY)
 def cleanup():
     """Delete this run's intermediate audio and transcript files.

     Removes ``MEETING_FILE`` and ``OUTPUT_TRANSCRIPT``; the summary file is
     left in place. A file that is already missing is skipped silently.

     Raises:
         OSError: for failures other than the file being absent, e.g. a
             permission error or the path being a directory.
     """
     import os

     for path in (MEETING_FILE, OUTPUT_TRANSCRIPT):
         # Attempt the removal directly instead of checking existence first:
         # a separate exists() check can go stale before remove() runs.
         try:
             os.remove(path)
         except FileNotFoundError:
             pass
 if __name__ == "__main__":
     # Run the pipeline stages strictly in order. An exception in any stage
     # stops the run, so cleanup only happens after a successful summary.
     pipeline = (
         record_audio,
         transcribe_audio,
         summarize_with_local_model,
         cleanup,
     )
     for stage in pipeline:
         stage()