generated from nhcarrigan/template
feat: use whisperx to allow diarization
This commit is contained in:
@@ -6,6 +6,22 @@ This is a local recording, transcription, and summarisation script that listens
|
|||||||
|
|
||||||
Gotta run it locally. Sorry!
|
Gotta run it locally. Sorry!
|
||||||
|
|
||||||
|
1. Install Ollama, then pull the `llama3:8b` model.
|
||||||
|
2. Install `uv`.
|
||||||
|
3. Run `uvx python@3.12 whisperx` to install WhisperX.
|
||||||
|
4. Run `uvx python@3.12 main.py`.
|
||||||
|
|
||||||
|
### Diarization
|
||||||
|
|
||||||
|
To download the models needed for diarization, create a Hugging Face account, then accept the access terms for these two gated models:
|
||||||
|
|
||||||
|
1. https://huggingface.co/pyannote/segmentation-3.0
|
||||||
|
2. https://huggingface.co/pyannote/speaker-diarization-3.1
|
||||||
|
|
||||||
|
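Uncomment the `--diarize` and `--hf_token` lines in the script and replace the placeholder with your Hugging Face token. When you uncomment `--hf_token`, also add a comma after the preceding `"--output_dir", "."` entry; otherwise Python joins the two adjacent strings into a single argument. The token needs read permission for the gated public repositories you have access to.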
|
||||||
|
|
||||||
|
Once the script has run successfully with this token at least once, the models have been downloaded, so you can remove your token and the script should continue to work.
|
||||||
|
|
||||||
## Feedback and Bugs
|
## Feedback and Bugs
|
||||||
|
|
||||||
If you have feedback or a bug report, please feel free to open a GitHub issue!
|
If you have feedback or a bug report, please feel free to open a GitHub issue!
|
||||||
|
|||||||
@@ -26,15 +26,19 @@ def record_audio():
|
|||||||
def transcribe_audio():
|
def transcribe_audio():
|
||||||
print("🔠 Transcribing with WhisperX...")
|
print("🔠 Transcribing with WhisperX...")
|
||||||
subprocess.run([
|
subprocess.run([
|
||||||
# "pipenv run python3 -m whisperx",
|
"uvx",
|
||||||
"whisper",
|
"whisperx",
|
||||||
MEETING_FILE,
|
MEETING_FILE,
|
||||||
"--device", "cpu", # Use CPU
|
"--device", "cpu", # Use CPU
|
||||||
"--language", "en",
|
"--language", "en",
|
||||||
# "--diarize", Put back when whisperX works.
|
# Uncomment the next line to enable diarization. This requires downloading the models from Hugging Face.
|
||||||
|
# "--diarize",
|
||||||
|
"--compute_type", "float32",
|
||||||
"--model", WHISPER_MODEL,
|
"--model", WHISPER_MODEL,
|
||||||
"--output_format", "json",
|
"--output_format", "json",
|
||||||
"--output_dir", "."
|
"--output_dir", "."
|
||||||
|
# You should only need to uncomment this once to perform the initial model download.
|
||||||
|
# "--hf_token", "your_huggingface_token_here"
|
||||||
], check=True)
|
], check=True)
|
||||||
|
|
||||||
def summarize_with_local_model():
|
def summarize_with_local_model():
|
||||||
@@ -43,7 +47,7 @@ def summarize_with_local_model():
|
|||||||
transcript = json.load(f)
|
transcript = json.load(f)
|
||||||
|
|
||||||
text = "\n".join(
|
text = "\n".join(
|
||||||
seg["text"]
|
f"{seg['speaker']}: {seg['text']}"
|
||||||
for seg in transcript["segments"]
|
for seg in transcript["segments"]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user