generated from nhcarrigan/template
67 lines
1.9 KiB
Python
67 lines
1.9 KiB
Python
"""Local LLM for meeting summarization using Llama."""
|
|
|
|
import asyncio
import functools
import threading
from pathlib import Path
from typing import Optional

from llama_cpp import Llama
|
|
|
|
|
|
class LlamaSummarizer:
    """Summarize meeting transcripts with a local Llama model.

    The model is loaded once, when the instance is constructed. A load
    failure is reported on stdout and leaves ``is_loaded`` set to ``False``;
    ``summarize`` then returns ``None`` instead of raising.
    """

    def __init__(
        self,
        model_dir: Path,
        model_size: str = "1B",
        *,
        n_ctx: int = 8192,
        n_threads: int = 4,
        n_gpu_layers: int = -1,
    ):
        """Load the quantized Llama 3.2 Instruct model from *model_dir*.

        Args:
            model_dir: Directory containing the ``.gguf`` model file.
            model_size: Size tag used to build the model file name
                (``Llama-3.2-<model_size>-Instruct-Q4_K_M.gguf``).
            n_ctx: Context window size passed to ``Llama``.
            n_threads: Number of CPU threads passed to ``Llama``.
            n_gpu_layers: Layers to offload to the GPU; ``-1`` requests GPU
                use when available.
        """
        # Path() makes a plain string directory work with the `/` operator.
        self.model_dir = Path(model_dir)
        self.is_loaded = False
        # Always define the attribute so it exists even when loading fails.
        self.llm = None
        # Inference runs in worker threads; this lock keeps calls on the
        # single model object strictly one at a time.
        self._inference_lock = threading.Lock()

        model_path = self.model_dir / f"Llama-3.2-{model_size}-Instruct-Q4_K_M.gguf"

        try:
            self.llm = Llama(
                model_path=str(model_path),
                n_ctx=n_ctx,  # Context window
                n_threads=n_threads,  # CPU threads
                n_gpu_layers=n_gpu_layers,  # Use GPU if available
                verbose=False,
            )
        except Exception as e:
            # Best-effort load: report the problem and stay unloaded.
            print(f"Failed to load Llama model: {e}")
        else:
            self.is_loaded = True

    @staticmethod
    def _build_prompt(transcript: str) -> str:
        """Return the Llama 3 chat-formatted prompt for *transcript*."""
        # NOTE(review): llama-cpp-python may prepend its own BOS token when
        # tokenizing; confirm the explicit <|begin_of_text|> below does not
        # result in a duplicate BOS.
        return f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>

You are a helpful assistant that creates concise meeting summaries. Focus on:
- Key decisions made
- Action items and who owns them
- Important discussions and their outcomes
- Next steps

Keep the summary structured and easy to scan.<|eot_id|><|start_header_id|>user<|end_header_id|>

Please summarize this meeting transcript:

{transcript}<|eot_id|><|start_header_id|>assistant<|end_header_id|>

Meeting Summary:
"""

    def _complete(self, prompt: str):
        """Run one blocking completion; intended to be called off the event loop."""
        with self._inference_lock:
            return self.llm(
                prompt,
                max_tokens=1024,
                temperature=0.7,
                top_p=0.9,
                stop=["<|eot_id|>", "<|end_of_text|>"],
            )

    async def summarize(self, transcript: str) -> Optional[str]:
        """Generate a meeting summary from *transcript*.

        Inference is executed in the event loop's default executor so the
        loop stays responsive while the model generates.

        Args:
            transcript: Raw meeting transcript text.

        Returns:
            The stripped summary text, or ``None`` when the model is not
            loaded, the transcript is empty/blank, or inference fails.
        """
        if not self.is_loaded:
            return None

        # Nothing to summarize; skip a generation that could only invent text.
        if not transcript or not transcript.strip():
            return None

        prompt = self._build_prompt(transcript)

        try:
            loop = asyncio.get_running_loop()
            response = await loop.run_in_executor(
                None, functools.partial(self._complete, prompt)
            )
            return response["choices"][0]["text"].strip()
        except Exception as e:
            print(f"Summarization error: {e}")
            return None