"""Local LLM for meeting summarization using Llama.""" from pathlib import Path from typing import Optional from llama_cpp import Llama class LlamaSummarizer: """Handles meeting summarization using local Llama model.""" def __init__(self, model_dir: Path, model_size: str = "1B"): """Initialize Llama model.""" self.model_dir = model_dir self.is_loaded = False model_path = model_dir / f"Llama-3.2-{model_size}-Instruct-Q4_K_M.gguf" try: self.llm = Llama( model_path=str(model_path), n_ctx=8192, # Context window n_threads=4, # CPU threads n_gpu_layers=-1, # Use GPU if available verbose=False, ) self.is_loaded = True except Exception as e: print(f"Failed to load Llama model: {e}") self.is_loaded = False async def summarize(self, transcript: str) -> Optional[str]: """Generate a meeting summary from transcript.""" if not self.is_loaded: return None prompt = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|> You are a helpful assistant that creates concise meeting summaries. Focus on: - Key decisions made - Action items and who owns them - Important discussions and their outcomes - Next steps Keep the summary structured and easy to scan.<|eot_id|><|start_header_id|>user<|end_header_id|> Please summarize this meeting transcript: {transcript}<|eot_id|><|start_header_id|>assistant<|end_header_id|> Meeting Summary: """ try: response = self.llm( prompt, max_tokens=1024, temperature=0.7, top_p=0.9, stop=["<|eot_id|>", "<|end_of_text|>"], ) return response["choices"][0]["text"].strip() except Exception as e: print(f"Summarization error: {e}") return None