feat: massive overhaul to manage costs (#103)

### Explanation _No response_ ### Issue Closes #102 ### Attestations - [ ] I have read and agree to the [Code of Conduct](https://docs.nhcarrigan.com/community/coc/) - [ ] I have read and agree to the [Community Guidelines](https://docs.nhcarrigan.com/community/guide/). - [ ] My contribution complies with the [Contributor Covenant](https://docs.nhcarrigan.com/dev/covenant/). ### Dependencies - [ ] I have pinned the dependencies to a specific patch version. ### Style - [ ] I have run the linter and resolved any errors. - [ ] My pull request uses an appropriate title, matching the conventional commit standards. - [ ] My scope of feat/fix/chore/etc. correctly matches the nature of changes in my pull request. ### Tests - [ ] My contribution adds new code, and I have added tests to cover it. - [ ] My contribution modifies existing code, and I have updated the tests to reflect these changes. - [ ] All new and existing tests pass locally with my changes. - [ ] Code coverage remains at or above the configured threshold. ### Documentation _No response_ ### Versioning _No response_ Reviewed-on: #103 Co-authored-by: Naomi Carrigan <commits@nhcarrigan.com> Co-committed-by: Naomi Carrigan <commits@nhcarrigan.com>
2026-02-04 19:58:43 -08:00
parent daedbfd865
commit 1c45507cdf
30 changed files with 4024 additions and 103 deletions
@@ -1,6 +1,66 @@
 import { writable, derived } from "svelte/store";
 import { listen } from "@tauri-apps/api/event";
 import { invoke } from "@tauri-apps/api/core";
+import { costTrackingStore } from "./costTracking";
+
+// Severity level of context-window pressure, ordered from least to most severe.
+export type ContextWarning = "moderate" | "high" | "critical";
+// Which kind of session budget a budget status refers to: a token count or a cost.
+export type BudgetType = "token" | "cost";
+
+// Model pricing table, keyed by full model identifier.
+// Rates are per million tokens, split into input and output rates
+// (presumably USD - TODO confirm against stats.rs).
+// Keep in sync with stats.rs.
+// Source: https://platform.claude.com/docs/en/about-claude/models/overview
+export const MODEL_PRICING: Record<string, { input: number; output: number }> = {
+  // Current generation (Claude 4.5)
+  "claude-opus-4-5-20251101": { input: 5.0, output: 25.0 },
+  "claude-sonnet-4-5-20250929": { input: 3.0, output: 15.0 },
+  "claude-haiku-4-5-20251001": { input: 1.0, output: 5.0 },
+  // Previous generation (Claude 4.x)
+  "claude-opus-4-1-20250805": { input: 15.0, output: 75.0 },
+  "claude-opus-4-20250514": { input: 15.0, output: 75.0 },
+  "claude-sonnet-4-20250514": { input: 3.0, output: 15.0 },
+  // Legacy (Claude 3.x)
+  "claude-3-7-sonnet-20250219": { input: 3.0, output: 15.0 },
+  "claude-3-5-sonnet-20241022": { input: 3.0, output: 15.0 },
+  "claude-3-5-sonnet-20240620": { input: 3.0, output: 15.0 },
+  "claude-3-5-haiku-20241022": { input: 1.0, output: 5.0 },
+  "claude-3-opus-20240229": { input: 15.0, output: 75.0 },
+  "claude-3-sonnet-20240229": { input: 3.0, output: 15.0 },
+  "claude-3-haiku-20240307": { input: 0.25, output: 1.25 },
+};
+
+// Fallback rates used by estimateMessageCost when the model is null or has no
+// entry in MODEL_PRICING. The values match the Sonnet rows of the table.
+const DEFAULT_PRICING = { input: 3.0, output: 15.0 }; // Default to Sonnet
+
+// Result of estimateMessageCost: a pre-send estimate covering input tokens only.
+export interface CostEstimate {
+  // Approximate token count of the message text (~4 characters per token).
+  messageTokens: number;
+  // Context tokens already used plus messageTokens.
+  totalInputTokens: number;
+  // totalInputTokens priced at the model's per-million input rate.
+  estimatedCost: number;
+}
+
+/**
+ * Estimate the input-side cost of a message before it is sent.
+ *
+ * Only input tokens are priced here; output tokens are not part of the estimate.
+ *
+ * @param messageText - Draft message; its token count is approximated at ~4 characters per token.
+ * @param contextTokensUsed - Tokens already occupying the context window, added to the message estimate.
+ * @param model - Model identifier used to look up pricing. `null`, an empty string, or a model
+ *   without its own entry in `MODEL_PRICING` falls back to `DEFAULT_PRICING`.
+ * @returns The estimated message tokens, the total input tokens, and the estimated cost.
+ */
+export function estimateMessageCost(
+  messageText: string,
+  contextTokensUsed: number,
+  model: string | null
+): CostEstimate {
+  // Estimate tokens using ~4 chars per token heuristic
+  const messageTokens = Math.ceil(messageText.length / 4);
+  const totalInputTokens = contextTokensUsed + messageTokens;
+
+  // Own-property check: a plain index lookup would also match inherited keys
+  // such as "constructor" or "toString", which are not pricing entries and
+  // would turn the estimate into NaN.
+  const pricing =
+    model !== null && Object.prototype.hasOwnProperty.call(MODEL_PRICING, model)
+      ? (MODEL_PRICING[model] ?? DEFAULT_PRICING)
+      : DEFAULT_PRICING;
+  const estimatedCost = (totalInputTokens / 1_000_000) * pricing.input;
+
+  return { messageTokens, totalInputTokens, estimatedCost };
+}
+// Outcome of a budget check, discriminated by `type`.
+// For "warning", percent_used is on a 0-100 scale (checkBudget multiplies the
+// used/limit ratio by 100 before storing it).
+export type BudgetStatus =
+  | { type: "ok" }
+  | { type: "warning"; budget_type: BudgetType; percent_used: number }
+  | { type: "exceeded"; budget_type: BudgetType };
+
+// Per-tool token usage statistics.
+// Used as the value type of UsageStats.tools_usage and session_tools_usage.
+export interface ToolTokenStats {
+  // Number of calls recorded for the tool.
+  call_count: number;
+  // Estimated (not exact) input-side tokens for the tool.
+  estimated_input_tokens: number;
+  // Estimated (not exact) output-side tokens for the tool.
+  estimated_output_tokens: number;
+}

 export interface UsageStats {
  total_input_tokens: number;
@@ -20,9 +80,18 @@ export interface UsageStats {
  session_files_edited: number;
  files_created: number;
  session_files_created: number;
-  tools_usage: Record<string, number>;
-  session_tools_usage: Record<string, number>;
+  tools_usage: Record<string, ToolTokenStats>;
+  session_tools_usage: Record<string, ToolTokenStats>;
  session_duration_seconds: number;
+
+  // Context window tracking
+  context_tokens_used: number;
+  context_window_limit: number;
+  context_utilisation_percent: number;
+
+  // Cache analytics (tracks potential savings from repeated tool calls)
+  potential_cache_hits: number;
+  potential_cache_savings_tokens: number;
 }

 // Main stats store
@@ -45,8 +114,24 @@ export const stats = writable<UsageStats>({
  tools_usage: {},
  session_tools_usage: {},
  session_duration_seconds: 0,
+  context_tokens_used: 0,
+  context_window_limit: 200000,
+  context_utilisation_percent: 0,
+  potential_cache_hits: 0,
+  potential_cache_savings_tokens: 0,
 });

+/**
+ * Format a token count with a K/M suffix and one decimal place.
+ *
+ * Values below 1000 are returned unchanged via `toString()`.
+ *
+ * @param tokens - Token count to format.
+ * @returns A string such as "999", "1.5K" or "2.5M".
+ */
+export function formatTokenCount(tokens: number): string {
+  if (tokens >= 1000) {
+    const thousands = (tokens / 1000).toFixed(1);
+    // Counts just under a million (e.g. 999,999) round up to "1000.0" at one
+    // decimal place; promote those to the M suffix instead of printing "1000.0K".
+    if (tokens < 1000000 && Number(thousands) < 1000) {
+      return `${thousands}K`;
+    }
+    return `${(tokens / 1000000).toFixed(1)}M`;
+  }
+  return tokens.toString();
+}
+
 // Derived store for formatted display values
 export const formattedStats = derived(stats, ($stats) => {
  const formatNumber = (num: number) => num.toLocaleString();
@@ -65,6 +150,20 @@ export const formattedStats = derived(stats, ($stats) => {
    }
  };

+  // Turn a per-tool stats map into display rows carrying combined token totals
+  const formatToolStats = (toolStats: Record<string, ToolTokenStats>) =>
+    Object.entries(toolStats).map(([name, entry]) => {
+      const inputTokens = entry.estimated_input_tokens;
+      const outputTokens = entry.estimated_output_tokens;
+      const totalTokens = inputTokens + outputTokens;
+      return {
+        name,
+        callCount: entry.call_count,
+        totalTokens,
+        formattedTokens: formatTokenCount(totalTokens),
+        inputTokens,
+        outputTokens,
+      };
+    });
+
+
  return {
    totalTokens: formatNumber($stats.total_input_tokens + $stats.total_output_tokens),
    totalInputTokens: formatNumber($stats.total_input_tokens),
@@ -88,9 +187,116 @@ export const formattedStats = derived(stats, ($stats) => {
    sessionDuration: formatDuration($stats.session_duration_seconds),
    toolsUsage: $stats.tools_usage,
    sessionToolsUsage: $stats.session_tools_usage,
+    // Formatted tool stats with token info
+    sessionToolsFormatted: formatToolStats($stats.session_tools_usage),
+    toolsFormatted: formatToolStats($stats.tools_usage),
+
+    // Context window tracking
+    contextUsed: formatNumber($stats.context_tokens_used),
+    contextLimit: formatNumber($stats.context_window_limit),
+    contextRemaining: formatNumber(
+      Math.max(0, $stats.context_window_limit - $stats.context_tokens_used)
+    ),
+    contextUtilisation: `${$stats.context_utilisation_percent.toFixed(1)}%`,
  };
 });

+// Derived store mapping context utilisation onto a warning level (null below 50%)
+export const contextWarning = derived(stats, ($stats): ContextWarning | null => {
+  const utilisation = $stats.context_utilisation_percent;
+  if (utilisation >= 90) return "critical";
+  if (utilisation >= 75) return "high";
+  if (utilisation >= 50) return "moderate";
+  return null;
+});
+
+/**
+ * Return the user-facing message for a context-utilisation warning level.
+ *
+ * @param warning - Warning level to describe.
+ * @returns The message text for that level.
+ */
+export function getContextWarningMessage(warning: ContextWarning): string {
+  const moderateMessage =
+    "Context window is 50%+ full. Consider starting a new conversation for better performance.";
+  const highMessage =
+    "Context window is 75%+ full. Responses may degrade. Consider summarising or starting fresh.";
+  const criticalMessage =
+    "Context window is nearly full (90%+)! Start a new conversation to avoid errors.";
+
+  switch (warning) {
+    case "critical":
+      return criticalMessage;
+    case "high":
+      return highMessage;
+    case "moderate":
+      return moderateMessage;
+  }
+}
+
+/**
+ * Check the current session's usage against the configured budgets.
+ *
+ * An exceeded budget always takes precedence over a warning: both budgets are
+ * tested for "exceeded" first, and only then for "warning". Within the same
+ * severity the token budget is reported before the cost budget.
+ *
+ * @param stats - Usage stats; the session token counts and session cost are read.
+ * @param budgetEnabled - When false the check is skipped and "ok" is returned.
+ * @param tokenBudget - Session token limit, or `null` when no token budget is set.
+ * @param costBudget - Session cost limit, or `null` when no cost budget is set.
+ * @param warningThreshold - Fraction of a budget (e.g. 0.8 for 80%) at which a warning is raised.
+ * @returns "exceeded" when usage has reached a limit, "warning" (with the
+ *   percentage used) when usage has reached the threshold, otherwise "ok".
+ */
+export function checkBudget(
+  stats: UsageStats,
+  budgetEnabled: boolean,
+  tokenBudget: number | null,
+  costBudget: number | null,
+  warningThreshold: number
+): BudgetStatus {
+  if (!budgetEnabled) {
+    return { type: "ok" };
+  }
+
+  // Token budget is listed first so it wins ties within the same severity.
+  const budgets: Array<{ budgetType: BudgetType; used: number; limit: number | null }> = [
+    {
+      budgetType: "token",
+      used: stats.session_input_tokens + stats.session_output_tokens,
+      limit: tokenBudget,
+    },
+    { budgetType: "cost", used: stats.session_cost_usd, limit: costBudget },
+  ];
+
+  // Pass 1: any exceeded budget outranks a warning on the other budget.
+  for (const { budgetType, used, limit } of budgets) {
+    if (limit !== null && used >= limit) {
+      return { type: "exceeded", budget_type: budgetType };
+    }
+  }
+
+  // Pass 2: nothing is exceeded, so report the first budget past the threshold.
+  for (const { budgetType, used, limit } of budgets) {
+    if (limit === null) {
+      continue;
+    }
+    const fractionUsed = used / limit;
+    if (fractionUsed >= warningThreshold) {
+      return { type: "warning", budget_type: budgetType, percent_used: fractionUsed * 100 };
+    }
+  }
+
+  return { type: "ok" };
+}
+
+/**
+ * Build the user-facing message for a budget status.
+ *
+ * @param status - Budget status to describe.
+ * @returns `null` for an "ok" status, otherwise the exceeded or approaching-limit message.
+ */
+export function getBudgetStatusMessage(status: BudgetStatus): string | null {
+  switch (status.type) {
+    case "ok":
+      return null;
+    case "exceeded": {
+      const label = status.budget_type === "token" ? "token" : "cost";
+      return `Session ${label} budget exceeded! Consider starting a new session.`;
+    }
+    default: {
+      const label = status.budget_type === "token" ? "token" : "cost";
+      const percent = status.percent_used.toFixed(0);
+      return `Approaching ${label} budget limit (${percent}% used).`;
+    }
+  }
+}
+
+/**
+ * Compute how many tokens remain in the session token budget.
+ *
+ * @param stats - Usage stats; the session input and output token counts are read.
+ * @param tokenBudget - Session token limit, or `null` when no token budget is set.
+ * @returns `null` when no budget is set, otherwise the remaining tokens, never below 0.
+ */
+export function getRemainingTokenBudget(
+  stats: UsageStats,
+  tokenBudget: number | null
+): number | null {
+  if (tokenBudget === null) {
+    return null;
+  }
+  const { session_input_tokens: inputTokens, session_output_tokens: outputTokens } = stats;
+  const consumed = inputTokens + outputTokens;
+  return Math.max(0, tokenBudget - consumed);
+}
+
+/**
+ * Compute how much of the session cost budget remains.
+ *
+ * @param stats - Usage stats; the session cost is read.
+ * @param costBudget - Session cost limit, or `null` when no cost budget is set.
+ * @returns `null` when no budget is set, otherwise the remaining amount, never below 0.
+ */
+export function getRemainingCostBudget(
+  stats: UsageStats,
+  costBudget: number | null
+): number | null {
+  return costBudget === null ? null : Math.max(0, costBudget - stats.session_cost_usd);
+}
+
 // Note: Cost calculation is now done in the Rust backend

 // Initialize stats listener
@@ -102,6 +308,9 @@ export async function initStatsListener() {

    // The backend already tracks all totals - just set the stats directly
    stats.set(newStats);
+
+    // Refresh cost tracking to check for alerts (debounced - won't spam)
+    costTrackingStore.refresh();
  });

  // Load initial persisted stats from backend (no bridge required)