feat: massive overhaul to manage costs (#103)
Security Scan and Upload / Security & DefectDojo Upload (push) Successful in 1m1s
CI / Lint & Test (push) Has been cancelled
CI / Build Linux (push) Has been cancelled
CI / Build Windows (cross-compile) (push) Has been cancelled

### Explanation

_No response_

### Issue

Closes #102

### Attestations

- [ ] I have read and agree to the [Code of Conduct](https://docs.nhcarrigan.com/community/coc/)
- [ ] I have read and agree to the [Community Guidelines](https://docs.nhcarrigan.com/community/guide/).
- [ ] My contribution complies with the [Contributor Covenant](https://docs.nhcarrigan.com/dev/covenant/).

### Dependencies

- [ ] I have pinned the dependencies to a specific patch version.

### Style

- [ ] I have run the linter and resolved any errors.
- [ ] My pull request uses an appropriate title, matching the conventional commit standards.
- [ ] My scope of feat/fix/chore/etc. correctly matches the nature of changes in my pull request.

### Tests

- [ ] My contribution adds new code, and I have added tests to cover it.
- [ ] My contribution modifies existing code, and I have updated the tests to reflect these changes.
- [ ] All new and existing tests pass locally with my changes.
- [ ] Code coverage remains at or above the configured threshold.

### Documentation

_No response_

### Versioning

_No response_

Reviewed-on: #103
Co-authored-by: Naomi Carrigan <commits@nhcarrigan.com>
Co-committed-by: Naomi Carrigan <commits@nhcarrigan.com>
This commit was merged in pull request #103.
This commit is contained in:
2026-02-04 19:58:43 -08:00
committed by Naomi Carrigan
parent daedbfd865
commit 1c45507cdf
30 changed files with 4024 additions and 103 deletions
+211 -2
View File
@@ -1,6 +1,66 @@
import { writable, derived } from "svelte/store";
import { listen } from "@tauri-apps/api/event";
import { invoke } from "@tauri-apps/api/core";
import { costTrackingStore } from "./costTracking";
export type ContextWarning = "moderate" | "high" | "critical";
export type BudgetType = "token" | "cost";
// Model pricing (per million tokens) - keep in sync with stats.rs
// Source: https://platform.claude.com/docs/en/about-claude/models/overview
export const MODEL_PRICING: Record<string, { input: number; output: number }> = {
// Current generation (Claude 4.5)
"claude-opus-4-5-20251101": { input: 5.0, output: 25.0 },
"claude-sonnet-4-5-20250929": { input: 3.0, output: 15.0 },
"claude-haiku-4-5-20251001": { input: 1.0, output: 5.0 },
// Previous generation (Claude 4.x)
"claude-opus-4-1-20250805": { input: 15.0, output: 75.0 },
"claude-opus-4-20250514": { input: 15.0, output: 75.0 },
"claude-sonnet-4-20250514": { input: 3.0, output: 15.0 },
// Legacy (Claude 3.x)
"claude-3-7-sonnet-20250219": { input: 3.0, output: 15.0 },
"claude-3-5-sonnet-20241022": { input: 3.0, output: 15.0 },
"claude-3-5-sonnet-20240620": { input: 3.0, output: 15.0 },
"claude-3-5-haiku-20241022": { input: 1.0, output: 5.0 },
"claude-3-opus-20240229": { input: 15.0, output: 75.0 },
"claude-3-sonnet-20240229": { input: 3.0, output: 15.0 },
"claude-3-haiku-20240307": { input: 0.25, output: 1.25 },
};
const DEFAULT_PRICING = { input: 3.0, output: 15.0 }; // Default to Sonnet
export interface CostEstimate {
messageTokens: number;
totalInputTokens: number;
estimatedCost: number;
}
// Estimate cost for a message before sending
export function estimateMessageCost(
messageText: string,
contextTokensUsed: number,
model: string | null
): CostEstimate {
// Estimate tokens using ~4 chars per token heuristic
const messageTokens = Math.ceil(messageText.length / 4);
const totalInputTokens = contextTokensUsed + messageTokens;
const pricing = model ? (MODEL_PRICING[model] ?? DEFAULT_PRICING) : DEFAULT_PRICING;
const estimatedCost = (totalInputTokens / 1_000_000) * pricing.input;
return { messageTokens, totalInputTokens, estimatedCost };
}
export type BudgetStatus =
| { type: "ok" }
| { type: "warning"; budget_type: BudgetType; percent_used: number }
| { type: "exceeded"; budget_type: BudgetType };
// Per-tool token usage statistics
export interface ToolTokenStats {
call_count: number;
estimated_input_tokens: number;
estimated_output_tokens: number;
}
export interface UsageStats {
total_input_tokens: number;
@@ -20,9 +80,18 @@ export interface UsageStats {
session_files_edited: number;
files_created: number;
session_files_created: number;
tools_usage: Record<string, number>;
session_tools_usage: Record<string, number>;
tools_usage: Record<string, ToolTokenStats>;
session_tools_usage: Record<string, ToolTokenStats>;
session_duration_seconds: number;
// Context window tracking
context_tokens_used: number;
context_window_limit: number;
context_utilisation_percent: number;
// Cache analytics (tracks potential savings from repeated tool calls)
potential_cache_hits: number;
potential_cache_savings_tokens: number;
}
// Main stats store
@@ -45,8 +114,24 @@ export const stats = writable<UsageStats>({
tools_usage: {},
session_tools_usage: {},
session_duration_seconds: 0,
context_tokens_used: 0,
context_window_limit: 200000,
context_utilisation_percent: 0,
potential_cache_hits: 0,
potential_cache_savings_tokens: 0,
});
// Format token count with K/M suffix
export function formatTokenCount(tokens: number): string {
if (tokens >= 1000000) {
return `${(tokens / 1000000).toFixed(1)}M`;
}
if (tokens >= 1000) {
return `${(tokens / 1000).toFixed(1)}K`;
}
return tokens.toString();
}
// Derived store for formatted display values
export const formattedStats = derived(stats, ($stats) => {
const formatNumber = (num: number) => num.toLocaleString();
@@ -65,6 +150,20 @@ export const formattedStats = derived(stats, ($stats) => {
}
};
// Format tool stats with token info
const formatToolStats = (toolStats: Record<string, ToolTokenStats>) => {
return Object.entries(toolStats).map(([name, stats]) => ({
name,
callCount: stats.call_count,
totalTokens: stats.estimated_input_tokens + stats.estimated_output_tokens,
formattedTokens: formatTokenCount(
stats.estimated_input_tokens + stats.estimated_output_tokens
),
inputTokens: stats.estimated_input_tokens,
outputTokens: stats.estimated_output_tokens,
}));
};
return {
totalTokens: formatNumber($stats.total_input_tokens + $stats.total_output_tokens),
totalInputTokens: formatNumber($stats.total_input_tokens),
@@ -88,9 +187,116 @@ export const formattedStats = derived(stats, ($stats) => {
sessionDuration: formatDuration($stats.session_duration_seconds),
toolsUsage: $stats.tools_usage,
sessionToolsUsage: $stats.session_tools_usage,
// Formatted tool stats with token info
sessionToolsFormatted: formatToolStats($stats.session_tools_usage),
toolsFormatted: formatToolStats($stats.tools_usage),
// Context window tracking
contextUsed: formatNumber($stats.context_tokens_used),
contextLimit: formatNumber($stats.context_window_limit),
contextRemaining: formatNumber(
Math.max(0, $stats.context_window_limit - $stats.context_tokens_used)
),
contextUtilisation: `${$stats.context_utilisation_percent.toFixed(1)}%`,
};
});
// Derived store for context warning state
export const contextWarning = derived(stats, ($stats): ContextWarning | null => {
if ($stats.context_utilisation_percent >= 90) {
return "critical";
} else if ($stats.context_utilisation_percent >= 75) {
return "high";
} else if ($stats.context_utilisation_percent >= 50) {
return "moderate";
}
return null;
});
// Get warning message for context utilisation
export function getContextWarningMessage(warning: ContextWarning): string {
switch (warning) {
case "moderate":
return "Context window is 50%+ full. Consider starting a new conversation for better performance.";
case "high":
return "Context window is 75%+ full. Responses may degrade. Consider summarising or starting fresh.";
case "critical":
return "Context window is nearly full (90%+)! Start a new conversation to avoid errors.";
}
}
// Budget checking functions
export function checkBudget(
stats: UsageStats,
budgetEnabled: boolean,
tokenBudget: number | null,
costBudget: number | null,
warningThreshold: number
): BudgetStatus {
if (!budgetEnabled) {
return { type: "ok" };
}
const sessionTokens = stats.session_input_tokens + stats.session_output_tokens;
// Check token budget
if (tokenBudget !== null) {
if (sessionTokens >= tokenBudget) {
return { type: "exceeded", budget_type: "token" };
}
const percentUsed = sessionTokens / tokenBudget;
if (percentUsed >= warningThreshold) {
return { type: "warning", budget_type: "token", percent_used: percentUsed * 100 };
}
}
// Check cost budget
if (costBudget !== null) {
if (stats.session_cost_usd >= costBudget) {
return { type: "exceeded", budget_type: "cost" };
}
const percentUsed = stats.session_cost_usd / costBudget;
if (percentUsed >= warningThreshold) {
return { type: "warning", budget_type: "cost", percent_used: percentUsed * 100 };
}
}
return { type: "ok" };
}
// Get budget status message
export function getBudgetStatusMessage(status: BudgetStatus): string | null {
if (status.type === "ok") {
return null;
}
const budgetTypeLabel = status.budget_type === "token" ? "token" : "cost";
if (status.type === "exceeded") {
return `Session ${budgetTypeLabel} budget exceeded! Consider starting a new session.`;
}
return `Approaching ${budgetTypeLabel} budget limit (${status.percent_used.toFixed(0)}% used).`;
}
// Get remaining budget values
export function getRemainingTokenBudget(
stats: UsageStats,
tokenBudget: number | null
): number | null {
if (tokenBudget === null) return null;
const used = stats.session_input_tokens + stats.session_output_tokens;
return Math.max(0, tokenBudget - used);
}
export function getRemainingCostBudget(
stats: UsageStats,
costBudget: number | null
): number | null {
if (costBudget === null) return null;
return Math.max(0, costBudget - stats.session_cost_usd);
}
// Note: Cost calculation is now done in the Rust backend
// Initialize stats listener
@@ -102,6 +308,9 @@ export async function initStatsListener() {
// The backend already tracks all totals - just set the stats directly
stats.set(newStats);
// Refresh cost tracking to check for alerts (debounced - won't spam)
costTrackingStore.refresh();
});
// Load initial persisted stats from backend (no bridge required)