feat: massive overhaul to manage costs (#103)

### Explanation _No response_ ### Issue Closes #102 ### Attestations - [ ] I have read and agree to the [Code of Conduct](https://docs.nhcarrigan.com/community/coc/) - [ ] I have read and agree to the [Community Guidelines](https://docs.nhcarrigan.com/community/guide/). - [ ] My contribution complies with the [Contributor Covenant](https://docs.nhcarrigan.com/dev/covenant/). ### Dependencies - [ ] I have pinned the dependencies to a specific patch version. ### Style - [ ] I have run the linter and resolved any errors. - [ ] My pull request uses an appropriate title, matching the conventional commit standards. - [ ] My scope of feat/fix/chore/etc. correctly matches the nature of changes in my pull request. ### Tests - [ ] My contribution adds new code, and I have added tests to cover it. - [ ] My contribution modifies existing code, and I have updated the tests to reflect these changes. - [ ] All new and existing tests pass locally with my changes. - [ ] Code coverage remains at or above the configured threshold. ### Documentation _No response_ ### Versioning _No response_ Reviewed-on: #103 Co-authored-by: Naomi Carrigan <commits@nhcarrigan.com> Co-committed-by: Naomi Carrigan <commits@nhcarrigan.com>
2026-02-04 19:58:43 -08:00
parent daedbfd865
commit 1c45507cdf
30 changed files with 4024 additions and 103 deletions
@@ -5,6 +5,110 @@ use std::collections::HashMap;
 use std::time::Instant;
 use tauri_plugin_store::StoreExt;

+/// Per-tool token usage statistics
+///
+/// Used as the value type of the per-tool usage maps, which are keyed by tool name.
+#[derive(Debug, Clone, Serialize, Deserialize, Default)]
+pub struct ToolTokenStats {
+    /// Number of calls recorded for the tool.
+    pub call_count: u64,
+    /// Running total of estimated input tokens attributed to the tool.
+    pub estimated_input_tokens: u64,
+    /// Running total of estimated output tokens attributed to the tool.
+    pub estimated_output_tokens: u64,
+}
+
+impl ToolTokenStats {
+    /// Create a zeroed stats record; equivalent to `ToolTokenStats::default()`.
+    // NOTE(review): within this file only the unit tests appear to call this,
+    // hence the lint suppression - confirm before removing it.
+    #[allow(dead_code)]
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Record one more invocation of the tool.
+    ///
+    /// Saturates at `u64::MAX` so an extreme counter clamps instead of
+    /// panicking (debug builds) or wrapping to a small value (default release
+    /// builds).
+    pub fn increment_call(&mut self) {
+        self.call_count = self.call_count.saturating_add(1);
+    }
+
+    /// Add estimated `input` and `output` token counts to the running totals.
+    ///
+    /// Both totals saturate at `u64::MAX` rather than overflowing.
+    pub fn add_tokens(&mut self, input: u64, output: u64) {
+        self.estimated_input_tokens = self.estimated_input_tokens.saturating_add(input);
+        self.estimated_output_tokens = self.estimated_output_tokens.saturating_add(output);
+    }
+
+    /// Combined estimated input and output tokens, saturating at `u64::MAX`.
+    // NOTE(review): within this file only the unit tests appear to call this,
+    // hence the lint suppression - confirm before removing it.
+    #[allow(dead_code)]
+    pub fn total_tokens(&self) -> u64 {
+        self.estimated_input_tokens
+            .saturating_add(self.estimated_output_tokens)
+    }
+}
+
+/// Warning levels for context window utilisation
+///
+/// Serialised with snake_case variant names (for example `Critical` becomes
+/// `"critical"`).
+#[allow(dead_code)]
+#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
+#[serde(rename_all = "snake_case")]
+pub enum ContextWarning {
+    /// 50-74% utilisation - conversation is getting long
+    Moderate,
+    /// 75-89% utilisation - consider summarising
+    High,
+    /// 90%+ utilisation - approaching limit
+    Critical,
+}
+
+/// Budget status indicating whether user is within their limits
+///
+/// Variant and field names are serialised in snake_case.
+#[allow(dead_code)]
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
+#[serde(rename_all = "snake_case")]
+pub enum BudgetStatus {
+    /// Within budget, no concerns
+    Ok,
+    /// Approaching budget limit (warning threshold reached)
+    Warning {
+        /// Which budget triggered the warning.
+        budget_type: BudgetType,
+        /// Share of that budget already consumed, on a 0-100 percentage scale.
+        percent_used: f32,
+    },
+    /// Budget exceeded
+    Exceeded {
+        /// Which budget was exceeded.
+        budget_type: BudgetType,
+    },
+}
+
+/// Type of budget limit
+///
+/// Serialised with snake_case variant names (`"token"` / `"cost"`).
+#[allow(dead_code)]
+#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
+#[serde(rename_all = "snake_case")]
+pub enum BudgetType {
+    /// Limit on the number of session tokens (input plus output).
+    Token,
+    /// Limit on the session cost.
+    Cost,
+}
+
+impl ContextWarning {
+    /// User-facing advice matching this warning level.
+    #[allow(dead_code)]
+    pub fn message(&self) -> &'static str {
+        let advice = match *self {
+            Self::Moderate => "Context window is 50%+ full. Consider starting a new conversation for better performance.",
+            Self::High => "Context window is 75%+ full. Responses may degrade. Consider summarising or starting fresh.",
+            Self::Critical => "Context window is nearly full (90%+)! Start a new conversation to avoid errors.",
+        };
+        advice
+    }
+}
+
+/// Look up the context window size, in tokens, for `model`.
+///
+/// Every listed Claude model, and any other identifier that starts with
+/// `claude`, maps to 200K tokens. Anything else (Ollama models, etc.) gets a
+/// conservative 128K default.
+fn get_context_window_limit(model: &str) -> u64 {
+    const CLAUDE_WINDOW: u64 = 200_000;
+    const FALLBACK_WINDOW: u64 = 128_000;
+
+    // Explicitly recognised model identifiers, grouped by generation
+    const KNOWN_CLAUDE_MODELS: &[&str] = &[
+        // Claude 4.5 family
+        "claude-opus-4-5-20251101",
+        "claude-sonnet-4-5-20250929",
+        "claude-haiku-4-5-20251001",
+        // Claude 4.x family
+        "claude-opus-4-1-20250805",
+        "claude-opus-4-20250514",
+        "claude-sonnet-4-20250514",
+        // Claude 3.x family
+        "claude-3-7-sonnet-20250219",
+        "claude-3-5-sonnet-20241022",
+        "claude-3-5-sonnet-20240620",
+        "claude-3-5-haiku-20241022",
+        "claude-3-opus-20240229",
+        "claude-3-sonnet-20240229",
+        "claude-3-haiku-20240307",
+    ];
+
+    let is_known = KNOWN_CLAUDE_MODELS.iter().any(|known| *known == model);
+    // Unlisted Claude models are assumed to share the standard window
+    if is_known || model.starts_with("claude") {
+        CLAUDE_WINDOW
+    } else {
+        FALLBACK_WINDOW
+    }
+}
+
 #[derive(Debug, Clone, Serialize, Deserialize, Default)]
 pub struct UsageStats {
    pub total_input_tokens: u64,
@@ -24,8 +128,8 @@ pub struct UsageStats {
    pub session_files_edited: u64,
    pub files_created: u64,
    pub session_files_created: u64,
-    pub tools_usage: HashMap<String, u64>,
-    pub session_tools_usage: HashMap<String, u64>,
+    pub tools_usage: HashMap<String, ToolTokenStats>,
+    pub session_tools_usage: HashMap<String, ToolTokenStats>,
    pub session_duration_seconds: u64,
    #[serde(skip)]
    pub session_start: Option<Instant>,
@@ -38,6 +142,15 @@ pub struct UsageStats {
    pub night_sessions: u64,   // Sessions started after 10 PM
    pub last_session_date: Option<String>, // ISO date string for streak tracking

+    // Context window tracking
+    pub context_tokens_used: u64,
+    pub context_window_limit: u64,
+    pub context_utilisation_percent: f32,
+
+    // Cache analytics (tracks potential savings from repeated tool calls)
+    pub potential_cache_hits: u64,
+    pub potential_cache_savings_tokens: u64,
+
    // Achievement tracking
    #[serde(skip)]
    pub achievements: AchievementProgress,
@@ -61,6 +174,114 @@ impl UsageStats {
        self.session_cost_usd += cost;

        self.model = Some(model.to_string());
+
+        // Update context window tracking
+        self.update_context_tracking(model);
+    }
+
+    /// Refresh the context-window fields for `model`.
+    ///
+    /// The window limit comes from the model lookup; the tokens-used figure is
+    /// the cumulative session input count, used as a proxy for context growth.
+    pub fn update_context_tracking(&mut self, model: &str) {
+        let window = get_context_window_limit(model);
+        let used = self.session_input_tokens;
+
+        self.context_window_limit = window;
+        self.context_tokens_used = used;
+
+        // With a zero-sized window the ratio is undefined, so the previous
+        // percentage is left untouched
+        if window == 0 {
+            return;
+        }
+        self.context_utilisation_percent = (used as f32 / window as f32) * 100.0;
+    }
+
+    /// Map the current utilisation percentage onto a warning tier, if any.
+    ///
+    /// Tiers are tested from most to least severe: 90%+ is critical, 75%+ is
+    /// high, 50%+ is moderate, and anything lower yields `None`.
+    #[allow(dead_code)]
+    pub fn get_context_warning(&self) -> Option<ContextWarning> {
+        match self.context_utilisation_percent {
+            pct if pct >= 90.0 => Some(ContextWarning::Critical),
+            pct if pct >= 75.0 => Some(ContextWarning::High),
+            pct if pct >= 50.0 => Some(ContextWarning::Moderate),
+            _ => None,
+        }
+    }
+
+    /// Tokens left before the context window limit, floored at zero.
+    #[allow(dead_code)]
+    pub fn estimate_remaining_tokens(&self) -> u64 {
+        let (limit, used) = (self.context_window_limit, self.context_tokens_used);
+        limit.saturating_sub(used)
+    }
+
+    /// Check budget status given current usage and budget settings
+    ///
+    /// * `budget_enabled` - when `false` the check is skipped and `Ok` is returned.
+    /// * `token_budget` - optional cap on session tokens (input plus output).
+    /// * `cost_budget` - optional cap on the session cost.
+    /// * `warning_threshold` - fraction of a budget (e.g. `0.8`, not `80.0`)
+    ///   from which a warning is reported.
+    ///
+    /// An exceeded budget always outranks a warning: both limits are tested
+    /// for overrun before either is compared with the warning threshold.
+    /// Within each tier the token budget is examined before the cost budget.
+    /// The `percent_used` carried by a warning is on a 0-100 scale.
+    #[allow(dead_code)]
+    pub fn check_budget(
+        &self,
+        budget_enabled: bool,
+        token_budget: Option<u64>,
+        cost_budget: Option<f64>,
+        warning_threshold: f32,
+    ) -> BudgetStatus {
+        if !budget_enabled {
+            return BudgetStatus::Ok;
+        }
+
+        // Saturate so an extreme pair of counters cannot overflow the sum
+        let session_tokens = self
+            .session_input_tokens
+            .saturating_add(self.session_output_tokens);
+
+        // Tier 1: hard overruns. Checked for BOTH budgets first so that a
+        // token warning can never mask an exceeded cost budget.
+        if let Some(limit) = token_budget {
+            if session_tokens >= limit {
+                return BudgetStatus::Exceeded {
+                    budget_type: BudgetType::Token,
+                };
+            }
+        }
+        if let Some(limit) = cost_budget {
+            if self.session_cost_usd >= limit {
+                return BudgetStatus::Exceeded {
+                    budget_type: BudgetType::Cost,
+                };
+            }
+        }
+
+        // Tier 2: warnings. A zero token limit cannot reach the division
+        // below because it was already reported as exceeded in tier 1.
+        if let Some(limit) = token_budget {
+            let fraction_used = session_tokens as f32 / limit as f32;
+            if fraction_used >= warning_threshold {
+                return BudgetStatus::Warning {
+                    budget_type: BudgetType::Token,
+                    percent_used: fraction_used * 100.0,
+                };
+            }
+        }
+        if let Some(limit) = cost_budget {
+            let fraction_used = (self.session_cost_usd / limit) as f32;
+            if fraction_used >= warning_threshold {
+                return BudgetStatus::Warning {
+                    budget_type: BudgetType::Cost,
+                    percent_used: fraction_used * 100.0,
+                };
+            }
+        }
+
+        BudgetStatus::Ok
+    }
+
+    /// Tokens still available under `token_budget`, floored at zero.
+    ///
+    /// Returns `None` when no token budget is configured.
+    #[allow(dead_code)]
+    pub fn get_remaining_token_budget(&self, token_budget: Option<u64>) -> Option<u64> {
+        let limit = token_budget?;
+        let spent = self.session_input_tokens + self.session_output_tokens;
+        Some(limit.saturating_sub(spent))
+    }
+
+    /// Cost still available under `cost_budget`, floored at zero.
+    ///
+    /// Returns `None` when no cost budget is configured.
+    #[allow(dead_code)]
+    pub fn get_remaining_cost_budget(&self, cost_budget: Option<f64>) -> Option<f64> {
+        let limit = cost_budget?;
+        let spent = self.session_cost_usd;
+        // Once spending reaches or passes the limit nothing is left
+        let remaining = if limit > spent { limit - spent } else { 0.0 };
+        Some(remaining)
    }

    pub fn reset_session(&mut self) {
@@ -76,6 +297,13 @@ impl UsageStats {
        self.session_start = Some(Instant::now());
        self.achievements.start_session();

+        // Reset context window tracking
+        self.context_tokens_used = 0;
+        self.context_utilisation_percent = 0.0;
+
+        // Note: Cache analytics are NOT reset here - they're cumulative across sessions
+        // to show total potential savings over time
+
        // Track session start for achievements
        self.track_session_start();
    }
@@ -139,11 +367,32 @@ impl UsageStats {
    }

    pub fn increment_tool_usage(&mut self, tool_name: &str) {
-        *self.tools_usage.entry(tool_name.to_string()).or_insert(0) += 1;
-        *self
-            .session_tools_usage
+        self.tools_usage
            .entry(tool_name.to_string())
-            .or_insert(0) += 1;
+            .or_default()
+            .increment_call();
+        self.session_tools_usage
+            .entry(tool_name.to_string())
+            .or_default()
+            .increment_call();
+    }
+
+    /// Add estimated token counts for `tool_name` to both the overall and the
+    /// session per-tool maps, creating the entry when it does not exist yet.
+    pub fn add_tool_tokens(&mut self, tool_name: &str, input_tokens: u64, output_tokens: u64) {
+        for usage_map in [&mut self.tools_usage, &mut self.session_tools_usage] {
+            let entry = usage_map.entry(tool_name.to_string()).or_default();
+            entry.add_tokens(input_tokens, output_tokens);
+        }
+    }
+
+    /// Count one potential cache hit (the same tool call made a second time)
+    /// and add the tokens it could have saved to the running total
+    #[allow(dead_code)]
+    pub fn add_potential_cache_hit(&mut self, tokens_saved: u64) {
+        self.potential_cache_savings_tokens += tokens_saved;
+        self.potential_cache_hits += 1;
    }

    pub fn get_session_duration(&mut self) -> u64 {
@@ -184,6 +433,11 @@ impl UsageStats {
            morning_sessions: self.morning_sessions,
            night_sessions: self.night_sessions,
            last_session_date: self.last_session_date.clone(),
+            context_tokens_used: self.context_tokens_used,
+            context_window_limit: self.context_window_limit,
+            context_utilisation_percent: self.context_utilisation_percent,
+            potential_cache_hits: self.potential_cache_hits,
+            potential_cache_savings_tokens: self.potential_cache_savings_tokens,
            achievements: AchievementProgress::new(), // Dummy for copy
        };
        check_achievements(&stats_copy, &mut self.achievements)
@@ -206,20 +460,22 @@ fn is_consecutive_day(prev_date: &str, current_date: &str) -> bool {
    }
 }

-// Pricing as of January 2025
-// https://www.anthropic.com/pricing
-fn calculate_cost(input_tokens: u64, output_tokens: u64, model: &str) -> f64 {
+// Pricing as of February 2026
+// https://platform.claude.com/docs/en/about-claude/models/overview
+pub fn calculate_cost(input_tokens: u64, output_tokens: u64, model: &str) -> f64 {
    let (input_price_per_million, output_price_per_million) = match model {
-        // Opus 4.5
-        "claude-opus-4-5-20251101" => (15.0, 75.0),
+        // Current generation (Claude 4.5)
+        "claude-opus-4-5-20251101" => (5.0, 25.0),
+        "claude-sonnet-4-5-20250929" => (3.0, 15.0),
+        "claude-haiku-4-5-20251001" => (1.0, 5.0),

-        // Opus 4
+        // Previous generation (Claude 4.x)
+        "claude-opus-4-1-20250805" => (15.0, 75.0),
        "claude-opus-4-20250514" => (15.0, 75.0),
-
-        // Sonnet 4
        "claude-sonnet-4-20250514" => (3.0, 15.0),

-        // Previous generation models
+        // Legacy (Claude 3.x)
+        "claude-3-7-sonnet-20250219" => (3.0, 15.0),
        "claude-3-5-sonnet-20241022" => (3.0, 15.0),
        "claude-3-5-sonnet-20240620" => (3.0, 15.0),
        "claude-3-5-haiku-20241022" => (1.0, 5.0),
@@ -252,7 +508,7 @@ pub struct PersistedStats {
    pub code_blocks_generated: u64,
    pub files_edited: u64,
    pub files_created: u64,
-    pub tools_usage: HashMap<String, u64>,
+    pub tools_usage: HashMap<String, ToolTokenStats>,
    pub sessions_started: u64,
    pub consecutive_days: u64,
    pub total_days_used: u64,
@@ -372,8 +628,10 @@ mod tests {
    #[test]
    fn test_cost_calculation_opus_45() {
        let cost = calculate_cost(1000, 2000, "claude-opus-4-5-20251101");
-        // Same pricing as Opus 4
-        assert!((cost - 0.165).abs() < 0.0001);
+        // Opus 4.5 pricing: $5/MTok input, $25/MTok output
+        // 1000 input tokens = $0.005, 2000 output tokens = $0.05
+        // Total = $0.055
+        assert!((cost - 0.055).abs() < 0.0001);
    }

    #[test]
@@ -512,10 +770,33 @@ mod tests {
        stats.increment_tool_usage("Read");
        stats.increment_tool_usage("Write");

-        assert_eq!(stats.tools_usage.get("Read"), Some(&2));
-        assert_eq!(stats.tools_usage.get("Write"), Some(&1));
-        assert_eq!(stats.session_tools_usage.get("Read"), Some(&2));
-        assert_eq!(stats.session_tools_usage.get("Write"), Some(&1));
+        assert_eq!(stats.tools_usage.get("Read").map(|t| t.call_count), Some(2));
+        assert_eq!(stats.tools_usage.get("Write").map(|t| t.call_count), Some(1));
+        assert_eq!(stats.session_tools_usage.get("Read").map(|t| t.call_count), Some(2));
+        assert_eq!(stats.session_tools_usage.get("Write").map(|t| t.call_count), Some(1));
+    }
+
+    #[test]
+    fn test_add_tool_tokens() {
+        let mut usage = UsageStats::new();
+        usage.increment_tool_usage("Read");
+        // Two token batches recorded against a single call
+        for (input, output) in [(100, 50), (200, 100)] {
+            usage.add_tool_tokens("Read", input, output);
+        }
+
+        let read = usage.tools_usage.get("Read").unwrap();
+        assert_eq!(
+            (
+                read.call_count,
+                read.estimated_input_tokens,
+                read.estimated_output_tokens
+            ),
+            (1, 300, 150)
+        );
+        assert_eq!(read.total_tokens(), 450);
+    }
+
+    #[test]
+    fn test_tool_token_stats_default() {
+        // A freshly constructed record starts with every counter at zero
+        let fresh = ToolTokenStats::new();
+        let counters = [
+            fresh.call_count,
+            fresh.estimated_input_tokens,
+            fresh.estimated_output_tokens,
+            fresh.total_tokens(),
+        ];
+        assert_eq!(counters, [0; 4]);
    }

    #[test]
@@ -590,7 +871,11 @@ mod tests {
            files_created: 5,
            tools_usage: {
                let mut map = HashMap::new();
-                map.insert("Read".to_string(), 50);
+                map.insert("Read".to_string(), ToolTokenStats {
+                    call_count: 50,
+                    estimated_input_tokens: 5000,
+                    estimated_output_tokens: 2500,
+                });
                map
            },
            sessions_started: 10,
@@ -608,7 +893,8 @@ mod tests {
        assert_eq!(stats.total_output_tokens, 20000);
        assert_eq!(stats.total_cost_usd, 5.50);
        assert_eq!(stats.messages_exchanged, 100);
-        assert_eq!(stats.tools_usage.get("Read"), Some(&50));
+        assert_eq!(stats.tools_usage.get("Read").map(|t| t.call_count), Some(50));
+        assert_eq!(stats.tools_usage.get("Read").map(|t| t.estimated_input_tokens), Some(5000));
        assert_eq!(stats.consecutive_days, 7);
        assert_eq!(stats.morning_sessions, 3);
        assert_eq!(stats.last_session_date, Some("2024-06-15".to_string()));
@@ -672,4 +958,351 @@ mod tests {
        assert!(json.contains("stats"));
        assert!(json.contains("total_input_tokens"));
    }
+
+    // =====================
+    // Context Window Tracking tests
+    // =====================
+
+    #[test]
+    fn test_context_window_limit_claude_4() {
+        // Every explicitly listed 4.x / 4.5 model, not just a sample of them
+        let models = [
+            "claude-opus-4-5-20251101",
+            "claude-sonnet-4-5-20250929",
+            "claude-haiku-4-5-20251001",
+            "claude-opus-4-1-20250805",
+            "claude-opus-4-20250514",
+            "claude-sonnet-4-20250514",
+        ];
+        for model in models {
+            assert_eq!(get_context_window_limit(model), 200_000, "model: {}", model);
+        }
+    }
+
+    #[test]
+    fn test_context_window_limit_claude_35() {
+        let models = [
+            "claude-3-5-sonnet-20241022",
+            "claude-3-5-sonnet-20240620",
+            "claude-3-5-haiku-20241022",
+        ];
+        for model in models {
+            assert_eq!(get_context_window_limit(model), 200_000);
+        }
+    }
+
+    #[test]
+    fn test_context_window_limit_unknown_claude() {
+        // An unlisted identifier with the `claude` prefix still gets 200K
+        let limit = get_context_window_limit("claude-some-future-model");
+        assert_eq!(limit, 200_000);
+    }
+
+    #[test]
+    fn test_context_window_limit_non_claude() {
+        // Anything without the `claude` prefix falls back to 128K
+        for model in ["gpt-4", "llama-3", "unknown-model"] {
+            assert_eq!(get_context_window_limit(model), 128_000);
+        }
+    }
+
+    #[test]
+    fn test_context_tracking_update() {
+        let mut usage = UsageStats::new();
+        usage.add_usage(50_000, 10_000, "claude-sonnet-4-20250514");
+
+        assert_eq!(
+            (usage.context_tokens_used, usage.context_window_limit),
+            (50_000, 200_000)
+        );
+        // 50K of 200K is a quarter of the window
+        let drift = (usage.context_utilisation_percent - 25.0).abs();
+        assert!(drift < 0.1);
+    }
+
+    #[test]
+    fn test_context_tracking_accumulates() {
+        let mut usage = UsageStats::new();
+        // Two identical exchanges should add up
+        for _ in 0..2 {
+            usage.add_usage(50_000, 10_000, "claude-sonnet-4-20250514");
+        }
+
+        assert_eq!(usage.context_tokens_used, 100_000);
+        let drift = (usage.context_utilisation_percent - 50.0).abs();
+        assert!(drift < 0.1);
+    }
+
+    #[test]
+    fn test_context_warning_none() {
+        // 40% sits below the lowest (50%) warning tier
+        let mut usage = UsageStats::new();
+        usage.context_utilisation_percent = 40.0;
+
+        let warning = usage.get_context_warning();
+        assert!(warning.is_none());
+    }
+
+    #[test]
+    fn test_context_warning_moderate() {
+        // 55% falls in the 50%+ tier
+        let mut usage = UsageStats::new();
+        usage.context_utilisation_percent = 55.0;
+
+        let warning = usage.get_context_warning();
+        assert_eq!(warning, Some(ContextWarning::Moderate));
+    }
+
+    #[test]
+    fn test_context_warning_high() {
+        // 80% falls in the 75%+ tier
+        let mut usage = UsageStats::new();
+        usage.context_utilisation_percent = 80.0;
+
+        let warning = usage.get_context_warning();
+        assert_eq!(warning, Some(ContextWarning::High));
+    }
+
+    #[test]
+    fn test_context_warning_critical() {
+        // 95% falls in the 90%+ tier
+        let mut usage = UsageStats::new();
+        usage.context_utilisation_percent = 95.0;
+
+        let warning = usage.get_context_warning();
+        assert_eq!(warning, Some(ContextWarning::Critical));
+    }
+
+    #[test]
+    fn test_estimate_remaining_tokens() {
+        let mut usage = UsageStats::new();
+        usage.context_window_limit = 200_000;
+        usage.context_tokens_used = 50_000;
+
+        let remaining = usage.estimate_remaining_tokens();
+        assert_eq!(remaining, 150_000);
+    }
+
+    #[test]
+    fn test_estimate_remaining_tokens_at_limit() {
+        // Usage exactly equal to the window leaves nothing
+        let mut usage = UsageStats::new();
+        usage.context_window_limit = 200_000;
+        usage.context_tokens_used = 200_000;
+
+        let remaining = usage.estimate_remaining_tokens();
+        assert_eq!(remaining, 0);
+    }
+
+    #[test]
+    fn test_estimate_remaining_tokens_over_limit() {
+        // Usage beyond the window must clamp to zero rather than underflow
+        let mut usage = UsageStats::new();
+        usage.context_window_limit = 200_000;
+        usage.context_tokens_used = 250_000;
+
+        let remaining = usage.estimate_remaining_tokens();
+        assert_eq!(remaining, 0);
+    }
+
+    #[test]
+    fn test_context_reset_on_session_reset() {
+        let mut usage = UsageStats::new();
+        usage.add_usage(100_000, 20_000, "claude-sonnet-4-20250514");
+
+        // Precondition: the usage above must have registered
+        assert_ne!(usage.context_tokens_used, 0);
+        assert!(usage.context_utilisation_percent > 0.0);
+
+        usage.reset_session();
+
+        // Both context figures are cleared by the reset
+        assert_eq!(usage.context_tokens_used, 0);
+        assert_eq!(usage.context_utilisation_percent, 0.0);
+    }
+
+    #[test]
+    fn test_context_warning_message() {
+        let expectations = [
+            (
+                ContextWarning::Moderate,
+                "Context window is 50%+ full. Consider starting a new conversation for better performance.",
+            ),
+            (
+                ContextWarning::High,
+                "Context window is 75%+ full. Responses may degrade. Consider summarising or starting fresh.",
+            ),
+            (
+                ContextWarning::Critical,
+                "Context window is nearly full (90%+)! Start a new conversation to avoid errors.",
+            ),
+        ];
+        for (level, expected) in expectations {
+            assert_eq!(level.message(), expected);
+        }
+    }
+
+    #[test]
+    fn test_context_warning_serialization() {
+        // Variants serialise as bare snake_case JSON strings
+        let cases = [
+            (ContextWarning::Critical, "\"critical\""),
+            (ContextWarning::Moderate, "\"moderate\""),
+        ];
+        for (level, expected) in cases {
+            let encoded = serde_json::to_string(&level).expect("Failed to serialize");
+            assert_eq!(encoded, expected);
+        }
+    }
+
+    // =====================
+    // Budget Tracking tests
+    // =====================
+
+    #[test]
+    fn test_budget_disabled_returns_ok() {
+        // With budgeting switched off the limits are ignored entirely
+        let usage = UsageStats::new();
+        let outcome = usage.check_budget(false, Some(1000), Some(1.0), 0.8);
+        assert_eq!(outcome, BudgetStatus::Ok);
+    }
+
+    #[test]
+    fn test_budget_no_limits_returns_ok() {
+        // Budgeting enabled but neither limit configured
+        let usage = UsageStats::new();
+        let outcome = usage.check_budget(true, None, None, 0.8);
+        assert_eq!(outcome, BudgetStatus::Ok);
+    }
+
+    #[test]
+    fn test_token_budget_within_limit() {
+        // 800 of 10,000 tokens: far below the 0.8 warning threshold
+        let mut usage = UsageStats::new();
+        usage.session_input_tokens = 500;
+        usage.session_output_tokens = 300;
+
+        let outcome = usage.check_budget(true, Some(10000), None, 0.8);
+        assert_eq!(outcome, BudgetStatus::Ok);
+    }
+
+    #[test]
+    fn test_token_budget_warning() {
+        // 8,500 of 10,000 tokens: past the 0.8 threshold, below the cap
+        let mut usage = UsageStats::new();
+        usage.session_input_tokens = 4500;
+        usage.session_output_tokens = 4000;
+
+        let outcome = usage.check_budget(true, Some(10000), None, 0.8);
+        if let BudgetStatus::Warning {
+            budget_type,
+            percent_used,
+        } = outcome
+        {
+            assert_eq!(budget_type, BudgetType::Token);
+            assert!(percent_used >= 80.0);
+        } else {
+            panic!("Expected Warning status");
+        }
+    }
+
+    #[test]
+    fn test_token_budget_exceeded() {
+        // 11,000 tokens against a 10,000 cap
+        let mut usage = UsageStats::new();
+        usage.session_input_tokens = 6000;
+        usage.session_output_tokens = 5000;
+
+        let expected = BudgetStatus::Exceeded {
+            budget_type: BudgetType::Token,
+        };
+        let outcome = usage.check_budget(true, Some(10000), None, 0.8);
+        assert_eq!(outcome, expected);
+    }
+
+    #[test]
+    fn test_cost_budget_within_limit() {
+        // 0.50 of 5.00: a tenth of the budget
+        let mut usage = UsageStats::new();
+        usage.session_cost_usd = 0.50;
+
+        let outcome = usage.check_budget(true, None, Some(5.0), 0.8);
+        assert_eq!(outcome, BudgetStatus::Ok);
+    }
+
+    #[test]
+    fn test_cost_budget_warning() {
+        // 4.25 of 5.00: past the 0.8 threshold, below the cap
+        let mut usage = UsageStats::new();
+        usage.session_cost_usd = 4.25;
+
+        let outcome = usage.check_budget(true, None, Some(5.0), 0.8);
+        if let BudgetStatus::Warning {
+            budget_type,
+            percent_used,
+        } = outcome
+        {
+            assert_eq!(budget_type, BudgetType::Cost);
+            assert!(percent_used >= 80.0);
+        } else {
+            panic!("Expected Warning status");
+        }
+    }
+
+    #[test]
+    fn test_cost_budget_exceeded() {
+        // 5.50 spent against a 5.00 cap
+        let mut usage = UsageStats::new();
+        usage.session_cost_usd = 5.50;
+
+        let expected = BudgetStatus::Exceeded {
+            budget_type: BudgetType::Cost,
+        };
+        let outcome = usage.check_budget(true, None, Some(5.0), 0.8);
+        assert_eq!(outcome, expected);
+    }
+
+    #[test]
+    fn test_token_budget_takes_priority() {
+        let mut usage = UsageStats::new();
+        usage.session_cost_usd = 0.01;
+        usage.session_input_tokens = 12000;
+        usage.session_output_tokens = 0;
+
+        // The token cap is blown while the cost cap is nowhere near reached
+        let expected = BudgetStatus::Exceeded {
+            budget_type: BudgetType::Token,
+        };
+        let outcome = usage.check_budget(true, Some(10000), Some(5.0), 0.8);
+        assert_eq!(outcome, expected);
+    }
+
+    #[test]
+    fn test_remaining_token_budget() {
+        // 5,000 tokens used so far
+        let mut usage = UsageStats::new();
+        usage.session_input_tokens = 3000;
+        usage.session_output_tokens = 2000;
+
+        let with_budget = usage.get_remaining_token_budget(Some(10000));
+        let without_budget = usage.get_remaining_token_budget(None);
+        assert_eq!(with_budget, Some(5000));
+        assert_eq!(without_budget, None);
+    }
+
+    #[test]
+    fn test_remaining_token_budget_exceeded() {
+        // 13,000 used against 10,000: the remainder clamps to zero
+        let mut usage = UsageStats::new();
+        usage.session_input_tokens = 8000;
+        usage.session_output_tokens = 5000;
+
+        let remaining = usage.get_remaining_token_budget(Some(10000));
+        assert_eq!(remaining, Some(0));
+    }
+
+    #[test]
+    fn test_remaining_cost_budget() {
+        let mut usage = UsageStats::new();
+        usage.session_cost_usd = 2.50;
+
+        // Half of a 5.00 budget is still available
+        let left = usage
+            .get_remaining_cost_budget(Some(5.0))
+            .expect("a configured budget must yield Some");
+        assert!((left - 2.50).abs() < 0.001);
+
+        // No budget configured means no remainder to report
+        assert!(usage.get_remaining_cost_budget(None).is_none());
+    }
+
+    #[test]
+    fn test_remaining_cost_budget_exceeded() {
+        let mut usage = UsageStats::new();
+        usage.session_cost_usd = 6.0;
+
+        // Overspending clamps the remainder to zero instead of going negative
+        let left = usage
+            .get_remaining_cost_budget(Some(5.0))
+            .expect("a configured budget must yield Some");
+        assert!((left - 0.0).abs() < 0.001);
+    }
+
+    #[test]
+    fn test_budget_status_serialization() {
+        let to_json =
+            |status: &BudgetStatus| serde_json::to_string(status).expect("Failed to serialize");
+
+        let warning_json = to_json(&BudgetStatus::Warning {
+            budget_type: BudgetType::Token,
+            percent_used: 85.5,
+        });
+        assert!(warning_json.contains("warning"));
+        assert!(warning_json.contains("token"));
+
+        let exceeded_json = to_json(&BudgetStatus::Exceeded {
+            budget_type: BudgetType::Cost,
+        });
+        assert!(exceeded_json.contains("exceeded"));
+        assert!(exceeded_json.contains("cost"));
+    }
+
+    #[test]
+    fn test_budget_type_serialization() {
+        // Variants serialise as bare snake_case JSON strings
+        let cases = [
+            (BudgetType::Token, "\"token\""),
+            (BudgetType::Cost, "\"cost\""),
+        ];
+        for (kind, expected) in cases {
+            let encoded = serde_json::to_string(&kind).expect("Failed to serialize");
+            assert_eq!(encoded, expected);
+        }
+    }
 }