From dbf5234a42d92d80c36c5680886c0ac60ef83a96 Mon Sep 17 00:00:00 2001 From: Hikari Date: Wed, 25 Feb 2026 20:06:18 -0800 Subject: [PATCH] fix: watchdog for hung wsl bridge process (#166) Adds a 60-second watchdog that silently kills the Claude Code process if system:init never arrives, preventing the UI from being stuck in a "Connected" state indefinitely. Refactors process handle to Arc<Mutex<Option<Child>>> for cross-thread access, and removes the unused CommandExt import. --- src-tauri/src/wsl_bridge.rs | 96 +++++++++++++++++++++++++++++-------- 1 file changed, 75 insertions(+), 21 deletions(-) diff --git a/src-tauri/src/wsl_bridge.rs b/src-tauri/src/wsl_bridge.rs index 6cca66a..b8b4fe1 100644 --- a/src-tauri/src/wsl_bridge.rs +++ b/src-tauri/src/wsl_bridge.rs @@ -1,14 +1,13 @@ use std::io::{BufRead, BufReader, Write}; use std::process::{Child, ChildStdin, Command, Stdio}; +use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::Arc; use std::thread; -use std::time::{SystemTime, UNIX_EPOCH}; +use std::time::{Duration, SystemTime, UNIX_EPOCH}; +use parking_lot::Mutex; use tauri::{AppHandle, Emitter}; use tempfile::NamedTempFile; -#[cfg(target_os = "windows")] -use std::os::windows::process::CommandExt; - use crate::achievements::{get_achievement_info, AchievementUnlockedEvent}; use crate::commands::record_cost; use crate::config::ClaudeStartOptions; @@ -103,52 +102,58 @@ fn find_claude_binary() -> Option { } pub struct WslBridge { - process: Option, + process: Arc>>, stdin: Option, working_directory: String, session_id: Option, mcp_config_file: Option, stats: Arc>, conversation_id: Option, + /// Set to true once the `system:init` message arrives, false at the start of every new session.
+ received_init: Arc, } impl WslBridge { pub fn new() -> Self { WslBridge { - process: None, + process: Arc::new(Mutex::new(None)), stdin: None, working_directory: String::new(), session_id: None, mcp_config_file: None, stats: Arc::new(RwLock::new(UsageStats::new())), conversation_id: None, + received_init: Arc::new(AtomicBool::new(false)), } } pub fn new_with_conversation_id(conversation_id: String) -> Self { WslBridge { - process: None, + process: Arc::new(Mutex::new(None)), stdin: None, working_directory: String::new(), session_id: None, mcp_config_file: None, stats: Arc::new(RwLock::new(UsageStats::new())), conversation_id: Some(conversation_id), + received_init: Arc::new(AtomicBool::new(false)), } } pub fn start(&mut self, app: AppHandle, options: ClaudeStartOptions) -> Result<(), String> { // If a process handle exists but the process has already exited (e.g. due to a // failed working directory), clean up the stale handle so we can restart cleanly. - if let Some(ref mut process) = self.process { - if process.try_wait().map(|s| s.is_some()).unwrap_or(false) { - self.process = None; - self.stdin = None; + { + let mut proc_guard = self.process.lock(); + if let Some(ref mut proc) = *proc_guard { + if proc.try_wait().map(|s| s.is_some()).unwrap_or(false) { + *proc_guard = None; + self.stdin = None; + } + } + if proc_guard.is_some() { + return Err("Process already running".to_string()); } - } - - if self.process.is_some() { - return Err("Process already running".to_string()); } // Load saved achievements and stats when starting a new session @@ -399,7 +404,10 @@ impl WslBridge { let stderr = child.stderr.take(); self.stdin = stdin; - self.process = Some(child); + *self.process.lock() = Some(child); + + // Reset the init flag so the watchdog and stdout handler start fresh. 
+ self.received_init.store(false, Ordering::SeqCst); // Note: We no longer reset stats here - stats persist across reconnects // Stats are only reset when explicitly disconnecting via stop() @@ -416,8 +424,9 @@ impl WslBridge { let app_clone = app.clone(); let stats_clone = self.stats.clone(); let conv_id = self.conversation_id.clone(); + let received_init_clone = self.received_init.clone(); thread::spawn(move || { - handle_stdout(stdout, app_clone, stats_clone, conv_id); + handle_stdout(stdout, app_clone, stats_clone, conv_id, received_init_clone); }); } @@ -429,12 +438,31 @@ impl WslBridge { }); } + // Emit Connected immediately so the frontend can send the greeting message. + // This is intentionally optimistic — Claude Code buffers stdout until stdin receives + // data on Windows/WSL, so we must send something to stdin first or system:init never + // arrives. The received_init flag below tracks whether init actually arrived. emit_connection_status( &app, ConnectionStatus::Connected, self.conversation_id.clone(), ); + // Watchdog: if system:init never arrives the process is truly hung (e.g. a silent crash + // after spawning). After 60 seconds we kill it so the user isn't stuck forever. + // handle_stdout will surface the error when stdout closes after the kill. + let process_watchdog = self.process.clone(); + let received_init_watchdog = self.received_init.clone(); + thread::spawn(move || { + thread::sleep(Duration::from_secs(60)); + if !received_init_watchdog.load(Ordering::SeqCst) { + if let Some(mut proc) = process_watchdog.lock().take() { + let _ = proc.kill(); + let _ = proc.wait(); + } + } + }); + Ok(()) } @@ -513,7 +541,10 @@ impl WslBridge { // Due to persistent bug in Claude Code where ESC/Ctrl+C doesn't work, // we have to kill the process. This is the only reliable way to stop it.
// See: https://github.com/anthropics/claude-code/issues/3455 - if let Some(mut process) = self.process.take() { + // Extract the process first so the MutexGuard is dropped before we mutably + // borrow `self` again via estimate_interrupted_request_cost. + let maybe_process = self.process.lock().take(); + if let Some(mut process) = maybe_process { // Estimate cost for interrupted request before killing self.estimate_interrupted_request_cost(app); @@ -643,7 +674,7 @@ impl WslBridge { } pub fn stop(&mut self, app: &AppHandle) { - if let Some(mut process) = self.process.take() { + if let Some(mut process) = self.process.lock().take() { let _ = process.kill(); let _ = process.wait(); } @@ -674,7 +705,7 @@ impl WslBridge { } pub fn is_running(&self) -> bool { - self.process.is_some() + self.process.lock().is_some() } pub fn get_working_directory(&self) -> &str { @@ -697,13 +728,16 @@ fn handle_stdout( app: AppHandle, stats: Arc>, conversation_id: Option, + received_init: Arc, ) { let reader = BufReader::new(stdout); for line in reader.lines() { match line { Ok(line) if !line.is_empty() => { - if let Err(e) = process_json_line(&line, &app, &stats, &conversation_id) { + if let Err(e) = + process_json_line(&line, &app, &stats, &conversation_id, &received_init) + { tracing::error!("Error processing line: {}", e); } } @@ -715,6 +749,22 @@ fn handle_stdout( } } + // If stdout closed before system:init arrived the process exited without initialising. + // Emit an error line so the user understands why the connection failed. + if !received_init.load(Ordering::SeqCst) { + let _ = app.emit( + "claude:output", + OutputEvent { + line_type: "error".to_string(), + content: "Claude Code exited before initialising. 
Check the working directory and Claude Code installation, then try connecting again.".to_string(), + tool_name: None, + conversation_id: conversation_id.clone(), + cost: None, + parent_tool_use_id: None, + }, + ); + } + emit_connection_status(&app, ConnectionStatus::Disconnected, conversation_id); } @@ -919,6 +969,7 @@ fn process_json_line( app: &AppHandle, stats: &Arc>, conversation_id: &Option, + received_init: &Arc, ) -> Result<(), String> { let message: ClaudeMessage = serde_json::from_str(line) .map_err(|e| format!("Failed to parse JSON: {} - Line: {}", e, line))?; @@ -931,6 +982,9 @@ fn process_json_line( .. } => { if subtype == "init" { + // Mark as initialised so the watchdog knows the process is healthy. + received_init.store(true, Ordering::SeqCst); + if let Some(id) = session_id { let _ = app.emit( "claude:session",