generated from nhcarrigan/template
fix: watchdog for hung wsl bridge process (#166)
Adds a 60-second watchdog that silently kills the Claude Code process if system:init never arrives, preventing the UI from being stuck in a "Connected" state indefinitely. Refactors process handle to Arc<Mutex<Option<Child>>> for cross-thread access, and removes the unused CommandExt import.
This commit is contained in:
+75
-21
@@ -1,14 +1,13 @@
|
||||
use std::io::{BufRead, BufReader, Write};
|
||||
use std::process::{Child, ChildStdin, Command, Stdio};
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
use std::sync::Arc;
|
||||
use std::thread;
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
use std::time::{Duration, SystemTime, UNIX_EPOCH};
|
||||
use parking_lot::Mutex;
|
||||
use tauri::{AppHandle, Emitter};
|
||||
use tempfile::NamedTempFile;
|
||||
|
||||
#[cfg(target_os = "windows")]
|
||||
use std::os::windows::process::CommandExt;
|
||||
|
||||
use crate::achievements::{get_achievement_info, AchievementUnlockedEvent};
|
||||
use crate::commands::record_cost;
|
||||
use crate::config::ClaudeStartOptions;
|
||||
@@ -103,52 +102,58 @@ fn find_claude_binary() -> Option<String> {
|
||||
}
|
||||
|
||||
pub struct WslBridge {
|
||||
process: Option<Child>,
|
||||
process: Arc<Mutex<Option<Child>>>,
|
||||
stdin: Option<ChildStdin>,
|
||||
working_directory: String,
|
||||
session_id: Option<String>,
|
||||
mcp_config_file: Option<NamedTempFile>,
|
||||
stats: Arc<RwLock<UsageStats>>,
|
||||
conversation_id: Option<String>,
|
||||
/// Set to true once the `system:init` message arrives, false at the start of every new session.
|
||||
received_init: Arc<AtomicBool>,
|
||||
}
|
||||
|
||||
impl WslBridge {
|
||||
pub fn new() -> Self {
|
||||
WslBridge {
|
||||
process: None,
|
||||
process: Arc::new(Mutex::new(None)),
|
||||
stdin: None,
|
||||
working_directory: String::new(),
|
||||
session_id: None,
|
||||
mcp_config_file: None,
|
||||
stats: Arc::new(RwLock::new(UsageStats::new())),
|
||||
conversation_id: None,
|
||||
received_init: Arc::new(AtomicBool::new(false)),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new_with_conversation_id(conversation_id: String) -> Self {
|
||||
WslBridge {
|
||||
process: None,
|
||||
process: Arc::new(Mutex::new(None)),
|
||||
stdin: None,
|
||||
working_directory: String::new(),
|
||||
session_id: None,
|
||||
mcp_config_file: None,
|
||||
stats: Arc::new(RwLock::new(UsageStats::new())),
|
||||
conversation_id: Some(conversation_id),
|
||||
received_init: Arc::new(AtomicBool::new(false)),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn start(&mut self, app: AppHandle, options: ClaudeStartOptions) -> Result<(), String> {
|
||||
// If a process handle exists but the process has already exited (e.g. due to a
|
||||
// failed working directory), clean up the stale handle so we can restart cleanly.
|
||||
if let Some(ref mut process) = self.process {
|
||||
if process.try_wait().map(|s| s.is_some()).unwrap_or(false) {
|
||||
self.process = None;
|
||||
self.stdin = None;
|
||||
{
|
||||
let mut proc_guard = self.process.lock();
|
||||
if let Some(ref mut proc) = *proc_guard {
|
||||
if proc.try_wait().map(|s| s.is_some()).unwrap_or(false) {
|
||||
*proc_guard = None;
|
||||
self.stdin = None;
|
||||
}
|
||||
}
|
||||
if proc_guard.is_some() {
|
||||
return Err("Process already running".to_string());
|
||||
}
|
||||
}
|
||||
|
||||
if self.process.is_some() {
|
||||
return Err("Process already running".to_string());
|
||||
}
|
||||
|
||||
// Load saved achievements and stats when starting a new session
|
||||
@@ -399,7 +404,10 @@ impl WslBridge {
|
||||
let stderr = child.stderr.take();
|
||||
|
||||
self.stdin = stdin;
|
||||
self.process = Some(child);
|
||||
*self.process.lock() = Some(child);
|
||||
|
||||
// Reset the init flag so the watchdog and stdout handler start fresh.
|
||||
self.received_init.store(false, Ordering::SeqCst);
|
||||
|
||||
// Note: We no longer reset stats here - stats persist across reconnects
|
||||
// Stats are only reset when explicitly disconnecting via stop()
|
||||
@@ -416,8 +424,9 @@ impl WslBridge {
|
||||
let app_clone = app.clone();
|
||||
let stats_clone = self.stats.clone();
|
||||
let conv_id = self.conversation_id.clone();
|
||||
let received_init_clone = self.received_init.clone();
|
||||
thread::spawn(move || {
|
||||
handle_stdout(stdout, app_clone, stats_clone, conv_id);
|
||||
handle_stdout(stdout, app_clone, stats_clone, conv_id, received_init_clone);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -429,12 +438,31 @@ impl WslBridge {
|
||||
});
|
||||
}
|
||||
|
||||
// Emit Connected immediately so the frontend can send the greeting message.
|
||||
// This is intentionally optimistic — Claude Code buffers stdout until stdin receives
|
||||
// data on Windows/WSL, so we must send something to stdin first or system:init never
|
||||
// arrives. The received_init flag below tracks whether init actually arrived.
|
||||
emit_connection_status(
|
||||
&app,
|
||||
ConnectionStatus::Connected,
|
||||
self.conversation_id.clone(),
|
||||
);
|
||||
|
||||
// Watchdog: if system:init never arrives the process is truly hung (e.g. a silent crash
|
||||
// after spawning). After 60 seconds we kill it so the user isn't stuck forever.
|
||||
// handle_stdout will surface the error when stdout closes after the kill.
|
||||
let process_watchdog = self.process.clone();
|
||||
let received_init_watchdog = self.received_init.clone();
|
||||
thread::spawn(move || {
|
||||
thread::sleep(Duration::from_secs(60));
|
||||
if !received_init_watchdog.load(Ordering::SeqCst) {
|
||||
if let Some(mut proc) = process_watchdog.lock().take() {
|
||||
let _ = proc.kill();
|
||||
let _ = proc.wait();
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -513,7 +541,10 @@ impl WslBridge {
|
||||
// Due to persistent bug in Claude Code where ESC/Ctrl+C doesn't work,
|
||||
// we have to kill the process. This is the only reliable way to stop it.
|
||||
// See: https://github.com/anthropics/claude-code/issues/3455
|
||||
if let Some(mut process) = self.process.take() {
|
||||
// Extract the process first so the MutexGuard is dropped before we mutably
|
||||
// borrow `self` again via estimate_interrupted_request_cost.
|
||||
let maybe_process = self.process.lock().take();
|
||||
if let Some(mut process) = maybe_process {
|
||||
// Estimate cost for interrupted request before killing
|
||||
self.estimate_interrupted_request_cost(app);
|
||||
|
||||
@@ -643,7 +674,7 @@ impl WslBridge {
|
||||
}
|
||||
|
||||
pub fn stop(&mut self, app: &AppHandle) {
|
||||
if let Some(mut process) = self.process.take() {
|
||||
if let Some(mut process) = self.process.lock().take() {
|
||||
let _ = process.kill();
|
||||
let _ = process.wait();
|
||||
}
|
||||
@@ -674,7 +705,7 @@ impl WslBridge {
|
||||
}
|
||||
|
||||
pub fn is_running(&self) -> bool {
|
||||
self.process.is_some()
|
||||
self.process.lock().is_some()
|
||||
}
|
||||
|
||||
pub fn get_working_directory(&self) -> &str {
|
||||
@@ -697,13 +728,16 @@ fn handle_stdout(
|
||||
app: AppHandle,
|
||||
stats: Arc<RwLock<UsageStats>>,
|
||||
conversation_id: Option<String>,
|
||||
received_init: Arc<AtomicBool>,
|
||||
) {
|
||||
let reader = BufReader::new(stdout);
|
||||
|
||||
for line in reader.lines() {
|
||||
match line {
|
||||
Ok(line) if !line.is_empty() => {
|
||||
if let Err(e) = process_json_line(&line, &app, &stats, &conversation_id) {
|
||||
if let Err(e) =
|
||||
process_json_line(&line, &app, &stats, &conversation_id, &received_init)
|
||||
{
|
||||
tracing::error!("Error processing line: {}", e);
|
||||
}
|
||||
}
|
||||
@@ -715,6 +749,22 @@ fn handle_stdout(
|
||||
}
|
||||
}
|
||||
|
||||
// If stdout closed before system:init arrived the process exited without initialising.
|
||||
// Emit an error line so the user understands why the connection failed.
|
||||
if !received_init.load(Ordering::SeqCst) {
|
||||
let _ = app.emit(
|
||||
"claude:output",
|
||||
OutputEvent {
|
||||
line_type: "error".to_string(),
|
||||
content: "Claude Code exited before initialising. Check the working directory and Claude Code installation, then try connecting again.".to_string(),
|
||||
tool_name: None,
|
||||
conversation_id: conversation_id.clone(),
|
||||
cost: None,
|
||||
parent_tool_use_id: None,
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
emit_connection_status(&app, ConnectionStatus::Disconnected, conversation_id);
|
||||
}
|
||||
|
||||
@@ -919,6 +969,7 @@ fn process_json_line(
|
||||
app: &AppHandle,
|
||||
stats: &Arc<RwLock<UsageStats>>,
|
||||
conversation_id: &Option<String>,
|
||||
received_init: &Arc<AtomicBool>,
|
||||
) -> Result<(), String> {
|
||||
let message: ClaudeMessage = serde_json::from_str(line)
|
||||
.map_err(|e| format!("Failed to parse JSON: {} - Line: {}", e, line))?;
|
||||
@@ -931,6 +982,9 @@ fn process_json_line(
|
||||
..
|
||||
} => {
|
||||
if subtype == "init" {
|
||||
// Mark as initialised so the watchdog knows the process is healthy.
|
||||
received_init.store(true, Ordering::SeqCst);
|
||||
|
||||
if let Some(id) = session_id {
|
||||
let _ = app.emit(
|
||||
"claude:session",
|
||||
|
||||
Reference in New Issue
Block a user