shepherd-launcher/crates/shepherd-host-linux/src/process.rs
2025-12-27 14:18:32 -05:00

522 lines
18 KiB
Rust

//! Process management utilities
use nix::sys::signal::{self, Signal};
use nix::unistd::Pid;
use std::collections::HashMap;
use std::os::unix::process::CommandExt;
use std::process::{Child, Command, Stdio};
use tracing::{debug, info, warn};
use shepherd_host_api::{ExitStatus, HostError, HostResult};
/// Extract the snap name from a command path
/// Examples:
/// - "/snap/mc-installer/279/bin/mc-installer" -> Some("mc-installer")
/// - "mc-installer" (if it's a snap) -> Some("mc-installer")
/// - "/usr/bin/firefox" -> None
fn extract_snap_name(program: &str) -> Option<String> {
// Check if it's a path starting with /snap/
if program.starts_with("/snap/") {
// Format: /snap/<snap-name>/<revision>/...
let parts: Vec<&str> = program.split('/').collect();
if parts.len() >= 3 {
return Some(parts[2].to_string());
}
}
// Check if it looks like a snap command (no path, and we can verify via snap path)
if !program.contains('/') {
let snap_path = format!("/snap/bin/{}", program);
if std::path::Path::new(&snap_path).exists() {
return Some(program.to_string());
}
}
None
}
/// Managed child process with process group tracking
pub struct ManagedProcess {
pub child: Child,
pub pid: u32,
pub pgid: u32,
/// The command name (for fallback killing via pkill)
pub command_name: String,
/// The snap name if this is a snap app (for cgroup-based killing)
pub snap_name: Option<String>,
}
/// Initialize process management (called once at startup)
pub fn init() {
info!("Process management initialized");
}
/// Kill all processes in a snap's cgroup using systemd
/// Snaps create scopes at: snap.<snap-name>.<snap-name>-<uuid>.scope
/// Direct signals don't work due to AppArmor confinement, but systemctl --user does
/// NOTE: We always use SIGKILL for snap apps because apps like Minecraft Launcher
/// have self-restart behavior and will spawn new instances when receiving SIGTERM
pub fn kill_snap_cgroup(snap_name: &str, _signal: Signal) -> bool {
let uid = nix::unistd::getuid().as_raw();
let base_path = format!(
"/sys/fs/cgroup/user.slice/user-{}.slice/user@{}.service/app.slice",
uid, uid
);
// Find all scope directories matching this snap
let pattern = format!("snap.{}.{}-", snap_name, snap_name);
let base = std::path::Path::new(&base_path);
if !base.exists() {
debug!(path = %base_path, "Snap cgroup base path doesn't exist");
return false;
}
let mut stopped_any = false;
if let Ok(entries) = std::fs::read_dir(base) {
for entry in entries.flatten() {
let name = entry.file_name();
let name_str = name.to_string_lossy();
if name_str.starts_with(&pattern) && name_str.ends_with(".scope") {
let scope_name = name_str.to_string();
// Always use SIGKILL for snap apps to prevent self-restart behavior
// Using systemctl kill --signal=KILL sends SIGKILL to all processes in scope
let result = Command::new("systemctl")
.args(["--user", "kill", "--signal=KILL", &scope_name])
.output();
match result {
Ok(output) => {
if output.status.success() {
info!(scope = %scope_name, "Killed snap scope via systemctl SIGKILL");
stopped_any = true;
} else {
let stderr = String::from_utf8_lossy(&output.stderr);
warn!(scope = %scope_name, stderr = %stderr, "systemctl kill command failed");
}
}
Err(e) => {
warn!(scope = %scope_name, error = %e, "Failed to run systemctl");
}
}
}
}
}
if stopped_any {
info!(snap = snap_name, "Killed snap scope(s) via systemctl SIGKILL");
} else {
debug!(snap = snap_name, "No snap scope found to kill");
}
stopped_any
}
/// Kill processes by command name using pkill
pub fn kill_by_command(command_name: &str, signal: Signal) -> bool {
let signal_name = match signal {
Signal::SIGTERM => "TERM",
Signal::SIGKILL => "KILL",
_ => "TERM",
};
// Use pkill to find and kill processes by command name
let result = Command::new("pkill")
.args([&format!("-{}", signal_name), "-f", command_name])
.output();
match result {
Ok(output) => {
// pkill returns 0 if processes were found and signaled
if output.status.success() {
info!(command = command_name, signal = signal_name, "Killed processes by command name");
true
} else {
// No processes found is not an error
debug!(command = command_name, "No processes found matching command name");
false
}
}
Err(e) => {
warn!(command = command_name, error = %e, "Failed to run pkill");
false
}
}
}
impl ManagedProcess {
/// Spawn a new process in its own process group
pub fn spawn(
argv: &[String],
env: &HashMap<String, String>,
cwd: Option<&std::path::PathBuf>,
capture_output: bool,
) -> HostResult<Self> {
if argv.is_empty() {
return Err(HostError::SpawnFailed("Empty argv".into()));
}
let program = &argv[0];
let args = &argv[1..];
let mut cmd = Command::new(program);
cmd.args(args);
// Set environment
cmd.env_clear();
// Inherit essential environment variables
// These are needed for most Linux applications to work correctly
let inherit_vars = [
// Core paths
"PATH",
"HOME",
"USER",
"SHELL",
// Display/graphics - both X11 and Wayland
"DISPLAY",
"WAYLAND_DISPLAY",
"XDG_RUNTIME_DIR",
"XDG_SESSION_TYPE",
"XDG_SESSION_DESKTOP",
"XDG_CURRENT_DESKTOP",
// X11 authorization (needed for XWayland apps)
"XAUTHORITY",
// XDG directories (needed for app data/config)
"XDG_DATA_HOME",
"XDG_CONFIG_HOME",
"XDG_CACHE_HOME",
"XDG_STATE_HOME",
"XDG_DATA_DIRS",
"XDG_CONFIG_DIRS",
// Snap support (critical for Snap apps like Minecraft)
"SNAP",
"SNAP_USER_DATA",
"SNAP_USER_COMMON",
"SNAP_REAL_HOME",
"SNAP_NAME",
"SNAP_INSTANCE_NAME",
"SNAP_ARCH",
"SNAP_VERSION",
"SNAP_REVISION",
"SNAP_COMMON",
"SNAP_DATA",
"SNAP_LIBRARY_PATH",
// Locale
"LANG",
"LANGUAGE",
"LC_ALL",
// D-Bus (needed for many GUI apps)
"DBUS_SESSION_BUS_ADDRESS",
// Graphics/GPU
"LIBGL_ALWAYS_SOFTWARE",
"__GLX_VENDOR_LIBRARY_NAME",
"VK_ICD_FILENAMES",
"MESA_LOADER_DRIVER_OVERRIDE",
// Audio
"PULSE_SERVER",
"PULSE_COOKIE",
// GTK/GLib settings (needed for proper theming and SSL)
"GTK_MODULES",
"GIO_EXTRA_MODULES",
"GSETTINGS_SCHEMA_DIR",
"GSETTINGS_BACKEND",
// SSL/TLS certificate locations
"SSL_CERT_FILE",
"SSL_CERT_DIR",
"CURL_CA_BUNDLE",
"REQUESTS_CA_BUNDLE",
// Desktop session info (needed for portal integration)
"DESKTOP_SESSION",
"GNOME_DESKTOP_SESSION_ID",
];
for var in inherit_vars {
if let Ok(val) = std::env::var(var) {
cmd.env(var, val);
}
}
// Special handling for WAYLAND_DISPLAY:
// If SHEPHERD_WAYLAND_DISPLAY is set, use that instead of the inherited value.
// This allows apps to be launched on a nested compositor while the daemon
// runs on the parent compositor. When the daemon runs inside the nested
// compositor, this is not needed as WAYLAND_DISPLAY is already correct.
if let Ok(shepherd_display) = std::env::var("SHEPHERD_WAYLAND_DISPLAY") {
debug!(display = %shepherd_display, "Using SHEPHERD_WAYLAND_DISPLAY override for child process");
cmd.env("WAYLAND_DISPLAY", shepherd_display);
}
// Add custom environment (these can override inherited vars)
for (k, v) in env {
cmd.env(k, v);
}
// Set working directory
if let Some(dir) = cwd {
cmd.current_dir(dir);
}
// Configure output capture
// For debugging, inherit stdout/stderr so we can see errors
if capture_output {
cmd.stdout(Stdio::piped());
cmd.stderr(Stdio::piped());
} else {
// Inherit from parent so we can see child output for debugging
cmd.stdout(Stdio::inherit());
cmd.stderr(Stdio::inherit());
}
cmd.stdin(Stdio::null());
// Store the command name for later use in killing
let command_name = program.to_string();
// Set up process group - this child becomes its own process group leader
// SAFETY: This is safe in the pre-exec context
unsafe {
cmd.pre_exec(|| {
nix::unistd::setsid().map_err(|e| {
std::io::Error::new(std::io::ErrorKind::Other, e.to_string())
})?;
Ok(())
});
}
let child = cmd.spawn().map_err(|e| {
HostError::SpawnFailed(format!("Failed to spawn {}: {}", program, e))
})?;
let pid = child.id();
let pgid = pid; // After setsid, pid == pgid
// Extract snap name from command if it's a snap app
// Format: /snap/<snap-name>/... or just the snap command name
let snap_name = extract_snap_name(program);
info!(pid = pid, pgid = pgid, program = %program, snap = ?snap_name, "Process spawned");
Ok(Self { child, pid, pgid, command_name, snap_name })
}
/// Get all descendant PIDs of this process using /proc
fn get_descendant_pids(&self) -> Vec<i32> {
let mut descendants = Vec::new();
let mut to_check = vec![self.pid as i32];
while let Some(parent_pid) = to_check.pop() {
// Read /proc to find children of this PID
if let Ok(entries) = std::fs::read_dir("/proc") {
for entry in entries.flatten() {
let name = entry.file_name();
let name_str = name.to_string_lossy();
// Skip non-numeric entries (not PIDs)
if let Ok(pid) = name_str.parse::<i32>() {
// Read the stat file to get parent PID
let stat_path = format!("/proc/{}/stat", pid);
if let Ok(stat) = std::fs::read_to_string(&stat_path) {
// Format: pid (comm) state ppid ...
// Find the closing paren to handle comm with spaces/parens
if let Some(paren_end) = stat.rfind(')') {
let after_comm = &stat[paren_end + 2..];
let fields: Vec<&str> = after_comm.split_whitespace().collect();
if fields.len() >= 2 {
if let Ok(ppid) = fields[1].parse::<i32>() {
if ppid == parent_pid {
descendants.push(pid);
to_check.push(pid);
}
}
}
}
}
}
}
}
}
descendants
}
/// Send SIGTERM to all processes in this session
pub fn terminate(&self) -> HostResult<()> {
// First try to kill by command name - this catches snap apps and re-parented processes
kill_by_command(&self.command_name, Signal::SIGTERM);
// Also try to kill the process group
let pgid = Pid::from_raw(-(self.pgid as i32)); // Negative for process group
match signal::kill(pgid, Signal::SIGTERM) {
Ok(()) => {
debug!(pgid = self.pgid, "Sent SIGTERM to process group");
}
Err(nix::errno::Errno::ESRCH) => {
// Process group already gone
}
Err(e) => {
debug!(pgid = self.pgid, error = %e, "Failed to send SIGTERM to process group");
}
}
// Also kill all descendants (they may have escaped the process group)
let descendants = self.get_descendant_pids();
for pid in &descendants {
let _ = signal::kill(Pid::from_raw(*pid), Signal::SIGTERM);
}
if !descendants.is_empty() {
debug!(descendants = ?descendants, "Sent SIGTERM to descendant processes");
}
Ok(())
}
/// Send SIGKILL to all processes in this session
pub fn kill(&self) -> HostResult<()> {
// First try to kill by command name - this catches snap apps and re-parented processes
kill_by_command(&self.command_name, Signal::SIGKILL);
// Also try to kill the process group
let pgid = Pid::from_raw(-(self.pgid as i32));
match signal::kill(pgid, Signal::SIGKILL) {
Ok(()) => {
debug!(pgid = self.pgid, "Sent SIGKILL to process group");
}
Err(nix::errno::Errno::ESRCH) => {
// Process group already gone
}
Err(e) => {
debug!(pgid = self.pgid, error = %e, "Failed to send SIGKILL to process group");
}
}
// Also kill all descendants (they may have escaped the process group)
let descendants = self.get_descendant_pids();
for pid in &descendants {
let _ = signal::kill(Pid::from_raw(*pid), Signal::SIGKILL);
}
if !descendants.is_empty() {
debug!(descendants = ?descendants, "Sent SIGKILL to descendant processes");
}
Ok(())
}
/// Check if the process has exited (non-blocking)
pub fn try_wait(&mut self) -> HostResult<Option<ExitStatus>> {
match self.child.try_wait() {
Ok(Some(status)) => {
let exit_status = if let Some(code) = status.code() {
ExitStatus::with_code(code)
} else {
// Killed by signal
#[cfg(unix)]
{
use std::os::unix::process::ExitStatusExt;
if let Some(sig) = status.signal() {
ExitStatus::signaled(sig)
} else {
ExitStatus::with_code(-1)
}
}
#[cfg(not(unix))]
{
ExitStatus::with_code(-1)
}
};
Ok(Some(exit_status))
}
Ok(None) => Ok(None), // Still running
Err(e) => Err(HostError::Internal(format!("Wait failed: {}", e))),
}
}
/// Wait for the process to exit (blocking)
pub fn wait(&mut self) -> HostResult<ExitStatus> {
match self.child.wait() {
Ok(status) => {
let exit_status = if let Some(code) = status.code() {
ExitStatus::with_code(code)
} else {
#[cfg(unix)]
{
use std::os::unix::process::ExitStatusExt;
if let Some(sig) = status.signal() {
ExitStatus::signaled(sig)
} else {
ExitStatus::with_code(-1)
}
}
#[cfg(not(unix))]
{
ExitStatus::with_code(-1)
}
};
Ok(exit_status)
}
Err(e) => Err(HostError::Internal(format!("Wait failed: {}", e))),
}
}
/// Clean up resources associated with this process
pub fn cleanup(&self) {
// Nothing to clean up for systemd scopes - systemd handles it
}
}
impl Drop for ManagedProcess {
fn drop(&mut self) {
// Nothing special to do for systemd scopes - systemd cleans up automatically
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn spawn_simple_process() {
let argv = vec!["true".to_string()];
let env = HashMap::new();
let mut proc = ManagedProcess::spawn(&argv, &env, None, false).unwrap();
// Wait for it to complete
let status = proc.wait().unwrap();
assert!(status.is_success());
}
#[test]
fn spawn_with_args() {
let argv = vec!["echo".to_string(), "hello".to_string()];
let env = HashMap::new();
let mut proc = ManagedProcess::spawn(&argv, &env, None, false).unwrap();
let status = proc.wait().unwrap();
assert!(status.is_success());
}
#[test]
fn terminate_sleeping_process() {
let argv = vec!["sleep".to_string(), "60".to_string()];
let env = HashMap::new();
let proc = ManagedProcess::spawn(&argv, &env, None, false).unwrap();
// Give it a moment to start
std::thread::sleep(std::time::Duration::from_millis(50));
// Terminate it
proc.terminate().unwrap();
// Wait a bit and check
std::thread::sleep(std::time::Duration::from_millis(100));
// Process should be gone or terminating
}
}