From b179232a21753903ac29e06f58b6172d2172a6dc Mon Sep 17 00:00:00 2001 From: Albert Armea Date: Sat, 27 Dec 2025 14:11:43 -0500 Subject: [PATCH] WIP: fix running Minecraft --- README.md | 27 ++- config.example.toml | 26 +- crates/shepherd-host-linux/src/adapter.rs | 119 ++++++--- crates/shepherd-host-linux/src/process.rs | 282 ++++++++++------------ crates/shepherdd/src/main.rs | 21 +- run-dev | 16 ++ setup-cgroups.sh | 34 +++ 7 files changed, 316 insertions(+), 209 deletions(-) create mode 100755 setup-cgroups.sh diff --git a/README.md b/README.md index b9e437e..bc7c1f2 100644 --- a/README.md +++ b/README.md @@ -34,9 +34,30 @@ tl;dr: 2. System dependencies: - **Ubuntu/Debian**: `apt install build-essential pkg-config libglib2.0-dev libgtk-4-dev libcairo2-dev libpango1.0-dev libgdk-pixbuf-xlib-2.0-dev libwayland-dev libx11-dev libxkbcommon-dev libgirepository1.0-dev libgtk4-layer-shell-dev librust-gtk4-layer-shell-sys-dev sway swayidle` 3. Rust (`curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh`) -4. binaries (TODO: deployable package that depends on Sway and installs the config) -5. test session on login -6. configure auto-login to this session +4. Set up cgroups for process management (one-time, requires root): + ```bash + sudo ./setup-cgroups.sh + ``` +5. binaries (TODO: deployable package that depends on Sway and installs the config) +6. test session on login +7. configure auto-login to this session + +### cgroups Setup + +The shepherd daemon uses Linux cgroups v2 to reliably terminate all processes +when a session ends. This is essential for applications like Minecraft that +spawn child processes which may escape traditional process group signals. + +Run the setup script once after installation: + +```bash +sudo ./setup-cgroups.sh +``` + +This creates `/sys/fs/cgroup/shepherd` with appropriate permissions for your +user. 
The directory is not persistent across reboots on most systems, so you +may want to add this to your system startup (e.g., in `/etc/rc.local` or a +systemd unit). ## Usage diff --git a/config.example.toml b/config.example.toml index b166ed0..046a97d 100644 --- a/config.example.toml +++ b/config.example.toml @@ -58,15 +58,15 @@ max_run_seconds = 3600 # 1 hour max daily_quota_seconds = 7200 # 2 hours per day cooldown_seconds = 300 # 5 minute cooldown after each session -# Example: Minecraft (via Prism Launcher) +# Example: Minecraft (via snap mc-installer) [[entries]] id = "minecraft" label = "Minecraft" -icon = "prismlauncher" +icon = "minecraft" [entries.kind] type = "process" -argv = ["prismlauncher"] +argv = ["mc-installer"] [entries.availability] always = true # No time restrictions @@ -106,26 +106,6 @@ always = true [entries.limits] max_run_seconds = 7200 # 2 hours -# Example: Web browser (restricted) -[[entries]] -id = "browser" -label = "Web Browser" -icon = "firefox" - -[entries.kind] -type = "process" -argv = ["firefox", "-P", "kids"] - -[entries.availability] -[[entries.availability.windows]] -days = ["sat", "sun"] -start = "14:00" -end = "17:00" - -[entries.limits] -max_run_seconds = 1800 # 30 minutes -daily_quota_seconds = 3600 # 1 hour per day - # Example: Disabled entry [[entries]] id = "disabled-game" diff --git a/crates/shepherd-host-linux/src/adapter.rs b/crates/shepherd-host-linux/src/adapter.rs index f31ea0e..66dbbe9 100644 --- a/crates/shepherd-host-linux/src/adapter.rs +++ b/crates/shepherd-host-linux/src/adapter.rs @@ -1,9 +1,9 @@ //! 
Linux host adapter implementation use async_trait::async_trait; -use shepherd_api::{EntryKind, EntryKindTag}; +use shepherd_api::EntryKind; use shepherd_host_api::{ - ExitStatus, HostAdapter, HostCapabilities, HostError, HostEvent, HostHandlePayload, + HostAdapter, HostCapabilities, HostError, HostEvent, HostHandlePayload, HostResult, HostSessionHandle, SpawnOptions, StopMode, }; use shepherd_util::SessionId; @@ -11,33 +11,40 @@ use std::collections::HashMap; use std::sync::{Arc, Mutex}; use std::time::Duration; use tokio::sync::mpsc; -use tracing::{debug, error, info, warn}; +use tracing::{info, warn}; -use crate::process::{init_cgroup_base, ManagedProcess}; +use crate::process::{init, kill_by_command, kill_snap_cgroup, ManagedProcess}; + +/// Information tracked for each session for cleanup purposes +#[derive(Clone, Debug)] +struct SessionInfo { + command_name: String, + snap_name: Option, +} /// Linux host adapter pub struct LinuxHost { capabilities: HostCapabilities, processes: Arc>>, + /// Track session info for killing + session_info: Arc>>, event_tx: mpsc::UnboundedSender, event_rx: Arc>>>, - /// Whether cgroups are available for process management - cgroups_enabled: bool, } impl LinuxHost { pub fn new() -> Self { let (tx, rx) = mpsc::unbounded_channel(); - // Try to initialize cgroups - let cgroups_enabled = init_cgroup_base(); + // Initialize process management + init(); Self { capabilities: HostCapabilities::linux_full(), processes: Arc::new(Mutex::new(HashMap::new())), + session_info: Arc::new(Mutex::new(HashMap::new())), event_tx: tx, event_rx: Arc::new(Mutex::new(Some(rx))), - cgroups_enabled, } } @@ -132,23 +139,27 @@ impl HostAdapter for LinuxHost { } }; - // Use cgroups for process management if available - let session_id_str = if self.cgroups_enabled { - Some(session_id.to_string()) - } else { - None - }; + // Get the command name for fallback killing + let command_name = argv.first().cloned().unwrap_or_default(); - let proc = 
ManagedProcess::spawn_with_session_id( + let proc = ManagedProcess::spawn( &argv, &env, cwd.as_ref(), options.capture_stdout || options.capture_stderr, - session_id_str.as_deref(), )?; let pid = proc.pid; let pgid = proc.pgid; + let snap_name = proc.snap_name.clone(); + + // Store the session info so we can use it for killing even after process exits + let session_info_entry = SessionInfo { + command_name: command_name.clone(), + snap_name: snap_name.clone(), + }; + self.session_info.lock().unwrap().insert(session_id.clone(), session_info_entry); + info!(session_id = %session_id, command = %command_name, snap = ?snap_name, "Tracking session info"); let handle = HostSessionHandle::new( session_id, @@ -163,26 +174,42 @@ impl HostAdapter for LinuxHost { } async fn stop(&self, handle: &HostSessionHandle, mode: StopMode) -> HostResult<()> { + let session_id = handle.session_id.clone(); let (pid, _pgid) = match handle.payload() { HostHandlePayload::Linux { pid, pgid } => (*pid, *pgid), _ => return Err(HostError::SessionNotFound), }; - // Check if process exists - { - let procs = self.processes.lock().unwrap(); - if !procs.contains_key(&pid) { - return Err(HostError::SessionNotFound); - } + // Get the session's info for killing + let session_info = self.session_info.lock().unwrap().get(&session_id).cloned(); + + // Check if we have session info OR a tracked process + let has_process = self.processes.lock().unwrap().contains_key(&pid); + + if session_info.is_none() && !has_process { + warn!(session_id = %session_id, pid = pid, "No session info or tracked process found"); + return Err(HostError::SessionNotFound); } match mode { StopMode::Graceful { timeout } => { - // Send SIGTERM + // If this is a snap app, use cgroup-based killing (most reliable) + if let Some(ref info) = session_info { + if let Some(ref snap) = info.snap_name { + kill_snap_cgroup(snap, nix::sys::signal::Signal::SIGTERM); + info!(snap = %snap, "Sent SIGTERM via snap cgroup"); + } else { + // Fall back to 
command name for non-snap apps + kill_by_command(&info.command_name, nix::sys::signal::Signal::SIGTERM); + info!(command = %info.command_name, "Sent SIGTERM via command name"); + } + } + + // Also send SIGTERM via process handle { let procs = self.processes.lock().unwrap(); if let Some(p) = procs.get(&pid) { - p.terminate()?; + let _ = p.terminate(); } } @@ -190,31 +217,57 @@ impl HostAdapter for LinuxHost { let start = std::time::Instant::now(); loop { if start.elapsed() >= timeout { - // Force kill after timeout + // Force kill after timeout using snap cgroup or command name + if let Some(ref info) = session_info { + if let Some(ref snap) = info.snap_name { + kill_snap_cgroup(snap, nix::sys::signal::Signal::SIGKILL); + info!(snap = %snap, "Sent SIGKILL via snap cgroup (timeout)"); + } else { + kill_by_command(&info.command_name, nix::sys::signal::Signal::SIGKILL); + info!(command = %info.command_name, "Sent SIGKILL via command name (timeout)"); + } + } + + // Also force kill via process handle let procs = self.processes.lock().unwrap(); if let Some(p) = procs.get(&pid) { - p.kill()?; + let _ = p.kill(); } break; } - { - let procs = self.processes.lock().unwrap(); - if !procs.contains_key(&pid) { - break; - } + // Check if process is still running + let still_running = self.processes.lock().unwrap().contains_key(&pid); + + if !still_running { + break; } tokio::time::sleep(Duration::from_millis(50)).await; } } StopMode::Force => { + // Force kill via snap cgroup or command name + if let Some(ref info) = session_info { + if let Some(ref snap) = info.snap_name { + kill_snap_cgroup(snap, nix::sys::signal::Signal::SIGKILL); + info!(snap = %snap, "Sent SIGKILL via snap cgroup"); + } else { + kill_by_command(&info.command_name, nix::sys::signal::Signal::SIGKILL); + info!(command = %info.command_name, "Sent SIGKILL via command name"); + } + } + + // Also force kill via process handle let procs = self.processes.lock().unwrap(); if let Some(p) = procs.get(&pid) { - 
/// Extract the snap name from a command, if the command refers to a snap app.
///
/// Recognised forms:
/// - `/snap/<name>/<revision>/...` (e.g. `/snap/mc-installer/279/bin/mc-installer`)
///   -> `Some("mc-installer")`
/// - `/snap/bin/<launcher>` -> snap name derived from the launcher name
/// - a bare `<launcher>` for which `/snap/bin/<launcher>` exists -> same derivation
/// - anything else (e.g. `/usr/bin/firefox`) -> `None`
///
/// Never returns `Some("")`: an empty snap name would later be turned into a
/// scope prefix that matches far more than intended.
fn extract_snap_name(program: &str) -> Option<String> {
    if let Some(rest) = program.strip_prefix("/snap/") {
        let mut components = rest.split('/');
        return match components.next() {
            // "/snap/" on its own carries no snap name.
            None | Some("") => None,
            // "/snap/bin" is the launcher directory, not a snap called "bin".
            Some("bin") => components.next().and_then(snap_name_from_launcher),
            Some(name) => Some(name.to_string()),
        };
    }

    // A bare command counts as a snap app only if snapd installed a launcher for it.
    // The emptiness check keeps "" from probing the /snap/bin directory itself.
    if !program.is_empty()
        && !program.contains('/')
        && std::path::Path::new("/snap/bin").join(program).exists()
    {
        return snap_name_from_launcher(program);
    }

    None
}

/// Derive the snap name from a launcher name in `/snap/bin`.
///
/// Launchers are named `<snap>` or `<snap>.<app>`; snap names themselves cannot
/// contain a dot, so everything before the first `.` is the snap name.
fn snap_name_from_launcher(launcher: &str) -> Option<String> {
    let snap = launcher.split('.').next().unwrap_or(launcher);
    (!snap.is_empty()).then(|| snap.to_string())
}
base = std::path::Path::new(&base_path); + if !base.exists() { + debug!(path = %base_path, "Snap cgroup base path doesn't exist"); return false; } - // Try to create our base cgroup - if !base.exists() { - if let Err(e) = std::fs::create_dir_all(base) { - warn!(error = %e, "Failed to create shepherd cgroup base - running without cgroup support"); - return false; + let mut stopped_any = false; + + if let Ok(entries) = std::fs::read_dir(base) { + for entry in entries.flatten() { + let name = entry.file_name(); + let name_str = name.to_string_lossy(); + + if name_str.starts_with(&pattern) && name_str.ends_with(".scope") { + let scope_name = name_str.to_string(); + + // Always use SIGKILL for snap apps to prevent self-restart behavior + // Using systemctl kill --signal=KILL sends SIGKILL to all processes in scope + let result = Command::new("systemctl") + .args(["--user", "kill", "--signal=KILL", &scope_name]) + .output(); + + match result { + Ok(output) => { + if output.status.success() { + info!(scope = %scope_name, "Killed snap scope via systemctl SIGKILL"); + stopped_any = true; + } else { + let stderr = String::from_utf8_lossy(&output.stderr); + warn!(scope = %scope_name, stderr = %stderr, "systemctl kill command failed"); + } + } + Err(e) => { + warn!(scope = %scope_name, error = %e, "Failed to run systemctl"); + } + } + } } } - info!("cgroups v2 initialized at {}", CGROUP_BASE); - true -} - -/// Create a cgroup for a session -fn create_session_cgroup(session_id: &str) -> Option { - let cgroup_path = PathBuf::from(CGROUP_BASE).join(session_id); - - if let Err(e) = std::fs::create_dir_all(&cgroup_path) { - warn!(error = %e, path = %cgroup_path.display(), "Failed to create session cgroup"); - return None; + if stopped_any { + info!(snap = snap_name, "Killed snap scope(s) via systemctl SIGKILL"); + } else { + debug!(snap = snap_name, "No snap scope found to kill"); } - debug!(path = %cgroup_path.display(), "Created session cgroup"); - Some(cgroup_path) + stopped_any 
} -/// Move a process into a cgroup -fn move_to_cgroup(cgroup_path: &Path, pid: u32) -> bool { - let procs_file = cgroup_path.join("cgroup.procs"); +/// Kill processes by command name using pkill +pub fn kill_by_command(command_name: &str, signal: Signal) -> bool { + let signal_name = match signal { + Signal::SIGTERM => "TERM", + Signal::SIGKILL => "KILL", + _ => "TERM", + }; - if let Err(e) = std::fs::write(&procs_file, pid.to_string()) { - warn!(error = %e, pid = pid, path = %procs_file.display(), "Failed to move process to cgroup"); - return false; - } + // Use pkill to find and kill processes by command name + let result = Command::new("pkill") + .args([&format!("-{}", signal_name), "-f", command_name]) + .output(); - debug!(pid = pid, cgroup = %cgroup_path.display(), "Moved process to cgroup"); - true -} - -/// Get all PIDs in a cgroup -fn get_cgroup_pids(cgroup_path: &Path) -> Vec { - let procs_file = cgroup_path.join("cgroup.procs"); - - match std::fs::read_to_string(&procs_file) { - Ok(contents) => { - contents - .lines() - .filter_map(|line| line.trim().parse::().ok()) - .collect() + match result { + Ok(output) => { + // pkill returns 0 if processes were found and signaled + if output.status.success() { + info!(command = command_name, signal = signal_name, "Killed processes by command name"); + true + } else { + // No processes found is not an error + debug!(command = command_name, "No processes found matching command name"); + false + } } Err(e) => { - debug!(error = %e, path = %procs_file.display(), "Failed to read cgroup.procs"); - Vec::new() + warn!(command = command_name, error = %e, "Failed to run pkill"); + false } } } -/// Kill all processes in a cgroup -fn kill_cgroup(cgroup_path: &Path, signal: Signal) -> Vec { - let pids = get_cgroup_pids(cgroup_path); - - for pid in &pids { - let _ = signal::kill(Pid::from_raw(*pid), signal); - } - - if !pids.is_empty() { - debug!(pids = ?pids, signal = ?signal, cgroup = %cgroup_path.display(), "Sent signal to 
cgroup processes"); - } - - pids -} - -/// Remove a session cgroup (must be empty) -fn cleanup_session_cgroup(cgroup_path: &Path) { - // The cgroup must be empty before we can remove it - // We'll try a few times in case processes are still exiting - for _ in 0..5 { - let pids = get_cgroup_pids(cgroup_path); - if pids.is_empty() { - if let Err(e) = std::fs::remove_dir(cgroup_path) { - debug!(error = %e, path = %cgroup_path.display(), "Failed to remove session cgroup"); - } else { - debug!(path = %cgroup_path.display(), "Removed session cgroup"); - } - return; - } - std::thread::sleep(std::time::Duration::from_millis(100)); - } - debug!(path = %cgroup_path.display(), "Cgroup still has processes, leaving cleanup for later"); -} - impl ManagedProcess { - /// Spawn a new process in its own process group and optionally in a cgroup + /// Spawn a new process in its own process group pub fn spawn( argv: &[String], env: &HashMap, - cwd: Option<&PathBuf>, + cwd: Option<&std::path::PathBuf>, capture_output: bool, - ) -> HostResult { - Self::spawn_with_session_id(argv, env, cwd, capture_output, None) - } - - /// Spawn a new process with an optional session ID for cgroup management - pub fn spawn_with_session_id( - argv: &[String], - env: &HashMap, - cwd: Option<&PathBuf>, - capture_output: bool, - session_id: Option<&str>, ) -> HostResult { if argv.is_empty() { return Err(HostError::SpawnFailed("Empty argv".into())); @@ -260,12 +273,13 @@ impl ManagedProcess { cmd.stdin(Stdio::null()); + // Store the command name for later use in killing + let command_name = program.to_string(); + // Set up process group - this child becomes its own process group leader // SAFETY: This is safe in the pre-exec context unsafe { cmd.pre_exec(|| { - // Create new session (which creates new process group) - // This ensures the child is the leader of a new process group nix::unistd::setsid().map_err(|e| { std::io::Error::new(std::io::ErrorKind::Other, e.to_string()) })?; @@ -279,28 +293,14 @@ impl 
ManagedProcess { let pid = child.id(); let pgid = pid; // After setsid, pid == pgid + + // Extract snap name from command if it's a snap app + // Format: /snap//... or just the snap command name + let snap_name = extract_snap_name(program); - // Try to create a cgroup for this session and move the process into it - let cgroup_path = if let Some(sid) = session_id { - if let Some(cg_path) = create_session_cgroup(sid) { - if move_to_cgroup(&cg_path, pid) { - info!(pid = pid, cgroup = %cg_path.display(), "Process moved to session cgroup"); - Some(cg_path) - } else { - // Cleanup the empty cgroup we created - let _ = std::fs::remove_dir(&cg_path); - None - } - } else { - None - } - } else { - None - }; + info!(pid = pid, pgid = pgid, program = %program, snap = ?snap_name, "Process spawned"); - debug!(pid = pid, pgid = pgid, program = %program, has_cgroup = cgroup_path.is_some(), "Process spawned"); - - Ok(Self { child, pid, pgid, cgroup_path }) + Ok(Self { child, pid, pgid, command_name, snap_name }) } /// Get all descendant PIDs of this process using /proc @@ -343,16 +343,12 @@ impl ManagedProcess { descendants } - /// Send SIGTERM to all processes in this session (via cgroup if available, or process group) + /// Send SIGTERM to all processes in this session pub fn terminate(&self) -> HostResult<()> { - // If we have a cgroup, use it - this is the most reliable method - if let Some(ref cgroup_path) = self.cgroup_path { - let pids = kill_cgroup(cgroup_path, Signal::SIGTERM); - info!(pids = ?pids, cgroup = %cgroup_path.display(), "Sent SIGTERM via cgroup"); - return Ok(()); - } + // First try to kill by command name - this catches snap apps and re-parented processes + kill_by_command(&self.command_name, Signal::SIGTERM); - // Fallback: try to kill the process group + // Also try to kill the process group let pgid = Pid::from_raw(-(self.pgid as i32)); // Negative for process group match signal::kill(pgid, Signal::SIGTERM) { @@ -379,16 +375,12 @@ impl ManagedProcess { 
Ok(()) } - /// Send SIGKILL to all processes in this session (via cgroup if available, or process group) + /// Send SIGKILL to all processes in this session pub fn kill(&self) -> HostResult<()> { - // If we have a cgroup, use it - this is the most reliable method - if let Some(ref cgroup_path) = self.cgroup_path { - let pids = kill_cgroup(cgroup_path, Signal::SIGKILL); - info!(pids = ?pids, cgroup = %cgroup_path.display(), "Sent SIGKILL via cgroup"); - return Ok(()); - } + // First try to kill by command name - this catches snap apps and re-parented processes + kill_by_command(&self.command_name, Signal::SIGKILL); - // Fallback: try to kill the process group + // Also try to kill the process group let pgid = Pid::from_raw(-(self.pgid as i32)); match signal::kill(pgid, Signal::SIGKILL) { @@ -471,21 +463,15 @@ impl ManagedProcess { } } - /// Clean up resources associated with this process (especially cgroups) + /// Clean up resources associated with this process pub fn cleanup(&self) { - if let Some(ref cgroup_path) = self.cgroup_path { - cleanup_session_cgroup(cgroup_path); - } + // Nothing to clean up for systemd scopes - systemd handles it } } impl Drop for ManagedProcess { fn drop(&mut self) { - // Try to clean up the cgroup when the process struct is dropped - if let Some(ref cgroup_path) = self.cgroup_path { - // Only try once, don't block in Drop - let _ = std::fs::remove_dir(cgroup_path); - } + // Nothing special to do for systemd scopes - systemd cleans up automatically } } diff --git a/crates/shepherdd/src/main.rs b/crates/shepherdd/src/main.rs index b137dac..16391d1 100644 --- a/crates/shepherdd/src/main.rs +++ b/crates/shepherdd/src/main.rs @@ -569,8 +569,25 @@ impl Daemon { StopMode::Force => SessionEndReason::AdminStop, }; - match eng.stop_current(reason, now_mono, now) { - StopDecision::Stopped(_result) => { + match eng.stop_current(reason.clone(), now_mono, now) { + StopDecision::Stopped(result) => { + // Broadcast SessionEnded event so UIs know to 
#!/bin/bash
# Setup cgroups v2 for shepherd-launcher
#
# Creates /sys/fs/cgroup/shepherd and hands ownership to the user that will run
# the shepherd daemon, so the daemon can create per-session cgroups beneath it.
#
# Usage: sudo ./setup-cgroups.sh [USER]
#   USER defaults to $SUDO_USER, or the current user when not run via sudo.
#
# This script must be run as root (or with sudo)

set -e

CGROUP_BASE="/sys/fs/cgroup/shepherd"

# Fail early with a clear message instead of a permission error from mkdir/chown
if [ "$(id -u)" -ne 0 ]; then
    echo "Error: this script must be run as root (try: sudo $0)" >&2
    exit 1
fi

# Check if cgroups v2 is available
if [ ! -f /sys/fs/cgroup/cgroup.controllers ]; then
    echo "Error: cgroups v2 is not available on this system"
    echo "Make sure your kernel supports cgroups v2 and it's mounted"
    exit 1
fi

# Get the user who will run shepherd (default to SUDO_USER or current user)
SHEPHERD_USER="${1:-${SUDO_USER:-$(whoami)}}"

# Validate the user before touching the cgroup tree
if ! id "$SHEPHERD_USER" >/dev/null 2>&1; then
    echo "Error: user '$SHEPHERD_USER' does not exist" >&2
    exit 1
fi

# Use the user's primary group; a group named after the user may not exist
SHEPHERD_GROUP="$(id -gn "$SHEPHERD_USER")"

echo "Setting up cgroups for shepherd-launcher..."
echo "User: $SHEPHERD_USER"

# Create the shepherd cgroup directory
mkdir -p "$CGROUP_BASE"

# Set ownership so the shepherd daemon can create session cgroups
chown "$SHEPHERD_USER:$SHEPHERD_GROUP" "$CGROUP_BASE"

# Set permissions (owner can read/write/execute, others can read/execute)
chmod 755 "$CGROUP_BASE"

echo "Created $CGROUP_BASE with ownership $SHEPHERD_USER"
echo ""
echo "cgroups v2 setup complete!"
echo "The shepherd daemon can now create session cgroups for reliable process management."