WIP: fix running Minecraft
This commit is contained in:
parent
77a167e45f
commit
b179232a21
7 changed files with 316 additions and 209 deletions
27
README.md
27
README.md
|
|
@ -34,9 +34,30 @@ tl;dr:
|
||||||
2. System dependencies:
|
2. System dependencies:
|
||||||
- **Ubuntu/Debian**: `apt install build-essential pkg-config libglib2.0-dev libgtk-4-dev libcairo2-dev libpango1.0-dev libgdk-pixbuf-xlib-2.0-dev libwayland-dev libx11-dev libxkbcommon-dev libgirepository1.0-dev libgtk4-layer-shell-dev librust-gtk4-layer-shell-sys-dev sway swayidle`
|
- **Ubuntu/Debian**: `apt install build-essential pkg-config libglib2.0-dev libgtk-4-dev libcairo2-dev libpango1.0-dev libgdk-pixbuf-xlib-2.0-dev libwayland-dev libx11-dev libxkbcommon-dev libgirepository1.0-dev libgtk4-layer-shell-dev librust-gtk4-layer-shell-sys-dev sway swayidle`
|
||||||
3. Rust (`curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh`)
|
3. Rust (`curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh`)
|
||||||
4. binaries (TODO: deployable package that depends on Sway and installs the config)
|
4. Set up cgroups for process management (requires root; must be repeated after every reboot unless you automate it, see "cgroups Setup" below):
|
||||||
5. test session on login
|
```bash
|
||||||
6. configure auto-login to this session
|
sudo ./setup-cgroups.sh
|
||||||
|
```
|
||||||
|
5. binaries (TODO: deployable package that depends on Sway and installs the config)
|
||||||
|
6. test session on login
|
||||||
|
7. configure auto-login to this session
|
||||||
|
|
||||||
|
### cgroups Setup
|
||||||
|
|
||||||
|
The shepherd daemon uses Linux cgroups v2 to reliably terminate all processes
|
||||||
|
when a session ends. This is essential for applications like Minecraft that
|
||||||
|
spawn child processes which may escape traditional process group signals.
|
||||||
|
|
||||||
|
Run the setup script after installation (and again after each reboot, unless you automate it as described below):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
sudo ./setup-cgroups.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
This creates `/sys/fs/cgroup/shepherd` with appropriate permissions for your
|
||||||
|
user. The directory is not persistent across reboots on most systems, so you
|
||||||
|
may want to add this to your system startup (e.g., in `/etc/rc.local` or a
|
||||||
|
systemd unit).
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -58,15 +58,15 @@ max_run_seconds = 3600 # 1 hour max
|
||||||
daily_quota_seconds = 7200 # 2 hours per day
|
daily_quota_seconds = 7200 # 2 hours per day
|
||||||
cooldown_seconds = 300 # 5 minute cooldown after each session
|
cooldown_seconds = 300 # 5 minute cooldown after each session
|
||||||
|
|
||||||
# Example: Minecraft (via Prism Launcher)
|
# Example: Minecraft (via snap mc-installer)
|
||||||
[[entries]]
|
[[entries]]
|
||||||
id = "minecraft"
|
id = "minecraft"
|
||||||
label = "Minecraft"
|
label = "Minecraft"
|
||||||
icon = "prismlauncher"
|
icon = "minecraft"
|
||||||
|
|
||||||
[entries.kind]
|
[entries.kind]
|
||||||
type = "process"
|
type = "process"
|
||||||
argv = ["prismlauncher"]
|
argv = ["mc-installer"]
|
||||||
|
|
||||||
[entries.availability]
|
[entries.availability]
|
||||||
always = true # No time restrictions
|
always = true # No time restrictions
|
||||||
|
|
@ -106,26 +106,6 @@ always = true
|
||||||
[entries.limits]
|
[entries.limits]
|
||||||
max_run_seconds = 7200 # 2 hours
|
max_run_seconds = 7200 # 2 hours
|
||||||
|
|
||||||
# Example: Web browser (restricted)
|
|
||||||
[[entries]]
|
|
||||||
id = "browser"
|
|
||||||
label = "Web Browser"
|
|
||||||
icon = "firefox"
|
|
||||||
|
|
||||||
[entries.kind]
|
|
||||||
type = "process"
|
|
||||||
argv = ["firefox", "-P", "kids"]
|
|
||||||
|
|
||||||
[entries.availability]
|
|
||||||
[[entries.availability.windows]]
|
|
||||||
days = ["sat", "sun"]
|
|
||||||
start = "14:00"
|
|
||||||
end = "17:00"
|
|
||||||
|
|
||||||
[entries.limits]
|
|
||||||
max_run_seconds = 1800 # 30 minutes
|
|
||||||
daily_quota_seconds = 3600 # 1 hour per day
|
|
||||||
|
|
||||||
# Example: Disabled entry
|
# Example: Disabled entry
|
||||||
[[entries]]
|
[[entries]]
|
||||||
id = "disabled-game"
|
id = "disabled-game"
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
//! Linux host adapter implementation
|
//! Linux host adapter implementation
|
||||||
|
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use shepherd_api::{EntryKind, EntryKindTag};
|
use shepherd_api::EntryKind;
|
||||||
use shepherd_host_api::{
|
use shepherd_host_api::{
|
||||||
ExitStatus, HostAdapter, HostCapabilities, HostError, HostEvent, HostHandlePayload,
|
HostAdapter, HostCapabilities, HostError, HostEvent, HostHandlePayload,
|
||||||
HostResult, HostSessionHandle, SpawnOptions, StopMode,
|
HostResult, HostSessionHandle, SpawnOptions, StopMode,
|
||||||
};
|
};
|
||||||
use shepherd_util::SessionId;
|
use shepherd_util::SessionId;
|
||||||
|
|
@ -11,33 +11,40 @@ use std::collections::HashMap;
|
||||||
use std::sync::{Arc, Mutex};
|
use std::sync::{Arc, Mutex};
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
use tokio::sync::mpsc;
|
use tokio::sync::mpsc;
|
||||||
use tracing::{debug, error, info, warn};
|
use tracing::{info, warn};
|
||||||
|
|
||||||
use crate::process::{init_cgroup_base, ManagedProcess};
|
use crate::process::{init, kill_by_command, kill_snap_cgroup, ManagedProcess};
|
||||||
|
|
||||||
|
/// Information tracked for each session for cleanup purposes
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
struct SessionInfo {
|
||||||
|
command_name: String,
|
||||||
|
snap_name: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
/// Linux host adapter
|
/// Linux host adapter
|
||||||
pub struct LinuxHost {
|
pub struct LinuxHost {
|
||||||
capabilities: HostCapabilities,
|
capabilities: HostCapabilities,
|
||||||
processes: Arc<Mutex<HashMap<u32, ManagedProcess>>>,
|
processes: Arc<Mutex<HashMap<u32, ManagedProcess>>>,
|
||||||
|
/// Track session info for killing
|
||||||
|
session_info: Arc<Mutex<HashMap<SessionId, SessionInfo>>>,
|
||||||
event_tx: mpsc::UnboundedSender<HostEvent>,
|
event_tx: mpsc::UnboundedSender<HostEvent>,
|
||||||
event_rx: Arc<Mutex<Option<mpsc::UnboundedReceiver<HostEvent>>>>,
|
event_rx: Arc<Mutex<Option<mpsc::UnboundedReceiver<HostEvent>>>>,
|
||||||
/// Whether cgroups are available for process management
|
|
||||||
cgroups_enabled: bool,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl LinuxHost {
|
impl LinuxHost {
|
||||||
pub fn new() -> Self {
|
pub fn new() -> Self {
|
||||||
let (tx, rx) = mpsc::unbounded_channel();
|
let (tx, rx) = mpsc::unbounded_channel();
|
||||||
|
|
||||||
// Try to initialize cgroups
|
// Initialize process management
|
||||||
let cgroups_enabled = init_cgroup_base();
|
init();
|
||||||
|
|
||||||
Self {
|
Self {
|
||||||
capabilities: HostCapabilities::linux_full(),
|
capabilities: HostCapabilities::linux_full(),
|
||||||
processes: Arc::new(Mutex::new(HashMap::new())),
|
processes: Arc::new(Mutex::new(HashMap::new())),
|
||||||
|
session_info: Arc::new(Mutex::new(HashMap::new())),
|
||||||
event_tx: tx,
|
event_tx: tx,
|
||||||
event_rx: Arc::new(Mutex::new(Some(rx))),
|
event_rx: Arc::new(Mutex::new(Some(rx))),
|
||||||
cgroups_enabled,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -132,23 +139,27 @@ impl HostAdapter for LinuxHost {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// Use cgroups for process management if available
|
// Get the command name for fallback killing
|
||||||
let session_id_str = if self.cgroups_enabled {
|
let command_name = argv.first().cloned().unwrap_or_default();
|
||||||
Some(session_id.to_string())
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
};
|
|
||||||
|
|
||||||
let proc = ManagedProcess::spawn_with_session_id(
|
let proc = ManagedProcess::spawn(
|
||||||
&argv,
|
&argv,
|
||||||
&env,
|
&env,
|
||||||
cwd.as_ref(),
|
cwd.as_ref(),
|
||||||
options.capture_stdout || options.capture_stderr,
|
options.capture_stdout || options.capture_stderr,
|
||||||
session_id_str.as_deref(),
|
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
let pid = proc.pid;
|
let pid = proc.pid;
|
||||||
let pgid = proc.pgid;
|
let pgid = proc.pgid;
|
||||||
|
let snap_name = proc.snap_name.clone();
|
||||||
|
|
||||||
|
// Store the session info so we can use it for killing even after process exits
|
||||||
|
let session_info_entry = SessionInfo {
|
||||||
|
command_name: command_name.clone(),
|
||||||
|
snap_name: snap_name.clone(),
|
||||||
|
};
|
||||||
|
self.session_info.lock().unwrap().insert(session_id.clone(), session_info_entry);
|
||||||
|
info!(session_id = %session_id, command = %command_name, snap = ?snap_name, "Tracking session info");
|
||||||
|
|
||||||
let handle = HostSessionHandle::new(
|
let handle = HostSessionHandle::new(
|
||||||
session_id,
|
session_id,
|
||||||
|
|
@ -163,26 +174,42 @@ impl HostAdapter for LinuxHost {
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn stop(&self, handle: &HostSessionHandle, mode: StopMode) -> HostResult<()> {
|
async fn stop(&self, handle: &HostSessionHandle, mode: StopMode) -> HostResult<()> {
|
||||||
|
let session_id = handle.session_id.clone();
|
||||||
let (pid, _pgid) = match handle.payload() {
|
let (pid, _pgid) = match handle.payload() {
|
||||||
HostHandlePayload::Linux { pid, pgid } => (*pid, *pgid),
|
HostHandlePayload::Linux { pid, pgid } => (*pid, *pgid),
|
||||||
_ => return Err(HostError::SessionNotFound),
|
_ => return Err(HostError::SessionNotFound),
|
||||||
};
|
};
|
||||||
|
|
||||||
// Check if process exists
|
// Get the session's info for killing
|
||||||
{
|
let session_info = self.session_info.lock().unwrap().get(&session_id).cloned();
|
||||||
let procs = self.processes.lock().unwrap();
|
|
||||||
if !procs.contains_key(&pid) {
|
// Check if we have session info OR a tracked process
|
||||||
return Err(HostError::SessionNotFound);
|
let has_process = self.processes.lock().unwrap().contains_key(&pid);
|
||||||
}
|
|
||||||
|
if session_info.is_none() && !has_process {
|
||||||
|
warn!(session_id = %session_id, pid = pid, "No session info or tracked process found");
|
||||||
|
return Err(HostError::SessionNotFound);
|
||||||
}
|
}
|
||||||
|
|
||||||
match mode {
|
match mode {
|
||||||
StopMode::Graceful { timeout } => {
|
StopMode::Graceful { timeout } => {
|
||||||
// Send SIGTERM
|
// If this is a snap app, use cgroup-based killing (most reliable)
|
||||||
|
if let Some(ref info) = session_info {
|
||||||
|
if let Some(ref snap) = info.snap_name {
|
||||||
|
kill_snap_cgroup(snap, nix::sys::signal::Signal::SIGTERM);
|
||||||
|
info!(snap = %snap, "Sent SIGTERM via snap cgroup");
|
||||||
|
} else {
|
||||||
|
// Fall back to command name for non-snap apps
|
||||||
|
kill_by_command(&info.command_name, nix::sys::signal::Signal::SIGTERM);
|
||||||
|
info!(command = %info.command_name, "Sent SIGTERM via command name");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Also send SIGTERM via process handle
|
||||||
{
|
{
|
||||||
let procs = self.processes.lock().unwrap();
|
let procs = self.processes.lock().unwrap();
|
||||||
if let Some(p) = procs.get(&pid) {
|
if let Some(p) = procs.get(&pid) {
|
||||||
p.terminate()?;
|
let _ = p.terminate();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -190,31 +217,57 @@ impl HostAdapter for LinuxHost {
|
||||||
let start = std::time::Instant::now();
|
let start = std::time::Instant::now();
|
||||||
loop {
|
loop {
|
||||||
if start.elapsed() >= timeout {
|
if start.elapsed() >= timeout {
|
||||||
// Force kill after timeout
|
// Force kill after timeout using snap cgroup or command name
|
||||||
|
if let Some(ref info) = session_info {
|
||||||
|
if let Some(ref snap) = info.snap_name {
|
||||||
|
kill_snap_cgroup(snap, nix::sys::signal::Signal::SIGKILL);
|
||||||
|
info!(snap = %snap, "Sent SIGKILL via snap cgroup (timeout)");
|
||||||
|
} else {
|
||||||
|
kill_by_command(&info.command_name, nix::sys::signal::Signal::SIGKILL);
|
||||||
|
info!(command = %info.command_name, "Sent SIGKILL via command name (timeout)");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Also force kill via process handle
|
||||||
let procs = self.processes.lock().unwrap();
|
let procs = self.processes.lock().unwrap();
|
||||||
if let Some(p) = procs.get(&pid) {
|
if let Some(p) = procs.get(&pid) {
|
||||||
p.kill()?;
|
let _ = p.kill();
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
// Check if process is still running
|
||||||
let procs = self.processes.lock().unwrap();
|
let still_running = self.processes.lock().unwrap().contains_key(&pid);
|
||||||
if !procs.contains_key(&pid) {
|
|
||||||
break;
|
if !still_running {
|
||||||
}
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
tokio::time::sleep(Duration::from_millis(50)).await;
|
tokio::time::sleep(Duration::from_millis(50)).await;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
StopMode::Force => {
|
StopMode::Force => {
|
||||||
|
// Force kill via snap cgroup or command name
|
||||||
|
if let Some(ref info) = session_info {
|
||||||
|
if let Some(ref snap) = info.snap_name {
|
||||||
|
kill_snap_cgroup(snap, nix::sys::signal::Signal::SIGKILL);
|
||||||
|
info!(snap = %snap, "Sent SIGKILL via snap cgroup");
|
||||||
|
} else {
|
||||||
|
kill_by_command(&info.command_name, nix::sys::signal::Signal::SIGKILL);
|
||||||
|
info!(command = %info.command_name, "Sent SIGKILL via command name");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Also force kill via process handle
|
||||||
let procs = self.processes.lock().unwrap();
|
let procs = self.processes.lock().unwrap();
|
||||||
if let Some(p) = procs.get(&pid) {
|
if let Some(p) = procs.get(&pid) {
|
||||||
p.kill()?;
|
let _ = p.kill();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Clean up the session info tracking
|
||||||
|
self.session_info.lock().unwrap().remove(&session_id);
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,146 +1,159 @@
|
||||||
//! Process management utilities
|
//! Process management utilities
|
||||||
|
|
||||||
use nix::sys::signal::{self, Signal};
|
use nix::sys::signal::{self, Signal};
|
||||||
use nix::sys::wait::{waitpid, WaitPidFlag, WaitStatus};
|
|
||||||
use nix::unistd::Pid;
|
use nix::unistd::Pid;
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::os::unix::process::CommandExt;
|
use std::os::unix::process::CommandExt;
|
||||||
use std::path::{Path, PathBuf};
|
|
||||||
use std::process::{Child, Command, Stdio};
|
use std::process::{Child, Command, Stdio};
|
||||||
use tracing::{debug, info, warn};
|
use tracing::{debug, info, warn};
|
||||||
|
|
||||||
use shepherd_host_api::{ExitStatus, HostError, HostResult};
|
use shepherd_host_api::{ExitStatus, HostError, HostResult};
|
||||||
|
|
||||||
/// Base path for shepherd's cgroups
|
/// Extract the snap name from a command path
|
||||||
const CGROUP_BASE: &str = "/sys/fs/cgroup/shepherd";
|
/// Examples:
|
||||||
|
/// - "/snap/mc-installer/279/bin/mc-installer" -> Some("mc-installer")
|
||||||
|
/// - "mc-installer" (if it's a snap) -> Some("mc-installer")
|
||||||
|
/// - "/usr/bin/firefox" -> None
|
||||||
|
fn extract_snap_name(program: &str) -> Option<String> {
    // Absolute path under the snap mount point.
    if let Some(rest) = program.strip_prefix("/snap/") {
        let mut components = rest.split('/');
        let name = match components.next() {
            // `/snap/bin/<command>` is the directory of launcher entries, not a
            // snap called "bin"; use the command itself, exactly as the
            // bare-command branch below does.
            Some("bin") => components.next(),
            // `/snap/<snap-name>/<revision>/...`
            other => other,
        };
        // `/snap/`, `/snap/bin` and `/snap/bin/` carry no usable name; never
        // report an empty snap name, since callers build scope patterns from it.
        return name.filter(|n| !n.is_empty()).map(str::to_string);
    }

    // Bare command (no path component): treat it as a snap only if snapd
    // exposes a launcher for it in /snap/bin. An empty program must be
    // rejected explicitly, otherwise the `/snap/bin/` directory itself
    // would satisfy the existence check.
    if !program.is_empty() && !program.contains('/') {
        let launcher = std::path::Path::new("/snap/bin").join(program);
        if launcher.exists() {
            return Some(program.to_string());
        }
    }

    None
}
|
||||||
|
|
||||||
/// Managed child process with process group and optional cgroup
|
/// Managed child process with process group tracking
|
||||||
pub struct ManagedProcess {
|
pub struct ManagedProcess {
|
||||||
pub child: Child,
|
pub child: Child,
|
||||||
pub pid: u32,
|
pub pid: u32,
|
||||||
pub pgid: u32,
|
pub pgid: u32,
|
||||||
/// The cgroup path if cgroups are enabled
|
/// The command name (for fallback killing via pkill)
|
||||||
pub cgroup_path: Option<PathBuf>,
|
pub command_name: String,
|
||||||
|
/// The snap name if this is a snap app (for cgroup-based killing)
|
||||||
|
pub snap_name: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Initialize the shepherd cgroup hierarchy (called once at startup)
|
/// Initialize process management (called once at startup)
|
||||||
pub fn init_cgroup_base() -> bool {
|
pub fn init() {
|
||||||
let base = Path::new(CGROUP_BASE);
|
info!("Process management initialized");
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Kill all processes in a snap's cgroup using systemd
|
||||||
|
/// Snaps create scopes at: snap.<snap-name>.<snap-name>-<uuid>.scope
|
||||||
|
/// Direct signals don't work due to AppArmor confinement, but systemctl --user does
|
||||||
|
/// NOTE: We always use SIGKILL for snap apps because apps like Minecraft Launcher
|
||||||
|
/// have self-restart behavior and will spawn new instances when receiving SIGTERM
|
||||||
|
pub fn kill_snap_cgroup(snap_name: &str, _signal: Signal) -> bool {
|
||||||
|
let uid = nix::unistd::getuid().as_raw();
|
||||||
|
let base_path = format!(
|
||||||
|
"/sys/fs/cgroup/user.slice/user-{}.slice/user@{}.service/app.slice",
|
||||||
|
uid, uid
|
||||||
|
);
|
||||||
|
|
||||||
// Check if cgroups v2 is available
|
// Find all scope directories matching this snap
|
||||||
if !Path::new("/sys/fs/cgroup/cgroup.controllers").exists() {
|
let pattern = format!("snap.{}.{}-", snap_name, snap_name);
|
||||||
info!("cgroups v2 not available, falling back to process group signals");
|
|
||||||
|
let base = std::path::Path::new(&base_path);
|
||||||
|
if !base.exists() {
|
||||||
|
debug!(path = %base_path, "Snap cgroup base path doesn't exist");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Try to create our base cgroup
|
let mut stopped_any = false;
|
||||||
if !base.exists() {
|
|
||||||
if let Err(e) = std::fs::create_dir_all(base) {
|
if let Ok(entries) = std::fs::read_dir(base) {
|
||||||
warn!(error = %e, "Failed to create shepherd cgroup base - running without cgroup support");
|
for entry in entries.flatten() {
|
||||||
return false;
|
let name = entry.file_name();
|
||||||
|
let name_str = name.to_string_lossy();
|
||||||
|
|
||||||
|
if name_str.starts_with(&pattern) && name_str.ends_with(".scope") {
|
||||||
|
let scope_name = name_str.to_string();
|
||||||
|
|
||||||
|
// Always use SIGKILL for snap apps to prevent self-restart behavior
|
||||||
|
// Using systemctl kill --signal=KILL sends SIGKILL to all processes in scope
|
||||||
|
let result = Command::new("systemctl")
|
||||||
|
.args(["--user", "kill", "--signal=KILL", &scope_name])
|
||||||
|
.output();
|
||||||
|
|
||||||
|
match result {
|
||||||
|
Ok(output) => {
|
||||||
|
if output.status.success() {
|
||||||
|
info!(scope = %scope_name, "Killed snap scope via systemctl SIGKILL");
|
||||||
|
stopped_any = true;
|
||||||
|
} else {
|
||||||
|
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||||
|
warn!(scope = %scope_name, stderr = %stderr, "systemctl kill command failed");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
warn!(scope = %scope_name, error = %e, "Failed to run systemctl");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
info!("cgroups v2 initialized at {}", CGROUP_BASE);
|
if stopped_any {
|
||||||
true
|
info!(snap = snap_name, "Killed snap scope(s) via systemctl SIGKILL");
|
||||||
}
|
} else {
|
||||||
|
debug!(snap = snap_name, "No snap scope found to kill");
|
||||||
/// Create a cgroup for a session
|
|
||||||
fn create_session_cgroup(session_id: &str) -> Option<PathBuf> {
|
|
||||||
let cgroup_path = PathBuf::from(CGROUP_BASE).join(session_id);
|
|
||||||
|
|
||||||
if let Err(e) = std::fs::create_dir_all(&cgroup_path) {
|
|
||||||
warn!(error = %e, path = %cgroup_path.display(), "Failed to create session cgroup");
|
|
||||||
return None;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
debug!(path = %cgroup_path.display(), "Created session cgroup");
|
stopped_any
|
||||||
Some(cgroup_path)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Move a process into a cgroup
|
/// Signal every process whose full command line matches `command_name`, using `pkill -f`
|
||||||
fn move_to_cgroup(cgroup_path: &Path, pid: u32) -> bool {
|
pub fn kill_by_command(command_name: &str, signal: Signal) -> bool {
|
||||||
let procs_file = cgroup_path.join("cgroup.procs");
|
let signal_name = match signal {
|
||||||
|
Signal::SIGTERM => "TERM",
|
||||||
|
Signal::SIGKILL => "KILL",
|
||||||
|
_ => "TERM",
|
||||||
|
};
|
||||||
|
|
||||||
if let Err(e) = std::fs::write(&procs_file, pid.to_string()) {
|
// Use pkill to find and kill processes by command name
|
||||||
warn!(error = %e, pid = pid, path = %procs_file.display(), "Failed to move process to cgroup");
|
let result = Command::new("pkill")
|
||||||
return false;
|
.args([&format!("-{}", signal_name), "-f", command_name])
|
||||||
}
|
.output();
|
||||||
|
|
||||||
debug!(pid = pid, cgroup = %cgroup_path.display(), "Moved process to cgroup");
|
match result {
|
||||||
true
|
Ok(output) => {
|
||||||
}
|
// pkill returns 0 if processes were found and signaled
|
||||||
|
if output.status.success() {
|
||||||
/// Get all PIDs in a cgroup
|
info!(command = command_name, signal = signal_name, "Killed processes by command name");
|
||||||
fn get_cgroup_pids(cgroup_path: &Path) -> Vec<i32> {
|
true
|
||||||
let procs_file = cgroup_path.join("cgroup.procs");
|
} else {
|
||||||
|
// No processes found is not an error
|
||||||
match std::fs::read_to_string(&procs_file) {
|
debug!(command = command_name, "No processes found matching command name");
|
||||||
Ok(contents) => {
|
false
|
||||||
contents
|
}
|
||||||
.lines()
|
|
||||||
.filter_map(|line| line.trim().parse::<i32>().ok())
|
|
||||||
.collect()
|
|
||||||
}
|
}
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
debug!(error = %e, path = %procs_file.display(), "Failed to read cgroup.procs");
|
warn!(command = command_name, error = %e, "Failed to run pkill");
|
||||||
Vec::new()
|
false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Kill all processes in a cgroup
|
|
||||||
fn kill_cgroup(cgroup_path: &Path, signal: Signal) -> Vec<i32> {
|
|
||||||
let pids = get_cgroup_pids(cgroup_path);
|
|
||||||
|
|
||||||
for pid in &pids {
|
|
||||||
let _ = signal::kill(Pid::from_raw(*pid), signal);
|
|
||||||
}
|
|
||||||
|
|
||||||
if !pids.is_empty() {
|
|
||||||
debug!(pids = ?pids, signal = ?signal, cgroup = %cgroup_path.display(), "Sent signal to cgroup processes");
|
|
||||||
}
|
|
||||||
|
|
||||||
pids
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Remove a session cgroup (must be empty)
|
|
||||||
fn cleanup_session_cgroup(cgroup_path: &Path) {
|
|
||||||
// The cgroup must be empty before we can remove it
|
|
||||||
// We'll try a few times in case processes are still exiting
|
|
||||||
for _ in 0..5 {
|
|
||||||
let pids = get_cgroup_pids(cgroup_path);
|
|
||||||
if pids.is_empty() {
|
|
||||||
if let Err(e) = std::fs::remove_dir(cgroup_path) {
|
|
||||||
debug!(error = %e, path = %cgroup_path.display(), "Failed to remove session cgroup");
|
|
||||||
} else {
|
|
||||||
debug!(path = %cgroup_path.display(), "Removed session cgroup");
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
std::thread::sleep(std::time::Duration::from_millis(100));
|
|
||||||
}
|
|
||||||
debug!(path = %cgroup_path.display(), "Cgroup still has processes, leaving cleanup for later");
|
|
||||||
}
|
|
||||||
|
|
||||||
impl ManagedProcess {
|
impl ManagedProcess {
|
||||||
/// Spawn a new process in its own process group and optionally in a cgroup
|
/// Spawn a new process in its own process group
|
||||||
pub fn spawn(
|
pub fn spawn(
|
||||||
argv: &[String],
|
argv: &[String],
|
||||||
env: &HashMap<String, String>,
|
env: &HashMap<String, String>,
|
||||||
cwd: Option<&PathBuf>,
|
cwd: Option<&std::path::PathBuf>,
|
||||||
capture_output: bool,
|
capture_output: bool,
|
||||||
) -> HostResult<Self> {
|
|
||||||
Self::spawn_with_session_id(argv, env, cwd, capture_output, None)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Spawn a new process with an optional session ID for cgroup management
|
|
||||||
pub fn spawn_with_session_id(
|
|
||||||
argv: &[String],
|
|
||||||
env: &HashMap<String, String>,
|
|
||||||
cwd: Option<&PathBuf>,
|
|
||||||
capture_output: bool,
|
|
||||||
session_id: Option<&str>,
|
|
||||||
) -> HostResult<Self> {
|
) -> HostResult<Self> {
|
||||||
if argv.is_empty() {
|
if argv.is_empty() {
|
||||||
return Err(HostError::SpawnFailed("Empty argv".into()));
|
return Err(HostError::SpawnFailed("Empty argv".into()));
|
||||||
|
|
@ -260,12 +273,13 @@ impl ManagedProcess {
|
||||||
|
|
||||||
cmd.stdin(Stdio::null());
|
cmd.stdin(Stdio::null());
|
||||||
|
|
||||||
|
// Store the command name for later use in killing
|
||||||
|
let command_name = program.to_string();
|
||||||
|
|
||||||
// Set up process group - this child becomes its own process group leader
|
// Set up process group - this child becomes its own process group leader
|
||||||
// SAFETY: This is safe in the pre-exec context
|
// SAFETY: This is safe in the pre-exec context
|
||||||
unsafe {
|
unsafe {
|
||||||
cmd.pre_exec(|| {
|
cmd.pre_exec(|| {
|
||||||
// Create new session (which creates new process group)
|
|
||||||
// This ensures the child is the leader of a new process group
|
|
||||||
nix::unistd::setsid().map_err(|e| {
|
nix::unistd::setsid().map_err(|e| {
|
||||||
std::io::Error::new(std::io::ErrorKind::Other, e.to_string())
|
std::io::Error::new(std::io::ErrorKind::Other, e.to_string())
|
||||||
})?;
|
})?;
|
||||||
|
|
@ -279,28 +293,14 @@ impl ManagedProcess {
|
||||||
|
|
||||||
let pid = child.id();
|
let pid = child.id();
|
||||||
let pgid = pid; // After setsid, pid == pgid
|
let pgid = pid; // After setsid, pid == pgid
|
||||||
|
|
||||||
|
// Extract snap name from command if it's a snap app
|
||||||
|
// Format: /snap/<snap-name>/... or just the snap command name
|
||||||
|
let snap_name = extract_snap_name(program);
|
||||||
|
|
||||||
// Try to create a cgroup for this session and move the process into it
|
info!(pid = pid, pgid = pgid, program = %program, snap = ?snap_name, "Process spawned");
|
||||||
let cgroup_path = if let Some(sid) = session_id {
|
|
||||||
if let Some(cg_path) = create_session_cgroup(sid) {
|
|
||||||
if move_to_cgroup(&cg_path, pid) {
|
|
||||||
info!(pid = pid, cgroup = %cg_path.display(), "Process moved to session cgroup");
|
|
||||||
Some(cg_path)
|
|
||||||
} else {
|
|
||||||
// Cleanup the empty cgroup we created
|
|
||||||
let _ = std::fs::remove_dir(&cg_path);
|
|
||||||
None
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
};
|
|
||||||
|
|
||||||
debug!(pid = pid, pgid = pgid, program = %program, has_cgroup = cgroup_path.is_some(), "Process spawned");
|
Ok(Self { child, pid, pgid, command_name, snap_name })
|
||||||
|
|
||||||
Ok(Self { child, pid, pgid, cgroup_path })
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get all descendant PIDs of this process using /proc
|
/// Get all descendant PIDs of this process using /proc
|
||||||
|
|
@ -343,16 +343,12 @@ impl ManagedProcess {
|
||||||
descendants
|
descendants
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Send SIGTERM to all processes in this session (via cgroup if available, or process group)
|
/// Send SIGTERM to all processes in this session
|
||||||
pub fn terminate(&self) -> HostResult<()> {
|
pub fn terminate(&self) -> HostResult<()> {
|
||||||
// If we have a cgroup, use it - this is the most reliable method
|
// First try to kill by command name - this catches snap apps and re-parented processes
|
||||||
if let Some(ref cgroup_path) = self.cgroup_path {
|
kill_by_command(&self.command_name, Signal::SIGTERM);
|
||||||
let pids = kill_cgroup(cgroup_path, Signal::SIGTERM);
|
|
||||||
info!(pids = ?pids, cgroup = %cgroup_path.display(), "Sent SIGTERM via cgroup");
|
|
||||||
return Ok(());
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fallback: try to kill the process group
|
// Also try to kill the process group
|
||||||
let pgid = Pid::from_raw(-(self.pgid as i32)); // Negative for process group
|
let pgid = Pid::from_raw(-(self.pgid as i32)); // Negative for process group
|
||||||
|
|
||||||
match signal::kill(pgid, Signal::SIGTERM) {
|
match signal::kill(pgid, Signal::SIGTERM) {
|
||||||
|
|
@ -379,16 +375,12 @@ impl ManagedProcess {
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Send SIGKILL to all processes in this session (via cgroup if available, or process group)
|
/// Send SIGKILL to all processes in this session
|
||||||
pub fn kill(&self) -> HostResult<()> {
|
pub fn kill(&self) -> HostResult<()> {
|
||||||
// If we have a cgroup, use it - this is the most reliable method
|
// First try to kill by command name - this catches snap apps and re-parented processes
|
||||||
if let Some(ref cgroup_path) = self.cgroup_path {
|
kill_by_command(&self.command_name, Signal::SIGKILL);
|
||||||
let pids = kill_cgroup(cgroup_path, Signal::SIGKILL);
|
|
||||||
info!(pids = ?pids, cgroup = %cgroup_path.display(), "Sent SIGKILL via cgroup");
|
|
||||||
return Ok(());
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fallback: try to kill the process group
|
// Also try to kill the process group
|
||||||
let pgid = Pid::from_raw(-(self.pgid as i32));
|
let pgid = Pid::from_raw(-(self.pgid as i32));
|
||||||
|
|
||||||
match signal::kill(pgid, Signal::SIGKILL) {
|
match signal::kill(pgid, Signal::SIGKILL) {
|
||||||
|
|
@ -471,21 +463,15 @@ impl ManagedProcess {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Clean up resources associated with this process (especially cgroups)
|
/// Clean up resources associated with this process
|
||||||
pub fn cleanup(&self) {
|
pub fn cleanup(&self) {
|
||||||
if let Some(ref cgroup_path) = self.cgroup_path {
|
// Nothing to clean up for systemd scopes - systemd handles it
|
||||||
cleanup_session_cgroup(cgroup_path);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Drop for ManagedProcess {
|
impl Drop for ManagedProcess {
|
||||||
fn drop(&mut self) {
|
fn drop(&mut self) {
|
||||||
// Try to clean up the cgroup when the process struct is dropped
|
// Nothing special to do for systemd scopes - systemd cleans up automatically
|
||||||
if let Some(ref cgroup_path) = self.cgroup_path {
|
|
||||||
// Only try once, don't block in Drop
|
|
||||||
let _ = std::fs::remove_dir(cgroup_path);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -569,8 +569,25 @@ impl Daemon {
|
||||||
StopMode::Force => SessionEndReason::AdminStop,
|
StopMode::Force => SessionEndReason::AdminStop,
|
||||||
};
|
};
|
||||||
|
|
||||||
match eng.stop_current(reason, now_mono, now) {
|
match eng.stop_current(reason.clone(), now_mono, now) {
|
||||||
StopDecision::Stopped(_result) => {
|
StopDecision::Stopped(result) => {
|
||||||
|
// Broadcast SessionEnded event so UIs know to transition
|
||||||
|
info!(
|
||||||
|
session_id = %result.session_id,
|
||||||
|
reason = ?result.reason,
|
||||||
|
"Broadcasting SessionEnded from StopCurrent"
|
||||||
|
);
|
||||||
|
ipc.broadcast_event(Event::new(EventPayload::SessionEnded {
|
||||||
|
session_id: result.session_id,
|
||||||
|
entry_id: result.entry_id,
|
||||||
|
reason: result.reason,
|
||||||
|
duration: result.duration,
|
||||||
|
}));
|
||||||
|
|
||||||
|
// Also broadcast StateChanged so UIs can update their entry list
|
||||||
|
let snapshot = eng.get_state();
|
||||||
|
ipc.broadcast_event(Event::new(EventPayload::StateChanged(snapshot)));
|
||||||
|
|
||||||
drop(eng); // Release lock before host operations
|
drop(eng); // Release lock before host operations
|
||||||
|
|
||||||
// Stop the actual process
|
// Stop the actual process
|
||||||
|
|
|
||||||
16
run-dev
16
run-dev
|
|
@ -9,6 +9,16 @@ SOCKET_PATH="$DEV_RUNTIME/shepherd.sock"
|
||||||
|
|
||||||
mkdir -p "$DATA_DIR"
|
mkdir -p "$DATA_DIR"
|
||||||
|
|
||||||
|
# Kill any existing shepherd dev instances before starting
|
||||||
|
echo "Cleaning up any existing dev instances..."
|
||||||
|
pkill -f "sway -c ./sway.conf" 2>/dev/null || true
|
||||||
|
pkill -f "shepherdd" 2>/dev/null || true
|
||||||
|
pkill -f "shepherd-launcher" 2>/dev/null || true
|
||||||
|
pkill -f "shepherd-hud" 2>/dev/null || true
|
||||||
|
# Remove stale socket
|
||||||
|
rm -f "$SOCKET_PATH"
|
||||||
|
sleep 0.5
|
||||||
|
|
||||||
# Export environment variables for shepherd binaries
|
# Export environment variables for shepherd binaries
|
||||||
export SHEPHERD_SOCKET="$SOCKET_PATH"
|
export SHEPHERD_SOCKET="$SOCKET_PATH"
|
||||||
export SHEPHERD_DATA_DIR="$DATA_DIR"
|
export SHEPHERD_DATA_DIR="$DATA_DIR"
|
||||||
|
|
@ -27,6 +37,12 @@ cleanup() {
|
||||||
if [ ! -z "$SWAY_PID" ]; then
|
if [ ! -z "$SWAY_PID" ]; then
|
||||||
kill $SWAY_PID 2>/dev/null || true
|
kill $SWAY_PID 2>/dev/null || true
|
||||||
fi
|
fi
|
||||||
|
# Also explicitly kill any shepherd processes that might have escaped
|
||||||
|
pkill -f "shepherdd" 2>/dev/null || true
|
||||||
|
pkill -f "shepherd-launcher" 2>/dev/null || true
|
||||||
|
pkill -f "shepherd-hud" 2>/dev/null || true
|
||||||
|
# Remove socket
|
||||||
|
rm -f "$SOCKET_PATH"
|
||||||
}
|
}
|
||||||
trap cleanup EXIT
|
trap cleanup EXIT
|
||||||
|
|
||||||
|
|
|
||||||
34
setup-cgroups.sh
Executable file
34
setup-cgroups.sh
Executable file
|
|
@ -0,0 +1,34 @@
|
||||||
|
#!/bin/bash
# Setup cgroups v2 for shepherd-launcher.
#
# Usage: sudo ./setup-cgroups.sh [USER]
#
# Creates /sys/fs/cgroup/shepherd and gives ownership of it to USER.
# USER defaults to $SUDO_USER, or to the current user when not run via sudo.
# This script must be run as root (or with sudo).

set -e

CGROUP_BASE="/sys/fs/cgroup/shepherd"

# Fail early with a clear message instead of a bare "Permission denied" from mkdir
if [ "$(id -u)" -ne 0 ]; then
    echo "Error: this script must be run as root (try: sudo $0)" >&2
    exit 1
fi

# Check if cgroups v2 is available
if [ ! -f /sys/fs/cgroup/cgroup.controllers ]; then
    echo "Error: cgroups v2 is not available on this system" >&2
    echo "Make sure your kernel supports cgroups v2 and it's mounted" >&2
    exit 1
fi

# Get the user who will run shepherd: first argument, else SUDO_USER, else the current user
SHEPHERD_USER="${1:-${SUDO_USER:-$(whoami)}}"

# Resolve the user's primary group instead of assuming a group named after the
# user exists. With `set -e`, an unknown user aborts the script here.
SHEPHERD_GROUP="$(id -gn "$SHEPHERD_USER")"

echo "Setting up cgroups for shepherd-launcher..."
echo "User: $SHEPHERD_USER"

# Create the shepherd cgroup directory
mkdir -p "$CGROUP_BASE"

# Set ownership so the shepherd daemon can create session cgroups
chown "$SHEPHERD_USER:$SHEPHERD_GROUP" "$CGROUP_BASE"

# Set permissions (owner can read/write/execute, group and others can read/execute)
chmod 755 "$CGROUP_BASE"

echo "Created $CGROUP_BASE with ownership $SHEPHERD_USER:$SHEPHERD_GROUP"
echo ""
echo "cgroups v2 setup complete!"
echo "The shepherd daemon can now create session cgroups for reliable process management."
|
||||||
Loading…
Reference in a new issue