WIP: fix running Minecraft
This commit is contained in:
parent
77a167e45f
commit
b179232a21
7 changed files with 316 additions and 209 deletions
27
README.md
27
README.md
|
|
@ -34,9 +34,30 @@ tl;dr:
|
|||
2. System dependencies:
|
||||
- **Ubuntu/Debian**: `apt install build-essential pkg-config libglib2.0-dev libgtk-4-dev libcairo2-dev libpango1.0-dev libgdk-pixbuf-xlib-2.0-dev libwayland-dev libx11-dev libxkbcommon-dev libgirepository1.0-dev libgtk4-layer-shell-dev librust-gtk4-layer-shell-sys-dev sway swayidle`
|
||||
3. Rust (`curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh`)
|
||||
4. binaries (TODO: deployable package that depends on Sway and installs the config)
|
||||
5. test session on login
|
||||
6. configure auto-login to this session
|
||||
4. Set up cgroups for process management (one-time, requires root):
|
||||
```bash
|
||||
sudo ./setup-cgroups.sh
|
||||
```
|
||||
5. binaries (TODO: deployable package that depends on Sway and installs the config)
|
||||
6. test session on login
|
||||
7. configure auto-login to this session
|
||||
|
||||
### cgroups Setup
|
||||
|
||||
The shepherd daemon uses Linux cgroups v2 to reliably terminate all processes
|
||||
when a session ends. This is essential for applications like Minecraft that
|
||||
spawn child processes which may escape traditional process group signals.
|
||||
|
||||
Run the setup script once after installation:
|
||||
|
||||
```bash
|
||||
sudo ./setup-cgroups.sh
|
||||
```
|
||||
|
||||
This creates `/sys/fs/cgroup/shepherd` with appropriate permissions for your
|
||||
user. The cgroup filesystem is virtual, so this directory does not survive a reboot; you
|
||||
may want to run the setup script at system startup (e.g., from `/etc/rc.local` or a
|
||||
systemd unit).
|
||||
|
||||
## Usage
|
||||
|
||||
|
|
|
|||
|
|
@ -58,15 +58,15 @@ max_run_seconds = 3600 # 1 hour max
|
|||
daily_quota_seconds = 7200 # 2 hours per day
|
||||
cooldown_seconds = 300 # 5 minute cooldown after each session
|
||||
|
||||
# Example: Minecraft (via Prism Launcher)
|
||||
# Example: Minecraft (via snap mc-installer)
|
||||
[[entries]]
|
||||
id = "minecraft"
|
||||
label = "Minecraft"
|
||||
icon = "prismlauncher"
|
||||
icon = "minecraft"
|
||||
|
||||
[entries.kind]
|
||||
type = "process"
|
||||
argv = ["prismlauncher"]
|
||||
argv = ["mc-installer"]
|
||||
|
||||
[entries.availability]
|
||||
always = true # No time restrictions
|
||||
|
|
@ -106,26 +106,6 @@ always = true
|
|||
[entries.limits]
|
||||
max_run_seconds = 7200 # 2 hours
|
||||
|
||||
# Example: Web browser (restricted)
|
||||
[[entries]]
|
||||
id = "browser"
|
||||
label = "Web Browser"
|
||||
icon = "firefox"
|
||||
|
||||
[entries.kind]
|
||||
type = "process"
|
||||
argv = ["firefox", "-P", "kids"]
|
||||
|
||||
[entries.availability]
|
||||
[[entries.availability.windows]]
|
||||
days = ["sat", "sun"]
|
||||
start = "14:00"
|
||||
end = "17:00"
|
||||
|
||||
[entries.limits]
|
||||
max_run_seconds = 1800 # 30 minutes
|
||||
daily_quota_seconds = 3600 # 1 hour per day
|
||||
|
||||
# Example: Disabled entry
|
||||
[[entries]]
|
||||
id = "disabled-game"
|
||||
|
|
|
|||
|
|
@ -1,9 +1,9 @@
|
|||
//! Linux host adapter implementation
|
||||
|
||||
use async_trait::async_trait;
|
||||
use shepherd_api::{EntryKind, EntryKindTag};
|
||||
use shepherd_api::EntryKind;
|
||||
use shepherd_host_api::{
|
||||
ExitStatus, HostAdapter, HostCapabilities, HostError, HostEvent, HostHandlePayload,
|
||||
HostAdapter, HostCapabilities, HostError, HostEvent, HostHandlePayload,
|
||||
HostResult, HostSessionHandle, SpawnOptions, StopMode,
|
||||
};
|
||||
use shepherd_util::SessionId;
|
||||
|
|
@ -11,33 +11,40 @@ use std::collections::HashMap;
|
|||
use std::sync::{Arc, Mutex};
|
||||
use std::time::Duration;
|
||||
use tokio::sync::mpsc;
|
||||
use tracing::{debug, error, info, warn};
|
||||
use tracing::{info, warn};
|
||||
|
||||
use crate::process::{init_cgroup_base, ManagedProcess};
|
||||
use crate::process::{init, kill_by_command, kill_snap_cgroup, ManagedProcess};
|
||||
|
||||
/// Information tracked for each session for cleanup purposes
|
||||
#[derive(Clone, Debug)]
|
||||
struct SessionInfo {
|
||||
/// First element of the spawned argv (empty string if argv was empty);
/// passed to `kill_by_command` when the session has no snap name.
command_name: String,
|
||||
/// Snap name copied from the spawned `ManagedProcess`, if any; when present,
/// `stop` signals the session through `kill_snap_cgroup` instead.
snap_name: Option<String>,
|
||||
}
|
||||
|
||||
/// Linux host adapter
|
||||
pub struct LinuxHost {
|
||||
capabilities: HostCapabilities,
|
||||
processes: Arc<Mutex<HashMap<u32, ManagedProcess>>>,
|
||||
/// Track session info for killing
|
||||
session_info: Arc<Mutex<HashMap<SessionId, SessionInfo>>>,
|
||||
event_tx: mpsc::UnboundedSender<HostEvent>,
|
||||
event_rx: Arc<Mutex<Option<mpsc::UnboundedReceiver<HostEvent>>>>,
|
||||
/// Whether cgroups are available for process management
|
||||
cgroups_enabled: bool,
|
||||
}
|
||||
|
||||
impl LinuxHost {
|
||||
pub fn new() -> Self {
|
||||
let (tx, rx) = mpsc::unbounded_channel();
|
||||
|
||||
// Try to initialize cgroups
|
||||
let cgroups_enabled = init_cgroup_base();
|
||||
// Initialize process management
|
||||
init();
|
||||
|
||||
Self {
|
||||
capabilities: HostCapabilities::linux_full(),
|
||||
processes: Arc::new(Mutex::new(HashMap::new())),
|
||||
session_info: Arc::new(Mutex::new(HashMap::new())),
|
||||
event_tx: tx,
|
||||
event_rx: Arc::new(Mutex::new(Some(rx))),
|
||||
cgroups_enabled,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -132,23 +139,27 @@ impl HostAdapter for LinuxHost {
|
|||
}
|
||||
};
|
||||
|
||||
// Use cgroups for process management if available
|
||||
let session_id_str = if self.cgroups_enabled {
|
||||
Some(session_id.to_string())
|
||||
} else {
|
||||
None
|
||||
};
|
||||
// Get the command name for fallback killing
|
||||
let command_name = argv.first().cloned().unwrap_or_default();
|
||||
|
||||
let proc = ManagedProcess::spawn_with_session_id(
|
||||
let proc = ManagedProcess::spawn(
|
||||
&argv,
|
||||
&env,
|
||||
cwd.as_ref(),
|
||||
options.capture_stdout || options.capture_stderr,
|
||||
session_id_str.as_deref(),
|
||||
)?;
|
||||
|
||||
let pid = proc.pid;
|
||||
let pgid = proc.pgid;
|
||||
let snap_name = proc.snap_name.clone();
|
||||
|
||||
// Store the session info so we can use it for killing even after process exits
|
||||
let session_info_entry = SessionInfo {
|
||||
command_name: command_name.clone(),
|
||||
snap_name: snap_name.clone(),
|
||||
};
|
||||
self.session_info.lock().unwrap().insert(session_id.clone(), session_info_entry);
|
||||
info!(session_id = %session_id, command = %command_name, snap = ?snap_name, "Tracking session info");
|
||||
|
||||
let handle = HostSessionHandle::new(
|
||||
session_id,
|
||||
|
|
@ -163,26 +174,42 @@ impl HostAdapter for LinuxHost {
|
|||
}
|
||||
|
||||
async fn stop(&self, handle: &HostSessionHandle, mode: StopMode) -> HostResult<()> {
|
||||
let session_id = handle.session_id.clone();
|
||||
let (pid, _pgid) = match handle.payload() {
|
||||
HostHandlePayload::Linux { pid, pgid } => (*pid, *pgid),
|
||||
_ => return Err(HostError::SessionNotFound),
|
||||
};
|
||||
|
||||
// Check if process exists
|
||||
{
|
||||
let procs = self.processes.lock().unwrap();
|
||||
if !procs.contains_key(&pid) {
|
||||
return Err(HostError::SessionNotFound);
|
||||
}
|
||||
// Get the session's info for killing
|
||||
let session_info = self.session_info.lock().unwrap().get(&session_id).cloned();
|
||||
|
||||
// Check if we have session info OR a tracked process
|
||||
let has_process = self.processes.lock().unwrap().contains_key(&pid);
|
||||
|
||||
if session_info.is_none() && !has_process {
|
||||
warn!(session_id = %session_id, pid = pid, "No session info or tracked process found");
|
||||
return Err(HostError::SessionNotFound);
|
||||
}
|
||||
|
||||
match mode {
|
||||
StopMode::Graceful { timeout } => {
|
||||
// Send SIGTERM
|
||||
// If this is a snap app, use cgroup-based killing (most reliable)
|
||||
if let Some(ref info) = session_info {
|
||||
if let Some(ref snap) = info.snap_name {
|
||||
kill_snap_cgroup(snap, nix::sys::signal::Signal::SIGTERM);
|
||||
info!(snap = %snap, "Sent SIGTERM via snap cgroup");
|
||||
} else {
|
||||
// Fall back to command name for non-snap apps
|
||||
kill_by_command(&info.command_name, nix::sys::signal::Signal::SIGTERM);
|
||||
info!(command = %info.command_name, "Sent SIGTERM via command name");
|
||||
}
|
||||
}
|
||||
|
||||
// Also send SIGTERM via process handle
|
||||
{
|
||||
let procs = self.processes.lock().unwrap();
|
||||
if let Some(p) = procs.get(&pid) {
|
||||
p.terminate()?;
|
||||
let _ = p.terminate();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -190,32 +217,58 @@ impl HostAdapter for LinuxHost {
|
|||
let start = std::time::Instant::now();
|
||||
loop {
|
||||
if start.elapsed() >= timeout {
|
||||
// Force kill after timeout
|
||||
// Force kill after timeout using snap cgroup or command name
|
||||
if let Some(ref info) = session_info {
|
||||
if let Some(ref snap) = info.snap_name {
|
||||
kill_snap_cgroup(snap, nix::sys::signal::Signal::SIGKILL);
|
||||
info!(snap = %snap, "Sent SIGKILL via snap cgroup (timeout)");
|
||||
} else {
|
||||
kill_by_command(&info.command_name, nix::sys::signal::Signal::SIGKILL);
|
||||
info!(command = %info.command_name, "Sent SIGKILL via command name (timeout)");
|
||||
}
|
||||
}
|
||||
|
||||
// Also force kill via process handle
|
||||
let procs = self.processes.lock().unwrap();
|
||||
if let Some(p) = procs.get(&pid) {
|
||||
p.kill()?;
|
||||
let _ = p.kill();
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
{
|
||||
let procs = self.processes.lock().unwrap();
|
||||
if !procs.contains_key(&pid) {
|
||||
break;
|
||||
}
|
||||
// Check if process is still running
|
||||
let still_running = self.processes.lock().unwrap().contains_key(&pid);
|
||||
|
||||
if !still_running {
|
||||
break;
|
||||
}
|
||||
|
||||
tokio::time::sleep(Duration::from_millis(50)).await;
|
||||
}
|
||||
}
|
||||
StopMode::Force => {
|
||||
// Force kill via snap cgroup or command name
|
||||
if let Some(ref info) = session_info {
|
||||
if let Some(ref snap) = info.snap_name {
|
||||
kill_snap_cgroup(snap, nix::sys::signal::Signal::SIGKILL);
|
||||
info!(snap = %snap, "Sent SIGKILL via snap cgroup");
|
||||
} else {
|
||||
kill_by_command(&info.command_name, nix::sys::signal::Signal::SIGKILL);
|
||||
info!(command = %info.command_name, "Sent SIGKILL via command name");
|
||||
}
|
||||
}
|
||||
|
||||
// Also force kill via process handle
|
||||
let procs = self.processes.lock().unwrap();
|
||||
if let Some(p) = procs.get(&pid) {
|
||||
p.kill()?;
|
||||
let _ = p.kill();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Clean up the session info tracking
|
||||
self.session_info.lock().unwrap().remove(&session_id);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,146 +1,159 @@
|
|||
//! Process management utilities
|
||||
|
||||
use nix::sys::signal::{self, Signal};
|
||||
use nix::sys::wait::{waitpid, WaitPidFlag, WaitStatus};
|
||||
use nix::unistd::Pid;
|
||||
use std::collections::HashMap;
|
||||
use std::os::unix::process::CommandExt;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::process::{Child, Command, Stdio};
|
||||
use tracing::{debug, info, warn};
|
||||
|
||||
use shepherd_host_api::{ExitStatus, HostError, HostResult};
|
||||
|
||||
/// Base path for shepherd's cgroups
|
||||
const CGROUP_BASE: &str = "/sys/fs/cgroup/shepherd";
|
||||
/// Extract the snap name from a command path or bare command name.
///
/// Recognised forms:
/// - `/snap/<snap-name>/<revision>/...` -> `Some("<snap-name>")`
/// - `/snap/bin/<launcher>` -> snap part of the launcher name
/// - `<launcher>` (no `/`), if `/snap/bin/<launcher>` exists -> snap part of the launcher name
///
/// A launcher is named either `<snap>` or `<snap>.<app>`, so its snap part is
/// everything before the first `.`.
///
/// Examples:
/// - "/snap/mc-installer/279/bin/mc-installer" -> Some("mc-installer")
/// - "/snap/bin/mc-installer" -> Some("mc-installer")
/// - "mc-installer" (if it's a snap) -> Some("mc-installer")
/// - "/usr/bin/firefox" -> None
///
/// Returns `None` for anything else, including an empty string and paths
/// such as `/snap/` that carry no snap name at all.
fn extract_snap_name(program: &str) -> Option<String> {
    /// Reduce a `/snap/bin` launcher name (`<snap>` or `<snap>.<app>`) to the snap name.
    fn snap_from_launcher(launcher: &str) -> Option<String> {
        launcher
            .split('.')
            .next()
            .filter(|snap| !snap.is_empty())
            .map(String::from)
    }

    if let Some(rest) = program.strip_prefix("/snap/") {
        let mut components = rest.split('/');
        let first = components.next().unwrap_or("");

        // `/snap/bin` is the launcher directory, not a snap called "bin":
        // the snap name comes from the launcher's file name instead.
        if first == "bin" {
            return match (components.next(), components.next()) {
                (Some(launcher), None) => snap_from_launcher(launcher),
                _ => None,
            };
        }

        // Format: /snap/<snap-name>/<revision>/...
        return (!first.is_empty()).then(|| first.to_string());
    }

    // A bare command counts as a snap only if a launcher of that name exists.
    // The emptiness check matters: `/snap/bin/` itself exists on snap systems.
    if !program.is_empty()
        && !program.contains('/')
        && std::path::Path::new("/snap/bin").join(program).exists()
    {
        return snap_from_launcher(program);
    }

    None
}
|
||||
|
||||
/// Managed child process with process group tracking
|
||||
pub struct ManagedProcess {
|
||||
pub child: Child,
|
||||
pub pid: u32,
|
||||
pub pgid: u32,
|
||||
/// The cgroup path if cgroups are enabled
|
||||
pub cgroup_path: Option<PathBuf>,
|
||||
/// The command name (for fallback killing via pkill)
|
||||
pub command_name: String,
|
||||
/// The snap name if this is a snap app (for cgroup-based killing)
|
||||
pub snap_name: Option<String>,
|
||||
}
|
||||
|
||||
/// Initialize the shepherd cgroup hierarchy (called once at startup)
|
||||
pub fn init_cgroup_base() -> bool {
|
||||
let base = Path::new(CGROUP_BASE);
|
||||
/// Initialize process management (called once at startup)
|
||||
pub fn init() {
|
||||
info!("Process management initialized");
|
||||
}
|
||||
|
||||
// Check if cgroups v2 is available
|
||||
if !Path::new("/sys/fs/cgroup/cgroup.controllers").exists() {
|
||||
info!("cgroups v2 not available, falling back to process group signals");
|
||||
/// Kill all processes in a snap's cgroup using systemd
|
||||
/// Snaps create scopes at: snap.<snap-name>.<snap-name>-<uuid>.scope
|
||||
/// Direct signals don't work due to AppArmor confinement, but systemctl --user does
|
||||
/// NOTE: We always use SIGKILL for snap apps because apps like Minecraft Launcher
|
||||
/// have self-restart behavior and will spawn new instances when receiving SIGTERM
|
||||
pub fn kill_snap_cgroup(snap_name: &str, _signal: Signal) -> bool {
|
||||
let uid = nix::unistd::getuid().as_raw();
|
||||
let base_path = format!(
|
||||
"/sys/fs/cgroup/user.slice/user-{}.slice/user@{}.service/app.slice",
|
||||
uid, uid
|
||||
);
|
||||
|
||||
// Find all scope directories matching this snap
|
||||
let pattern = format!("snap.{}.{}-", snap_name, snap_name);
|
||||
|
||||
let base = std::path::Path::new(&base_path);
|
||||
if !base.exists() {
|
||||
debug!(path = %base_path, "Snap cgroup base path doesn't exist");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Try to create our base cgroup
|
||||
if !base.exists() {
|
||||
if let Err(e) = std::fs::create_dir_all(base) {
|
||||
warn!(error = %e, "Failed to create shepherd cgroup base - running without cgroup support");
|
||||
return false;
|
||||
let mut stopped_any = false;
|
||||
|
||||
if let Ok(entries) = std::fs::read_dir(base) {
|
||||
for entry in entries.flatten() {
|
||||
let name = entry.file_name();
|
||||
let name_str = name.to_string_lossy();
|
||||
|
||||
if name_str.starts_with(&pattern) && name_str.ends_with(".scope") {
|
||||
let scope_name = name_str.to_string();
|
||||
|
||||
// Always use SIGKILL for snap apps to prevent self-restart behavior
|
||||
// Using systemctl kill --signal=KILL sends SIGKILL to all processes in scope
|
||||
let result = Command::new("systemctl")
|
||||
.args(["--user", "kill", "--signal=KILL", &scope_name])
|
||||
.output();
|
||||
|
||||
match result {
|
||||
Ok(output) => {
|
||||
if output.status.success() {
|
||||
info!(scope = %scope_name, "Killed snap scope via systemctl SIGKILL");
|
||||
stopped_any = true;
|
||||
} else {
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
warn!(scope = %scope_name, stderr = %stderr, "systemctl kill command failed");
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
warn!(scope = %scope_name, error = %e, "Failed to run systemctl");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
info!("cgroups v2 initialized at {}", CGROUP_BASE);
|
||||
true
|
||||
}
|
||||
|
||||
/// Create a cgroup for a session
|
||||
fn create_session_cgroup(session_id: &str) -> Option<PathBuf> {
|
||||
let cgroup_path = PathBuf::from(CGROUP_BASE).join(session_id);
|
||||
|
||||
if let Err(e) = std::fs::create_dir_all(&cgroup_path) {
|
||||
warn!(error = %e, path = %cgroup_path.display(), "Failed to create session cgroup");
|
||||
return None;
|
||||
if stopped_any {
|
||||
info!(snap = snap_name, "Killed snap scope(s) via systemctl SIGKILL");
|
||||
} else {
|
||||
debug!(snap = snap_name, "No snap scope found to kill");
|
||||
}
|
||||
|
||||
debug!(path = %cgroup_path.display(), "Created session cgroup");
|
||||
Some(cgroup_path)
|
||||
stopped_any
|
||||
}
|
||||
|
||||
/// Move a process into a cgroup
|
||||
fn move_to_cgroup(cgroup_path: &Path, pid: u32) -> bool {
|
||||
let procs_file = cgroup_path.join("cgroup.procs");
|
||||
/// Kill processes by command name using pkill
|
||||
pub fn kill_by_command(command_name: &str, signal: Signal) -> bool {
|
||||
let signal_name = match signal {
|
||||
Signal::SIGTERM => "TERM",
|
||||
Signal::SIGKILL => "KILL",
|
||||
_ => "TERM",
|
||||
};
|
||||
|
||||
if let Err(e) = std::fs::write(&procs_file, pid.to_string()) {
|
||||
warn!(error = %e, pid = pid, path = %procs_file.display(), "Failed to move process to cgroup");
|
||||
return false;
|
||||
}
|
||||
// Use pkill to find and kill processes by command name
|
||||
let result = Command::new("pkill")
|
||||
.args([&format!("-{}", signal_name), "-f", command_name])
|
||||
.output();
|
||||
|
||||
debug!(pid = pid, cgroup = %cgroup_path.display(), "Moved process to cgroup");
|
||||
true
|
||||
}
|
||||
|
||||
/// Get all PIDs in a cgroup
|
||||
fn get_cgroup_pids(cgroup_path: &Path) -> Vec<i32> {
|
||||
let procs_file = cgroup_path.join("cgroup.procs");
|
||||
|
||||
match std::fs::read_to_string(&procs_file) {
|
||||
Ok(contents) => {
|
||||
contents
|
||||
.lines()
|
||||
.filter_map(|line| line.trim().parse::<i32>().ok())
|
||||
.collect()
|
||||
match result {
|
||||
Ok(output) => {
|
||||
// pkill returns 0 if processes were found and signaled
|
||||
if output.status.success() {
|
||||
info!(command = command_name, signal = signal_name, "Killed processes by command name");
|
||||
true
|
||||
} else {
|
||||
// No processes found is not an error
|
||||
debug!(command = command_name, "No processes found matching command name");
|
||||
false
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
debug!(error = %e, path = %procs_file.display(), "Failed to read cgroup.procs");
|
||||
Vec::new()
|
||||
warn!(command = command_name, error = %e, "Failed to run pkill");
|
||||
false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Kill all processes in a cgroup
|
||||
fn kill_cgroup(cgroup_path: &Path, signal: Signal) -> Vec<i32> {
|
||||
let pids = get_cgroup_pids(cgroup_path);
|
||||
|
||||
for pid in &pids {
|
||||
let _ = signal::kill(Pid::from_raw(*pid), signal);
|
||||
}
|
||||
|
||||
if !pids.is_empty() {
|
||||
debug!(pids = ?pids, signal = ?signal, cgroup = %cgroup_path.display(), "Sent signal to cgroup processes");
|
||||
}
|
||||
|
||||
pids
|
||||
}
|
||||
|
||||
/// Remove a session cgroup (must be empty)
|
||||
fn cleanup_session_cgroup(cgroup_path: &Path) {
|
||||
// The cgroup must be empty before we can remove it
|
||||
// We'll try a few times in case processes are still exiting
|
||||
for _ in 0..5 {
|
||||
let pids = get_cgroup_pids(cgroup_path);
|
||||
if pids.is_empty() {
|
||||
if let Err(e) = std::fs::remove_dir(cgroup_path) {
|
||||
debug!(error = %e, path = %cgroup_path.display(), "Failed to remove session cgroup");
|
||||
} else {
|
||||
debug!(path = %cgroup_path.display(), "Removed session cgroup");
|
||||
}
|
||||
return;
|
||||
}
|
||||
std::thread::sleep(std::time::Duration::from_millis(100));
|
||||
}
|
||||
debug!(path = %cgroup_path.display(), "Cgroup still has processes, leaving cleanup for later");
|
||||
}
|
||||
|
||||
impl ManagedProcess {
|
||||
/// Spawn a new process in its own process group and optionally in a cgroup
|
||||
/// Spawn a new process in its own process group
|
||||
pub fn spawn(
|
||||
argv: &[String],
|
||||
env: &HashMap<String, String>,
|
||||
cwd: Option<&PathBuf>,
|
||||
cwd: Option<&std::path::PathBuf>,
|
||||
capture_output: bool,
|
||||
) -> HostResult<Self> {
|
||||
Self::spawn_with_session_id(argv, env, cwd, capture_output, None)
|
||||
}
|
||||
|
||||
/// Spawn a new process with an optional session ID for cgroup management
|
||||
pub fn spawn_with_session_id(
|
||||
argv: &[String],
|
||||
env: &HashMap<String, String>,
|
||||
cwd: Option<&PathBuf>,
|
||||
capture_output: bool,
|
||||
session_id: Option<&str>,
|
||||
) -> HostResult<Self> {
|
||||
if argv.is_empty() {
|
||||
return Err(HostError::SpawnFailed("Empty argv".into()));
|
||||
|
|
@ -260,12 +273,13 @@ impl ManagedProcess {
|
|||
|
||||
cmd.stdin(Stdio::null());
|
||||
|
||||
// Store the command name for later use in killing
|
||||
let command_name = program.to_string();
|
||||
|
||||
// Set up process group - this child becomes its own process group leader
|
||||
// SAFETY: This is safe in the pre-exec context
|
||||
unsafe {
|
||||
cmd.pre_exec(|| {
|
||||
// Create new session (which creates new process group)
|
||||
// This ensures the child is the leader of a new process group
|
||||
nix::unistd::setsid().map_err(|e| {
|
||||
std::io::Error::new(std::io::ErrorKind::Other, e.to_string())
|
||||
})?;
|
||||
|
|
@ -280,27 +294,13 @@ impl ManagedProcess {
|
|||
let pid = child.id();
|
||||
let pgid = pid; // After setsid, pid == pgid
|
||||
|
||||
// Try to create a cgroup for this session and move the process into it
|
||||
let cgroup_path = if let Some(sid) = session_id {
|
||||
if let Some(cg_path) = create_session_cgroup(sid) {
|
||||
if move_to_cgroup(&cg_path, pid) {
|
||||
info!(pid = pid, cgroup = %cg_path.display(), "Process moved to session cgroup");
|
||||
Some(cg_path)
|
||||
} else {
|
||||
// Cleanup the empty cgroup we created
|
||||
let _ = std::fs::remove_dir(&cg_path);
|
||||
None
|
||||
}
|
||||
} else {
|
||||
None
|
||||
}
|
||||
} else {
|
||||
None
|
||||
};
|
||||
// Extract snap name from command if it's a snap app
|
||||
// Format: /snap/<snap-name>/... or just the snap command name
|
||||
let snap_name = extract_snap_name(program);
|
||||
|
||||
debug!(pid = pid, pgid = pgid, program = %program, has_cgroup = cgroup_path.is_some(), "Process spawned");
|
||||
info!(pid = pid, pgid = pgid, program = %program, snap = ?snap_name, "Process spawned");
|
||||
|
||||
Ok(Self { child, pid, pgid, cgroup_path })
|
||||
Ok(Self { child, pid, pgid, command_name, snap_name })
|
||||
}
|
||||
|
||||
/// Get all descendant PIDs of this process using /proc
|
||||
|
|
@ -343,16 +343,12 @@ impl ManagedProcess {
|
|||
descendants
|
||||
}
|
||||
|
||||
/// Send SIGTERM to all processes in this session (via cgroup if available, or process group)
|
||||
/// Send SIGTERM to all processes in this session
|
||||
pub fn terminate(&self) -> HostResult<()> {
|
||||
// If we have a cgroup, use it - this is the most reliable method
|
||||
if let Some(ref cgroup_path) = self.cgroup_path {
|
||||
let pids = kill_cgroup(cgroup_path, Signal::SIGTERM);
|
||||
info!(pids = ?pids, cgroup = %cgroup_path.display(), "Sent SIGTERM via cgroup");
|
||||
return Ok(());
|
||||
}
|
||||
// First try to kill by command name - this catches snap apps and re-parented processes
|
||||
kill_by_command(&self.command_name, Signal::SIGTERM);
|
||||
|
||||
// Fallback: try to kill the process group
|
||||
// Also try to kill the process group
|
||||
let pgid = Pid::from_raw(-(self.pgid as i32)); // Negative for process group
|
||||
|
||||
match signal::kill(pgid, Signal::SIGTERM) {
|
||||
|
|
@ -379,16 +375,12 @@ impl ManagedProcess {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
/// Send SIGKILL to all processes in this session (via cgroup if available, or process group)
|
||||
/// Send SIGKILL to all processes in this session
|
||||
pub fn kill(&self) -> HostResult<()> {
|
||||
// If we have a cgroup, use it - this is the most reliable method
|
||||
if let Some(ref cgroup_path) = self.cgroup_path {
|
||||
let pids = kill_cgroup(cgroup_path, Signal::SIGKILL);
|
||||
info!(pids = ?pids, cgroup = %cgroup_path.display(), "Sent SIGKILL via cgroup");
|
||||
return Ok(());
|
||||
}
|
||||
// First try to kill by command name - this catches snap apps and re-parented processes
|
||||
kill_by_command(&self.command_name, Signal::SIGKILL);
|
||||
|
||||
// Fallback: try to kill the process group
|
||||
// Also try to kill the process group
|
||||
let pgid = Pid::from_raw(-(self.pgid as i32));
|
||||
|
||||
match signal::kill(pgid, Signal::SIGKILL) {
|
||||
|
|
@ -471,21 +463,15 @@ impl ManagedProcess {
|
|||
}
|
||||
}
|
||||
|
||||
/// Clean up resources associated with this process (especially cgroups)
|
||||
/// Clean up resources associated with this process
|
||||
pub fn cleanup(&self) {
|
||||
if let Some(ref cgroup_path) = self.cgroup_path {
|
||||
cleanup_session_cgroup(cgroup_path);
|
||||
}
|
||||
// Nothing to clean up for systemd scopes - systemd handles it
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for ManagedProcess {
|
||||
fn drop(&mut self) {
|
||||
// Try to clean up the cgroup when the process struct is dropped
|
||||
if let Some(ref cgroup_path) = self.cgroup_path {
|
||||
// Only try once, don't block in Drop
|
||||
let _ = std::fs::remove_dir(cgroup_path);
|
||||
}
|
||||
// Nothing special to do for systemd scopes - systemd cleans up automatically
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -569,8 +569,25 @@ impl Daemon {
|
|||
StopMode::Force => SessionEndReason::AdminStop,
|
||||
};
|
||||
|
||||
match eng.stop_current(reason, now_mono, now) {
|
||||
StopDecision::Stopped(_result) => {
|
||||
match eng.stop_current(reason.clone(), now_mono, now) {
|
||||
StopDecision::Stopped(result) => {
|
||||
// Broadcast SessionEnded event so UIs know to transition
|
||||
info!(
|
||||
session_id = %result.session_id,
|
||||
reason = ?result.reason,
|
||||
"Broadcasting SessionEnded from StopCurrent"
|
||||
);
|
||||
ipc.broadcast_event(Event::new(EventPayload::SessionEnded {
|
||||
session_id: result.session_id,
|
||||
entry_id: result.entry_id,
|
||||
reason: result.reason,
|
||||
duration: result.duration,
|
||||
}));
|
||||
|
||||
// Also broadcast StateChanged so UIs can update their entry list
|
||||
let snapshot = eng.get_state();
|
||||
ipc.broadcast_event(Event::new(EventPayload::StateChanged(snapshot)));
|
||||
|
||||
drop(eng); // Release lock before host operations
|
||||
|
||||
// Stop the actual process
|
||||
|
|
|
|||
16
run-dev
16
run-dev
|
|
@ -9,6 +9,16 @@ SOCKET_PATH="$DEV_RUNTIME/shepherd.sock"
|
|||
|
||||
mkdir -p "$DATA_DIR"
|
||||
|
||||
# Kill any existing shepherd dev instances before starting
|
||||
echo "Cleaning up any existing dev instances..."
|
||||
pkill -f "sway -c ./sway.conf" 2>/dev/null || true
|
||||
pkill -f "shepherdd" 2>/dev/null || true
|
||||
pkill -f "shepherd-launcher" 2>/dev/null || true
|
||||
pkill -f "shepherd-hud" 2>/dev/null || true
|
||||
# Remove stale socket
|
||||
rm -f "$SOCKET_PATH"
|
||||
sleep 0.5
|
||||
|
||||
# Export environment variables for shepherd binaries
|
||||
export SHEPHERD_SOCKET="$SOCKET_PATH"
|
||||
export SHEPHERD_DATA_DIR="$DATA_DIR"
|
||||
|
|
@ -27,6 +37,12 @@ cleanup() {
|
|||
if [ ! -z "$SWAY_PID" ]; then
|
||||
kill $SWAY_PID 2>/dev/null || true
|
||||
fi
|
||||
# Also explicitly kill any shepherd processes that might have escaped
|
||||
pkill -f "shepherdd" 2>/dev/null || true
|
||||
pkill -f "shepherd-launcher" 2>/dev/null || true
|
||||
pkill -f "shepherd-hud" 2>/dev/null || true
|
||||
# Remove socket
|
||||
rm -f "$SOCKET_PATH"
|
||||
}
|
||||
trap cleanup EXIT
|
||||
|
||||
|
|
|
|||
34
setup-cgroups.sh
Executable file
34
setup-cgroups.sh
Executable file
|
|
@ -0,0 +1,34 @@
|
|||
#!/bin/bash
# Setup cgroups v2 for shepherd-launcher
#
# Usage: sudo ./setup-cgroups.sh [USER]
#
# Creates /sys/fs/cgroup/shepherd and hands it over to USER so the shepherd
# daemon can create per-session cgroups below it. USER defaults to the user
# who invoked sudo, or to the current user.
#
# This script must be run as root (or with sudo)

set -euo pipefail

CGROUP_BASE="/sys/fs/cgroup/shepherd"

# Fail early with a clear message instead of a bare mkdir/chown permission error
if [ "$(id -u)" -ne 0 ]; then
    echo "Error: this script must be run as root (try: sudo $0)" >&2
    exit 1
fi

# Check if cgroups v2 is available
if [ ! -f /sys/fs/cgroup/cgroup.controllers ]; then
    echo "Error: cgroups v2 is not available on this system" >&2
    echo "Make sure your kernel supports cgroups v2 and it's mounted" >&2
    exit 1
fi

# Get the user who will run shepherd (default to SUDO_USER or current user)
SHEPHERD_USER="${1:-${SUDO_USER:-$(whoami)}}"

# Resolve the user's primary group; a group named after the user is not
# guaranteed to exist. This doubles as a check that the user exists.
if ! SHEPHERD_GROUP="$(id -gn "$SHEPHERD_USER" 2>/dev/null)"; then
    echo "Error: user '$SHEPHERD_USER' does not exist" >&2
    exit 1
fi

echo "Setting up cgroups for shepherd-launcher..."
echo "User: $SHEPHERD_USER"

# Create the shepherd cgroup directory
mkdir -p "$CGROUP_BASE"

# Set ownership so the shepherd daemon can create session cgroups
chown "$SHEPHERD_USER:$SHEPHERD_GROUP" "$CGROUP_BASE"

# cgroup v2 delegation also needs write access to these interface files,
# not just to the directory itself
for f in cgroup.procs cgroup.threads cgroup.subtree_control; do
    if [ -e "$CGROUP_BASE/$f" ]; then
        chown "$SHEPHERD_USER:$SHEPHERD_GROUP" "$CGROUP_BASE/$f"
    fi
done

# Set permissions (owner can read/write/execute, others can read/execute)
chmod 755 "$CGROUP_BASE"

echo "Created $CGROUP_BASE with ownership $SHEPHERD_USER"
echo ""
echo "cgroups v2 setup complete!"
echo "The shepherd daemon can now create session cgroups for reliable process management."
|
||||
Loading…
Reference in a new issue