Auto merge of #81825 - voidc:pidfd, r=joshtriplett

Add Linux-specific pidfd process extensions (take 2)

Continuation of #77168.
I addressed the following concerns from the original PR:

- make `CommandExt` and `ChildExt` sealed traits
- wrap file descriptors in `PidFd` struct representing ownership over the fd
- add `take_pidfd` to take the fd out of `Child`
- close fd when dropped

Tracking Issue: #82971
This commit is contained in:
bors 2021-08-01 16:45:47 +00:00
commit 4e21ef2a4e
8 changed files with 410 additions and 11 deletions

View File

@ -4,4 +4,5 @@
#![doc(cfg(target_os = "linux"))]
pub mod fs;
pub mod process;
pub mod raw;

View File

@ -0,0 +1,154 @@
//! Linux-specific extensions to primitives in the `std::process` module.
#![unstable(feature = "linux_pidfd", issue = "82971")]
use crate::io::Result;
use crate::os::unix::io::{AsRawFd, FromRawFd, IntoRawFd, RawFd};
use crate::process;
#[cfg(not(doc))]
use crate::sys::fd::FileDesc;
use crate::sys_common::{AsInner, AsInnerMut, FromInner, IntoInner};
#[cfg(doc)]
struct FileDesc;
/// This type represents a file descriptor that refers to a process.
///
/// A `PidFd` can be obtained by setting the corresponding option on [`Command`]
/// with [`create_pidfd`]. Subsequently, the created pidfd can be retrieved
/// from the [`Child`] by calling [`pidfd`] or [`take_pidfd`].
///
/// Example:
/// ```no_run
/// #![feature(linux_pidfd)]
/// use std::os::linux::process::{CommandExt, ChildExt};
/// use std::process::Command;
///
/// let mut child = Command::new("echo")
/// .create_pidfd(true)
/// .spawn()
/// .expect("Failed to spawn child");
///
/// let pidfd = child
/// .take_pidfd()
/// .expect("Failed to retrieve pidfd");
///
/// // The file descriptor will be closed when `pidfd` is dropped.
/// ```
/// Refer to the man page of [`pidfd_open(2)`] for further details.
///
/// [`Command`]: process::Command
/// [`create_pidfd`]: CommandExt::create_pidfd
/// [`Child`]: process::Child
/// [`pidfd`]: fn@ChildExt::pidfd
/// [`take_pidfd`]: ChildExt::take_pidfd
/// [`pidfd_open(2)`]: https://man7.org/linux/man-pages/man2/pidfd_open.2.html
#[derive(Debug)]
pub struct PidFd {
inner: FileDesc,
}
impl AsInner<FileDesc> for PidFd {
fn as_inner(&self) -> &FileDesc {
&self.inner
}
}
impl FromInner<FileDesc> for PidFd {
fn from_inner(inner: FileDesc) -> PidFd {
PidFd { inner }
}
}
impl IntoInner<FileDesc> for PidFd {
fn into_inner(self) -> FileDesc {
self.inner
}
}
impl AsRawFd for PidFd {
fn as_raw_fd(&self) -> RawFd {
self.as_inner().raw()
}
}
impl FromRawFd for PidFd {
unsafe fn from_raw_fd(fd: RawFd) -> Self {
Self::from_inner(FileDesc::new(fd))
}
}
impl IntoRawFd for PidFd {
fn into_raw_fd(self) -> RawFd {
self.into_inner().into_raw()
}
}
mod private_child_ext {
pub trait Sealed {}
impl Sealed for crate::process::Child {}
}
/// Os-specific extensions for [`Child`]
///
/// [`Child`]: process::Child
pub trait ChildExt: private_child_ext::Sealed {
/// Obtains a reference to the [`PidFd`] created for this [`Child`], if available.
///
/// A pidfd will only be available if its creation was requested with
/// [`create_pidfd`] when the corresponding [`Command`] was created.
///
/// Even if requested, a pidfd may not be available due to an older
/// version of Linux being in use, or if some other error occurred.
///
/// [`Command`]: process::Command
/// [`create_pidfd`]: CommandExt::create_pidfd
/// [`Child`]: process::Child
fn pidfd(&self) -> Result<&PidFd>;
/// Takes ownership of the [`PidFd`] created for this [`Child`], if available.
///
/// A pidfd will only be available if its creation was requested with
/// [`create_pidfd`] when the corresponding [`Command`] was created.
///
/// Even if requested, a pidfd may not be available due to an older
/// version of Linux being in use, or if some other error occurred.
///
/// [`Command`]: process::Command
/// [`create_pidfd`]: CommandExt::create_pidfd
/// [`Child`]: process::Child
fn take_pidfd(&mut self) -> Result<PidFd>;
}
mod private_command_ext {
pub trait Sealed {}
impl Sealed for crate::process::Command {}
}
/// Os-specific extensions for [`Command`]
///
/// [`Command`]: process::Command
pub trait CommandExt: private_command_ext::Sealed {
/// Sets whether a [`PidFd`](struct@PidFd) should be created for the [`Child`]
/// spawned by this [`Command`].
/// By default, no pidfd will be created.
///
/// The pidfd can be retrieved from the child with [`pidfd`] or [`take_pidfd`].
///
/// A pidfd will only be created if it is possible to do so
/// in a guaranteed race-free manner (e.g. if the `clone3` system call
/// is supported). Otherwise, [`pidfd`] will return an error.
///
/// [`Command`]: process::Command
/// [`Child`]: process::Child
/// [`pidfd`]: fn@ChildExt::pidfd
/// [`take_pidfd`]: ChildExt::take_pidfd
fn create_pidfd(&mut self, val: bool) -> &mut process::Command;
}
impl CommandExt for process::Command {
fn create_pidfd(&mut self, val: bool) -> &mut process::Command {
self.as_inner_mut().create_pidfd(val);
self
}
}

View File

@ -166,7 +166,7 @@ use crate::sys_common::{AsInner, AsInnerMut, FromInner, IntoInner};
/// [`wait`]: Child::wait
#[stable(feature = "process", since = "1.0.0")]
pub struct Child {
handle: imp::Process,
pub(crate) handle: imp::Process,
/// The handle for writing to the child's standard input (stdin), if it has
/// been captured. To avoid partially moving

View File

@ -79,6 +79,8 @@ pub struct Command {
stdin: Option<Stdio>,
stdout: Option<Stdio>,
stderr: Option<Stdio>,
#[cfg(target_os = "linux")]
create_pidfd: bool,
}
// Create a new type for argv, so that we can make it `Send` and `Sync`
@ -124,6 +126,7 @@ pub enum Stdio {
}
impl Command {
#[cfg(not(target_os = "linux"))]
pub fn new(program: &OsStr) -> Command {
let mut saw_nul = false;
let program = os2c(program, &mut saw_nul);
@ -144,6 +147,28 @@ impl Command {
}
}
#[cfg(target_os = "linux")]
pub fn new(program: &OsStr) -> Command {
let mut saw_nul = false;
let program = os2c(program, &mut saw_nul);
Command {
argv: Argv(vec![program.as_ptr(), ptr::null()]),
args: vec![program.clone()],
program,
env: Default::default(),
cwd: None,
uid: None,
gid: None,
saw_nul,
closures: Vec::new(),
groups: None,
stdin: None,
stdout: None,
stderr: None,
create_pidfd: false,
}
}
pub fn set_arg_0(&mut self, arg: &OsStr) {
// Set a new arg0
let arg = os2c(arg, &mut self.saw_nul);
@ -177,6 +202,22 @@ impl Command {
self.groups = Some(Box::from(groups));
}
#[cfg(target_os = "linux")]
pub fn create_pidfd(&mut self, val: bool) {
self.create_pidfd = val;
}
#[cfg(not(target_os = "linux"))]
#[allow(dead_code)]
pub fn get_create_pidfd(&self) -> bool {
false
}
#[cfg(target_os = "linux")]
pub fn get_create_pidfd(&self) -> bool {
self.create_pidfd
}
pub fn saw_nul(&self) -> bool {
self.saw_nul
}

View File

@ -9,6 +9,12 @@ use crate::sys;
use crate::sys::cvt;
use crate::sys::process::process_common::*;
#[cfg(target_os = "linux")]
use crate::os::linux::process::PidFd;
#[cfg(target_os = "linux")]
use crate::sys::weak::syscall;
#[cfg(any(
target_os = "macos",
target_os = "freebsd",
@ -61,7 +67,8 @@ impl Command {
// a lock any more because the parent won't do anything and the child is
// in its own process. Thus the parent drops the lock guard while the child
// forgets it to avoid unlocking it on a new thread, which would be invalid.
let (env_lock, pid) = unsafe { (sys::os::env_read_lock(), cvt(libc::fork())?) };
let env_lock = sys::os::env_read_lock();
let (pid, pidfd) = unsafe { self.do_fork()? };
if pid == 0 {
crate::panic::always_abort();
@ -90,7 +97,7 @@ impl Command {
drop(env_lock);
drop(output);
let mut p = Process { pid, status: None };
let mut p = Process::new(pid, pidfd);
let mut bytes = [0; 8];
// loop to handle EINTR
@ -122,6 +129,92 @@ impl Command {
}
}
// Attempts to fork the process. If successful, returns Ok((0, -1))
// in the child, and Ok((child_pid, -1)) in the parent.
#[cfg(not(target_os = "linux"))]
unsafe fn do_fork(&mut self) -> Result<(pid_t, pid_t), io::Error> {
cvt(libc::fork()).map(|res| (res, -1))
}
// Attempts to fork the process. If successful, returns Ok((0, -1))
// in the child, and Ok((child_pid, child_pidfd)) in the parent.
#[cfg(target_os = "linux")]
unsafe fn do_fork(&mut self) -> Result<(pid_t, pid_t), io::Error> {
use crate::sync::atomic::{AtomicBool, Ordering};
static HAS_CLONE3: AtomicBool = AtomicBool::new(true);
const CLONE_PIDFD: u64 = 0x00001000;
#[repr(C)]
struct clone_args {
flags: u64,
pidfd: u64,
child_tid: u64,
parent_tid: u64,
exit_signal: u64,
stack: u64,
stack_size: u64,
tls: u64,
set_tid: u64,
set_tid_size: u64,
cgroup: u64,
}
syscall! {
fn clone3(cl_args: *mut clone_args, len: libc::size_t) -> libc::c_long
}
// If we fail to create a pidfd for any reason, this will
// stay as -1, which indicates an error.
let mut pidfd: pid_t = -1;
// Attempt to use the `clone3` syscall, which supports more arguments
// (in particular, the ability to create a pidfd). If this fails,
// we will fall through this block to a call to `fork()`
if HAS_CLONE3.load(Ordering::Relaxed) {
let mut flags = 0;
if self.get_create_pidfd() {
flags |= CLONE_PIDFD;
}
let mut args = clone_args {
flags,
pidfd: &mut pidfd as *mut pid_t as u64,
child_tid: 0,
parent_tid: 0,
exit_signal: libc::SIGCHLD as u64,
stack: 0,
stack_size: 0,
tls: 0,
set_tid: 0,
set_tid_size: 0,
cgroup: 0,
};
let args_ptr = &mut args as *mut clone_args;
let args_size = crate::mem::size_of::<clone_args>();
let res = cvt(clone3(args_ptr, args_size));
match res {
Ok(n) => return Ok((n as pid_t, pidfd)),
Err(e) => match e.raw_os_error() {
// Multiple threads can race to execute this store,
// but that's fine - that just means that multiple threads
// will have tried and failed to execute the same syscall,
// with no other side effects.
Some(libc::ENOSYS) => HAS_CLONE3.store(false, Ordering::Relaxed),
// Fallback to fork if `EPERM` is returned. (e.g. blocked by seccomp)
Some(libc::EPERM) => {}
_ => return Err(e),
},
}
}
// If we get here, the 'clone3' syscall does not exist
// or we do not have permission to call it
cvt(libc::fork()).map(|res| (res, pidfd))
}
pub fn exec(&mut self, default: Stdio) -> io::Error {
let envp = self.capture_env();
@ -308,6 +401,7 @@ impl Command {
|| (self.env_saw_path() && !self.program_is_path())
|| !self.get_closures().is_empty()
|| self.get_groups().is_some()
|| self.get_create_pidfd()
{
return Ok(None);
}
@ -352,7 +446,7 @@ impl Command {
None => None,
};
let mut p = Process { pid: 0, status: None };
let mut p = Process::new(0, -1);
struct PosixSpawnFileActions<'a>(&'a mut MaybeUninit<libc::posix_spawn_file_actions_t>);
@ -441,9 +535,27 @@ impl Command {
pub struct Process {
pid: pid_t,
status: Option<ExitStatus>,
// On Linux, stores the pidfd created for this child.
// This is None if the user did not request pidfd creation,
// or if the pidfd could not be created for some reason
// (e.g. the `clone3` syscall was not available).
#[cfg(target_os = "linux")]
pidfd: Option<PidFd>,
}
impl Process {
#[cfg(target_os = "linux")]
fn new(pid: pid_t, pidfd: pid_t) -> Self {
use crate::sys_common::FromInner;
let pidfd = (pidfd >= 0).then(|| PidFd::from_inner(sys::fd::FileDesc::new(pidfd)));
Process { pid, status: None, pidfd }
}
#[cfg(not(target_os = "linux"))]
fn new(pid: pid_t, _pidfd: pid_t) -> Self {
Process { pid, status: None }
}
pub fn id(&self) -> u32 {
self.pid as u32
}
@ -580,6 +692,24 @@ impl ExitStatusError {
}
}
#[cfg(target_os = "linux")]
#[unstable(feature = "linux_pidfd", issue = "82971")]
impl crate::os::linux::process::ChildExt for crate::process::Child {
fn pidfd(&self) -> io::Result<&PidFd> {
self.handle
.pidfd
.as_ref()
.ok_or_else(|| Error::new(ErrorKind::Other, "No pidfd was created."))
}
fn take_pidfd(&mut self) -> io::Result<PidFd> {
self.handle
.pidfd
.take()
.ok_or_else(|| Error::new(ErrorKind::Other, "No pidfd was created."))
}
}
#[cfg(test)]
#[path = "process_unix/tests.rs"]
mod tests;

View File

@ -0,0 +1,45 @@
// run-pass
// only-linux - pidfds are a linux-specific concept
#![feature(linux_pidfd)]
#![feature(rustc_private)]
extern crate libc;
use std::io::Error;
use std::os::linux::process::{ChildExt, CommandExt};
use std::process::Command;
fn has_clone3() -> bool {
let res = unsafe { libc::syscall(libc::SYS_clone3, 0, 0) };
let err = (res == -1)
.then(|| Error::last_os_error())
.expect("probe syscall should not succeed");
err.raw_os_error() != Some(libc::ENOSYS)
}
fn main() {
// pidfds require the clone3 syscall
if !has_clone3() {
return;
}
// We don't assert the precise value, since the standard library
// might have opened other file descriptors before our code runs.
let _ = Command::new("echo")
.create_pidfd(true)
.spawn()
.unwrap()
.pidfd().expect("failed to obtain pidfd");
let _ = Command::new("echo")
.create_pidfd(false)
.spawn()
.unwrap()
.pidfd().expect_err("pidfd should not have been created when create_pid(false) is set");
let _ = Command::new("echo")
.spawn()
.unwrap()
.pidfd().expect_err("pidfd should not have been created");
}

View File

@ -8,8 +8,6 @@
// ignore-sgx no processes
#![feature(process_exec, rustc_private)]
extern crate libc;
use std::env;
use std::io::Error;
use std::os::unix::process::CommandExt;
@ -17,6 +15,23 @@ use std::process::Command;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
#[cfg(not(target_os = "linux"))]
fn getpid() -> u32 {
use std::process;
process::id()
}
/// We need to directly use the getpid syscall instead of using `process::id()`
/// because the libc wrapper might return incorrect values after a process was
/// forked.
#[cfg(target_os = "linux")]
fn getpid() -> u32 {
extern crate libc;
unsafe {
libc::syscall(libc::SYS_getpid) as _
}
}
fn main() {
if let Some(arg) = env::args().nth(1) {
match &arg[..] {
@ -68,14 +83,12 @@ fn main() {
};
assert_eq!(output.raw_os_error(), Some(102));
let pid = unsafe { libc::getpid() };
assert!(pid >= 0);
let pid = getpid();
let output = unsafe {
Command::new(&me)
.arg("empty")
.pre_exec(move || {
let child = libc::getpid();
assert!(child >= 0);
let child = getpid();
assert!(pid != child);
Ok(())
})

View File

@ -23,6 +23,21 @@ use std::sync::atomic::{AtomicU32, Ordering};
use libc::c_int;
#[cfg(not(target_os = "linux"))]
fn getpid() -> u32 {
process::id()
}
/// We need to directly use the getpid syscall instead of using `process::id()`
/// because the libc wrapper might return incorrect values after a process was
/// forked.
#[cfg(target_os = "linux")]
fn getpid() -> u32 {
unsafe {
libc::syscall(libc::SYS_getpid) as _
}
}
/// This stunt allocator allows us to spot heap allocations in the child.
struct PidChecking<A> {
parent: A,
@ -44,7 +59,7 @@ impl<A> PidChecking<A> {
fn check(&self) {
let require_pid = self.require_pid.load(Ordering::Acquire);
if require_pid != 0 {
let actual_pid = process::id();
let actual_pid = getpid();
if require_pid != actual_pid {
unsafe {
libc::raise(libc::SIGUSR1);