--- /dev/null
+pkg syscall (linux-386), type SysProcAttr struct, PidFD *int #51246
+pkg syscall (linux-386-cgo), type SysProcAttr struct, PidFD *int #51246
+pkg syscall (linux-amd64), type SysProcAttr struct, PidFD *int #51246
+pkg syscall (linux-amd64-cgo), type SysProcAttr struct, PidFD *int #51246
+pkg syscall (linux-arm), type SysProcAttr struct, PidFD *int #51246
+pkg syscall (linux-arm-cgo), type SysProcAttr struct, PidFD *int #51246
// instead of the glibc-specific "CALL 0x10(GS)".
#define INVOKE_SYSCALL INT $0x80
-// func rawVforkSyscall(trap, a1, a2 uintptr) (r1, err uintptr)
-TEXT ·rawVforkSyscall(SB),NOSPLIT|NOFRAME,$0-20
+// func rawVforkSyscall(trap, a1, a2, a3 uintptr) (r1, err uintptr)
+TEXT ·rawVforkSyscall(SB),NOSPLIT|NOFRAME,$0-24 // four 4-byte arguments plus two 4-byte results
MOVL trap+0(FP), AX // syscall entry
MOVL a1+4(FP), BX
MOVL a2+8(FP), CX
- MOVL $0, DX
+ MOVL a3+12(FP), DX // third syscall argument now comes from the caller instead of being zeroed
POPL SI // preserve return address
INVOKE_SYSCALL
PUSHL SI
CMPL AX, $0xfffff001
JLS ok
- MOVL $-1, r1+12(FP)
+ MOVL $-1, r1+16(FP) // error path: r1 = -1
NEGL AX
- MOVL AX, err+16(FP)
+ MOVL AX, err+20(FP) // err = AX as negated just above
RET
ok:
- MOVL AX, r1+12(FP)
- MOVL $0, err+16(FP)
+ MOVL AX, r1+16(FP) // success path: r1 = value left in AX by the syscall
+ MOVL $0, err+20(FP) // err = 0
RET
// func rawSyscallNoError(trap uintptr, a1, a2, a3 uintptr) (r1, r2 uintptr);
#define SYS_gettimeofday 96
-// func rawVforkSyscall(trap, a1, a2 uintptr) (r1, err uintptr)
-TEXT ·rawVforkSyscall(SB),NOSPLIT|NOFRAME,$0-40
+// func rawVforkSyscall(trap, a1, a2, a3 uintptr) (r1, err uintptr)
+TEXT ·rawVforkSyscall(SB),NOSPLIT|NOFRAME,$0-48
MOVQ a1+8(FP), DI
MOVQ a2+16(FP), SI
- MOVQ $0, DX
+ MOVQ a3+24(FP), DX
MOVQ $0, R10
MOVQ $0, R8
MOVQ $0, R9
PUSHQ R12
CMPQ AX, $0xfffffffffffff001
JLS ok2
- MOVQ $-1, r1+24(FP)
+ MOVQ $-1, r1+32(FP)
NEGQ AX
- MOVQ AX, err+32(FP)
+ MOVQ AX, err+40(FP)
RET
ok2:
- MOVQ AX, r1+24(FP)
- MOVQ $0, err+32(FP)
+ MOVQ AX, r1+32(FP)
+ MOVQ $0, err+40(FP)
RET
// func rawSyscallNoError(trap, a1, a2, a3 uintptr) (r1, r2 uintptr)
BL runtime·exitsyscall(SB)
RET
-// func rawVforkSyscall(trap, a1, a2 uintptr) (r1, err uintptr)
-TEXT ·rawVforkSyscall(SB),NOSPLIT|NOFRAME,$0-20
+// func rawVforkSyscall(trap, a1, a2, a3 uintptr) (r1, err uintptr)
+TEXT ·rawVforkSyscall(SB),NOSPLIT|NOFRAME,$0-24 // four 4-byte arguments plus two 4-byte results
MOVW trap+0(FP), R7 // syscall entry
MOVW a1+4(FP), R0
MOVW a2+8(FP), R1
- MOVW $0, R2
+ MOVW a3+12(FP), R2 // third syscall argument now comes from the caller instead of being zeroed
SWI $0
MOVW $0xfffff001, R1
CMP R1, R0
BLS ok
MOVW $-1, R1
- MOVW R1, r1+12(FP)
+ MOVW R1, r1+16(FP) // error path: r1 = -1 (loaded into R1 just above)
RSB $0, R0, R0
- MOVW R0, err+16(FP)
+ MOVW R0, err+20(FP) // err = 0 - R0, computed by the RSB above
RET
ok:
- MOVW R0, r1+12(FP)
+ MOVW R0, r1+16(FP) // success path: r1 = value left in R0 by the syscall
MOVW $0, R0
- MOVW R0, err+16(FP)
+ MOVW R0, err+20(FP) // err = 0 (R0 was cleared just above)
RET
// func rawSyscallNoError(trap uintptr, a1, a2, a3 uintptr) (r1, r2 uintptr);
#include "textflag.h"
-// func rawVforkSyscall(trap, a1, a2 uintptr) (r1, err uintptr)
-TEXT ·rawVforkSyscall(SB),NOSPLIT,$0-40
+// func rawVforkSyscall(trap, a1, a2, a3 uintptr) (r1, err uintptr)
+TEXT ·rawVforkSyscall(SB),NOSPLIT,$0-48
MOVD a1+8(FP), R0
MOVD a2+16(FP), R1
- MOVD $0, R2
+ MOVD a3+24(FP), R2
MOVD $0, R3
MOVD $0, R4
MOVD $0, R5
CMN $4095, R0
BCC ok
MOVD $-1, R4
- MOVD R4, r1+24(FP) // r1
+ MOVD R4, r1+32(FP) // r1
NEG R0, R0
- MOVD R0, err+32(FP) // errno
+ MOVD R0, err+40(FP) // errno
RET
ok:
- MOVD R0, r1+24(FP) // r1
- MOVD ZR, err+32(FP) // errno
+ MOVD R0, r1+32(FP) // r1
+ MOVD ZR, err+40(FP) // errno
RET
// func rawSyscallNoError(trap uintptr, a1, a2, a3 uintptr) (r1, r2 uintptr);
// System calls for loong64, Linux
//
-// func rawVforkSyscall(trap, a1, a2 uintptr) (r1, err uintptr)
-TEXT ·rawVforkSyscall(SB),NOSPLIT,$0-40
+// func rawVforkSyscall(trap, a1, a2, a3 uintptr) (r1, err uintptr)
+TEXT ·rawVforkSyscall(SB),NOSPLIT,$0-48
MOVV a1+8(FP), R4
MOVV a2+16(FP), R5
- MOVV $0, R6
+ MOVV a3+24(FP), R6
MOVV $0, R7
MOVV $0, R8
MOVV $0, R9
MOVW $-4096, R12
BGEU R12, R4, ok
MOVV $-1, R12
- MOVV R12, r1+24(FP) // r1
+ MOVV R12, r1+32(FP) // r1
SUBVU R4, R0, R4
- MOVV R4, err+32(FP) // errno
+ MOVV R4, err+40(FP) // errno
RET
ok:
- MOVV R4, r1+24(FP) // r1
- MOVV R0, err+32(FP) // errno
+ MOVV R4, r1+32(FP) // r1
+ MOVV R0, err+40(FP) // errno
RET
TEXT ·rawSyscallNoError(SB),NOSPLIT,$0-48
// System calls for mips64, Linux
//
-// func rawVforkSyscall(trap, a1, a2 uintptr) (r1, err uintptr)
-TEXT ·rawVforkSyscall(SB),NOSPLIT|NOFRAME,$0-40
+// func rawVforkSyscall(trap, a1, a2, a3 uintptr) (r1, err uintptr)
+TEXT ·rawVforkSyscall(SB),NOSPLIT|NOFRAME,$0-48
MOVV a1+8(FP), R4
MOVV a2+16(FP), R5
- MOVV R0, R6
+ MOVV a3+24(FP), R6
MOVV R0, R7
MOVV R0, R8
MOVV R0, R9
SYSCALL
BEQ R7, ok
MOVV $-1, R1
- MOVV R1, r1+24(FP) // r1
- MOVV R2, err+32(FP) // errno
+ MOVV R1, r1+32(FP) // r1
+ MOVV R2, err+40(FP) // errno
RET
ok:
- MOVV R2, r1+24(FP) // r1
- MOVV R0, err+32(FP) // errno
+ MOVV R2, r1+32(FP) // r1
+ MOVV R0, err+40(FP) // errno
RET
TEXT ·rawSyscallNoError(SB),NOSPLIT,$0-48
JAL runtime·exitsyscall(SB)
RET
-// func rawVforkSyscall(trap, a1, a2 uintptr) (r1, err uintptr)
-TEXT ·rawVforkSyscall(SB),NOSPLIT|NOFRAME,$0-20
+// func rawVforkSyscall(trap, a1, a2, a3 uintptr) (r1, err uintptr)
+TEXT ·rawVforkSyscall(SB),NOSPLIT|NOFRAME,$0-24 // four 4-byte arguments plus two 4-byte results
MOVW a1+4(FP), R4
MOVW a2+8(FP), R5
- MOVW R0, R6
+ MOVW a3+12(FP), R6 // third syscall argument now comes from the caller instead of R0
MOVW trap+0(FP), R2 // syscall entry
SYSCALL
BEQ R7, ok
MOVW $-1, R1
- MOVW R1, r1+12(FP) // r1
- MOVW R2, err+16(FP) // errno
+ MOVW R1, r1+16(FP) // r1 = -1 (error path, taken when R7 is non-zero)
+ MOVW R2, err+20(FP) // errno = R2 as left by the syscall
RET
ok:
- MOVW R2, r1+12(FP) // r1
- MOVW R0, err+16(FP) // errno
+ MOVW R2, r1+16(FP) // r1 = R2 as left by the syscall
+ MOVW R0, err+20(FP) // errno = 0 (R0 is the MIPS zero register)
RET
TEXT ·rawSyscallNoError(SB),NOSPLIT,$20-24
// System calls for ppc64, Linux
//
-// func rawVforkSyscall(trap, a1, a2 uintptr) (r1, err uintptr)
-TEXT ·rawVforkSyscall(SB),NOSPLIT|NOFRAME,$0-40
+// func rawVforkSyscall(trap, a1, a2, a3 uintptr) (r1, err uintptr)
+TEXT ·rawVforkSyscall(SB),NOSPLIT|NOFRAME,$0-48
MOVD a1+8(FP), R3
MOVD a2+16(FP), R4
- MOVD R0, R5
+ MOVD a3+24(FP), R5
MOVD R0, R6
MOVD R0, R7
MOVD R0, R8
SYSCALL R9
BVC ok
MOVD $-1, R4
- MOVD R4, r1+24(FP) // r1
- MOVD R3, err+32(FP) // errno
+ MOVD R4, r1+32(FP) // r1
+ MOVD R3, err+40(FP) // errno
RET
ok:
- MOVD R3, r1+24(FP) // r1
- MOVD R0, err+32(FP) // errno
+ MOVD R3, r1+32(FP) // r1
+ MOVD R0, err+40(FP) // errno
RET
TEXT ·rawSyscallNoError(SB),NOSPLIT,$0-48
// System calls for riscv64, Linux
//
-// func rawVforkSyscall(trap, a1, a2 uintptr) (r1, err uintptr)
-TEXT ·rawVforkSyscall(SB),NOSPLIT|NOFRAME,$0-40
+// func rawVforkSyscall(trap, a1, a2, a3 uintptr) (r1, err uintptr)
+TEXT ·rawVforkSyscall(SB),NOSPLIT|NOFRAME,$0-48
MOV a1+8(FP), A0
MOV a2+16(FP), A1
- MOV ZERO, A2
+ MOV a3+24(FP), A2
MOV ZERO, A3
MOV ZERO, A4
MOV ZERO, A5
ECALL
MOV $-4096, T0
BLTU T0, A0, err
- MOV A0, r1+24(FP) // r1
- MOV ZERO, err+32(FP) // errno
+ MOV A0, r1+32(FP) // r1
+ MOV ZERO, err+40(FP) // errno
RET
err:
MOV $-1, T0
- MOV T0, r1+24(FP) // r1
+ MOV T0, r1+32(FP) // r1
SUB A0, ZERO, A0
- MOV A0, err+32(FP) // errno
+ MOV A0, err+40(FP) // errno
RET
TEXT ·rawSyscallNoError(SB),NOSPLIT,$0-48
// System calls for s390x, Linux
//
-// func rawVforkSyscall(trap, a1, a2 uintptr) (r1, err uintptr)
-TEXT ·rawVforkSyscall(SB),NOSPLIT|NOFRAME,$0-40
+// func rawVforkSyscall(trap, a1, a2, a3 uintptr) (r1, err uintptr)
+TEXT ·rawVforkSyscall(SB),NOSPLIT|NOFRAME,$0-48
MOVD a1+8(FP), R2
MOVD a2+16(FP), R3
- MOVD $0, R4
+ MOVD a3+24(FP), R4
MOVD $0, R5
MOVD $0, R6
MOVD $0, R7
SYSCALL
MOVD $0xfffffffffffff001, R8
CMPUBLT R2, R8, ok2
- MOVD $-1, r1+24(FP)
+ MOVD $-1, r1+32(FP)
NEG R2, R2
- MOVD R2, err+32(FP) // errno
+ MOVD R2, err+40(FP) // errno
RET
ok2:
- MOVD R2, r1+24(FP)
- MOVD $0, err+32(FP) // errno
+ MOVD R2, r1+32(FP)
+ MOVD $0, err+40(FP) // errno
RET
// func rawSyscallNoError(trap, a1, a2, a3 uintptr) (r1, r2 uintptr)
AmbientCaps []uintptr // Ambient capabilities (Linux only)
UseCgroupFD bool // Whether to make use of the CgroupFD field.
CgroupFD int // File descriptor of a cgroup to put the new process into.
+ // PidFD, if not nil, receives the pidfd of the child process, or -1
+ // if the kernel does not support pidfds. Note that *PidFD is written
+ // only if the process starts successfully.
+ PidFD *int
}
var (
none = [...]byte{'n', 'o', 'n', 'e', 0}
slash = [...]byte{'/', 0}
+
+ forceClone3 = false // Used by unit tests only: when true, clone3 is used even without UseCgroupFD or CLONE_NEWTIME.
)
// Implemented in runtime package.
uidmap, setgroups, gidmap []byte
clone3 *cloneArgs
pgrp int32
+ pidfd _C_int = -1
dirfd int
cred *Credential
ngroups, groups uintptr
if sys.Cloneflags&CLONE_NEWUSER == 0 && sys.Unshareflags&CLONE_NEWUSER == 0 {
flags |= CLONE_VFORK | CLONE_VM
}
+ if sys.PidFD != nil {
+ flags |= CLONE_PIDFD
+ }
// Whether to use clone3.
- if sys.UseCgroupFD {
- clone3 = &cloneArgs{
- flags: uint64(flags) | CLONE_INTO_CGROUP,
- exitSignal: uint64(SIGCHLD),
- cgroup: uint64(sys.CgroupFD),
- }
- } else if flags&CLONE_NEWTIME != 0 {
+ if sys.UseCgroupFD || flags&CLONE_NEWTIME != 0 || forceClone3 {
clone3 = &cloneArgs{
flags: uint64(flags),
exitSignal: uint64(SIGCHLD),
}
+ if sys.UseCgroupFD {
+ clone3.flags |= CLONE_INTO_CGROUP
+ clone3.cgroup = uint64(sys.CgroupFD)
+ }
+ if sys.PidFD != nil {
+ clone3.pidFD = uint64(uintptr(unsafe.Pointer(&pidfd)))
+ }
}
// About to call fork.
runtime_BeforeFork()
locked = true
if clone3 != nil {
- pid, err1 = rawVforkSyscall(_SYS_clone3, uintptr(unsafe.Pointer(clone3)), unsafe.Sizeof(*clone3))
+ pid, err1 = rawVforkSyscall(_SYS_clone3, uintptr(unsafe.Pointer(clone3)), unsafe.Sizeof(*clone3), 0)
} else {
flags |= uintptr(SIGCHLD)
if runtime.GOARCH == "s390x" {
// On Linux/s390, the first two arguments of clone(2) are swapped.
- pid, err1 = rawVforkSyscall(SYS_CLONE, 0, flags)
+ pid, err1 = rawVforkSyscall(SYS_CLONE, 0, flags, uintptr(unsafe.Pointer(&pidfd)))
} else {
- pid, err1 = rawVforkSyscall(SYS_CLONE, flags, 0)
+ pid, err1 = rawVforkSyscall(SYS_CLONE, flags, 0, uintptr(unsafe.Pointer(&pidfd)))
}
}
if err1 != 0 || pid != 0 {
// Fork succeeded, now in child.
+ if sys.PidFD != nil { // NOTE(review): per the comment above, this store executes in the child.
+ *sys.PidFD = int(pidfd) // Presumably reaches the parent only when memory is shared (CLONE_VM), which is not set for CLONE_NEWUSER; TODO confirm the parent path also publishes pidfd.
+ }
+
// Enable the "keep capabilities" flag to set ambient capabilities later.
if len(sys.AmbientCaps) > 0 {
_, _, err1 = RawSyscall6(SYS_PRCTL, PR_SET_KEEPCAPS, 1, 0, 0, 0, 0)
"strings"
"syscall"
"testing"
+ "time"
"unsafe"
)
}
}
+// testPidFD re-executes the test binary as a helper child with
+// SysProcAttr.PidFD set, then uses the returned pidfd to deliver SIGINT to
+// the child via pidfd_send_signal and checks that the child died from it.
+//
+// It returns a non-nil error only when the child cannot be started; every
+// other failure is reported through t directly. The test is skipped when
+// the pidfd comes back as -1 or when pidfd_send_signal is reported as
+// unsupported.
+func testPidFD(t *testing.T) error {
+	testenv.MustHaveExec(t)
+
+	if os.Getenv("GO_WANT_HELPER_PROCESS") == "1" {
+		// Child: wait for a signal.
+		time.Sleep(time.Hour)
+		// Nobody signalled us. Stop here instead of falling through into
+		// the parent logic below and spawning another helper from the helper.
+		return nil
+	}
+
+	exe, err := os.Executable()
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	var pidfd int
+	cmd := testenv.Command(t, exe, "-test.run=^TestPidFD$")
+	cmd.Env = append(cmd.Environ(), "GO_WANT_HELPER_PROCESS=1")
+	cmd.SysProcAttr = &syscall.SysProcAttr{
+		PidFD: &pidfd,
+	}
+	if err := cmd.Start(); err != nil {
+		return err
+	}
+	// Best-effort cleanup for the skip/fatal paths below. Once the explicit
+	// Wait further down has run, both calls only return errors, which are
+	// deliberately ignored.
+	defer func() {
+		cmd.Process.Kill()
+		cmd.Wait()
+	}()
+	t.Log("got pidfd:", pidfd)
+	// If pidfd is not supported by the kernel, -1 is returned.
+	if pidfd == -1 {
+		t.Skip("pidfd not supported")
+	}
+	defer syscall.Close(pidfd)
+
+	// Use pidfd to send a signal to the child.
+	sig := syscall.SIGINT
+	if _, _, e := syscall.Syscall(syscall.Sys_pidfd_send_signal, uintptr(pidfd), uintptr(sig), 0); e != 0 {
+		// EINVAL is always treated as a real failure, never as a reason to skip.
+		if e != syscall.EINVAL && testenv.SyscallIsNotSupported(e) {
+			t.Skip("pidfd_send_signal syscall not supported:", e)
+		}
+		t.Fatal("pidfd_send_signal syscall failed:", e)
+	}
+	// Check if the child received our signal.
+	err = cmd.Wait()
+	if cmd.ProcessState == nil || cmd.ProcessState.Sys().(syscall.WaitStatus).Signal() != sig {
+		t.Fatal("unexpected child error:", err)
+	}
+	return nil
+}
+
+// TestPidFD runs the pidfd round-trip check with the default clone path and
+// fails if the helper process could not be started.
+func TestPidFD(t *testing.T) {
+	err := testPidFD(t)
+	if err == nil {
+		return
+	}
+	t.Fatal("can't start a process:", err)
+}
+
+// TestPidFDClone3 repeats the pidfd round-trip check with the clone3 path
+// forced on. A start failure that testenv classifies as "not supported"
+// skips the test; any other start failure is fatal.
+func TestPidFDClone3(t *testing.T) {
+	// Flip the package-level switch for this test only and restore it on exit.
+	*syscall.ForceClone3 = true
+	defer func() { *syscall.ForceClone3 = false }()
+
+	err := testPidFD(t)
+	switch {
+	case err == nil:
+		return
+	case testenv.SyscallIsNotSupported(err):
+		t.Skip("clone3 not supported:", err)
+	default:
+		t.Fatal("can't start a process:", err)
+	}
+}
+
type capHeader struct {
version uint32
pid int32
package syscall
-var RawSyscallNoError = rawSyscallNoError
+var (
+ RawSyscallNoError = rawSyscallNoError
+ ForceClone3 = &forceClone3 // pointer, so callers can toggle the unexported forceClone3 flag
+)
-const Sys_GETEUID = sys_GETEUID
+const (
+ Sys_GETEUID = sys_GETEUID
+ Sys_pidfd_send_signal = _SYS_pidfd_send_signal // exported alias of the unexported trap-number constant
+)
}
func rawSyscallNoError(trap, a1, a2, a3 uintptr) (r1, r2 uintptr)
-func rawVforkSyscall(trap, a1, a2 uintptr) (r1 uintptr, err Errno)
+func rawVforkSyscall(trap, a1, a2, a3 uintptr) (r1 uintptr, err Errno) // declaration only, no Go body; a3 is the newly added third syscall argument
/*
* Wrapped
import "unsafe"
const (
- _SYS_setgroups = SYS_SETGROUPS32
- _SYS_clone3 = 435
- _SYS_faccessat2 = 439
+ _SYS_setgroups = SYS_SETGROUPS32
+ _SYS_clone3 = 435
+ _SYS_faccessat2 = 439
+ _SYS_pidfd_send_signal = 424 // trap number for pidfd_send_signal
)
func setTimespec(sec, nsec int64) Timespec {
)
const (
- _SYS_setgroups = SYS_SETGROUPS
- _SYS_clone3 = 435
- _SYS_faccessat2 = 439
+ _SYS_setgroups = SYS_SETGROUPS
+ _SYS_clone3 = 435
+ _SYS_faccessat2 = 439
+ _SYS_pidfd_send_signal = 424 // trap number for pidfd_send_signal
)
//sys Dup2(oldfd int, newfd int) (err error)
import "unsafe"
const (
- _SYS_setgroups = SYS_SETGROUPS32
- _SYS_clone3 = 435
- _SYS_faccessat2 = 439
+ _SYS_setgroups = SYS_SETGROUPS32
+ _SYS_clone3 = 435
+ _SYS_faccessat2 = 439
+ _SYS_pidfd_send_signal = 424 // trap number for pidfd_send_signal
)
func setTimespec(sec, nsec int64) Timespec {
import "unsafe"
const (
- _SYS_setgroups = SYS_SETGROUPS
- _SYS_clone3 = 435
- _SYS_faccessat2 = 439
+ _SYS_setgroups = SYS_SETGROUPS
+ _SYS_clone3 = 435
+ _SYS_faccessat2 = 439
+ _SYS_pidfd_send_signal = 424 // trap number for pidfd_send_signal
)
//sys EpollWait(epfd int, events []EpollEvent, msec int) (n int, err error) = SYS_EPOLL_PWAIT
import "unsafe"
const (
- _SYS_setgroups = SYS_SETGROUPS
- _SYS_clone3 = 435
- _SYS_faccessat2 = 439
+ _SYS_setgroups = SYS_SETGROUPS
+ _SYS_clone3 = 435
+ _SYS_faccessat2 = 439
+ _SYS_pidfd_send_signal = 424 // trap number for pidfd_send_signal
)
//sys EpollWait(epfd int, events []EpollEvent, msec int) (n int, err error) = SYS_EPOLL_PWAIT
)
const (
- _SYS_setgroups = SYS_SETGROUPS
- _SYS_clone3 = 5435
- _SYS_faccessat2 = 5439
+ _SYS_setgroups = SYS_SETGROUPS
+ _SYS_clone3 = 5435
+ _SYS_faccessat2 = 5439
+ _SYS_pidfd_send_signal = 5424 // trap number for pidfd_send_signal
)
//sys Dup2(oldfd int, newfd int) (err error)
import "unsafe"
const (
- _SYS_setgroups = SYS_SETGROUPS
- _SYS_clone3 = 4435
- _SYS_faccessat2 = 4439
+ _SYS_setgroups = SYS_SETGROUPS
+ _SYS_clone3 = 4435
+ _SYS_faccessat2 = 4439
+ _SYS_pidfd_send_signal = 4424 // trap number for pidfd_send_signal
)
func Syscall9(trap, a1, a2, a3, a4, a5, a6, a7, a8, a9 uintptr) (r1, r2 uintptr, err Errno)
)
const (
- _SYS_setgroups = SYS_SETGROUPS
- _SYS_clone3 = 435
- _SYS_faccessat2 = 439
+ _SYS_setgroups = SYS_SETGROUPS
+ _SYS_clone3 = 435
+ _SYS_faccessat2 = 439
+ _SYS_pidfd_send_signal = 424 // trap number for pidfd_send_signal
)
//sys Dup2(oldfd int, newfd int) (err error)
import "unsafe"
const (
- _SYS_setgroups = SYS_SETGROUPS
- _SYS_clone3 = 435
- _SYS_faccessat2 = 439
+ _SYS_setgroups = SYS_SETGROUPS
+ _SYS_clone3 = 435
+ _SYS_faccessat2 = 439
+ _SYS_pidfd_send_signal = 424 // trap number for pidfd_send_signal
)
//sys EpollWait(epfd int, events []EpollEvent, msec int) (n int, err error) = SYS_EPOLL_PWAIT
import "unsafe"
const (
- _SYS_setgroups = SYS_SETGROUPS
- _SYS_clone3 = 435
- _SYS_faccessat2 = 439
+ _SYS_setgroups = SYS_SETGROUPS
+ _SYS_clone3 = 435
+ _SYS_faccessat2 = 439
+ _SYS_pidfd_send_signal = 424 // trap number for pidfd_send_signal
)
//sys Dup2(oldfd int, newfd int) (err error)