Skip to content

Commit 1f9669f

Browse files
authored
Merge pull request #5149 from kolyshkin/1.4-5025
[1.4] libct: fix resetting CPU affinity
2 parents c4dba84 + 1c78358 commit 1f9669f

File tree

2 files changed

+41
-22
lines changed

2 files changed

+41
-22
lines changed

internal/linux/linux.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package linux
22

33
import (
44
"os"
5+
"unsafe"
56

67
"golang.org/x/sys/unix"
78
)
@@ -65,6 +66,22 @@ func Recvfrom(fd int, p []byte, flags int) (n int, from unix.Sockaddr, err error
6566
return n, from, err
6667
}
6768

69+
// SchedSetaffinity wraps sched_setaffinity syscall without unix.CPUSet size limitation.
70+
func SchedSetaffinity(pid int, buf []byte) error {
71+
err := retryOnEINTR(func() error {
72+
_, _, errno := unix.Syscall(
73+
unix.SYS_SCHED_SETAFFINITY,
74+
uintptr(pid),
75+
uintptr(len(buf)),
76+
uintptr((unsafe.Pointer)(&buf[0])))
77+
if errno != 0 {
78+
return errno
79+
}
80+
return nil
81+
})
82+
return os.NewSyscallError("sched_setaffinity", err)
83+
}
84+
6885
// Sendmsg wraps [unix.Sendmsg].
6986
func Sendmsg(fd int, p, oob []byte, to unix.Sockaddr, flags int) error {
7087
err := retryOnEINTR(func() error {

libcontainer/process_linux.go

Lines changed: 24 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package libcontainer
22

33
import (
4+
"bytes"
45
"context"
56
"encoding/json"
67
"errors"
@@ -26,6 +27,7 @@ import (
2627

2728
"github.com/opencontainers/cgroups"
2829
"github.com/opencontainers/cgroups/fs2"
30+
"github.com/opencontainers/runc/internal/linux"
2931
"github.com/opencontainers/runc/libcontainer/configs"
3032
"github.com/opencontainers/runc/libcontainer/intelrdt"
3133
"github.com/opencontainers/runc/libcontainer/internal/userns"
@@ -165,33 +167,33 @@ type setnsProcess struct {
165167

166168
// tryResetCPUAffinity tries to reset the CPU affinity of the process
167169
// identified by pid to include all possible CPUs (notwithstanding cgroup
168-
// cpuset restrictions and isolated CPUs).
170+
// cpuset restrictions, isolated CPUs and CPU online status).
169171
func tryResetCPUAffinity(pid int) {
170-
// When resetting the CPU affinity, we want to match the configured cgroup
171-
// cpuset (or the default set of all CPUs, if no cpuset is configured)
172-
// rather than some more restrictive affinity we were spawned in (such as
173-
// one that may have been inherited from systemd). The cpuset cgroup used
174-
// to reconfigure the cpumask automatically for joining processes, but
175-
// kcommit da019032819a ("sched: Enforce user requested affinity") changed
176-
// this behaviour in Linux 6.2.
172+
// When resetting the CPU affinity, we want to allow all
173+
// possible CPUs in the system, including those not in
174+
// cpuset.cpus, online or even present (hot-plugged) at call
175+
// time. Using a cpumask any tighter than that may disallow
176+
// using those CPUs if they are added to cpuset.cpus later.
177177
//
178-
// Parsing cpuset.cpus.effective is quite inefficient (and looking at
179-
// things like /proc/stat would be wrong for most nested containers), but
180-
// luckily sched_setaffinity(2) will implicitly:
178+
// Note that sched_setaffinity(2) will implicitly:
179+
//
180+
// * Clamp the cpumask so that it matches the number of CPUs
181+
// supported by the kernel.
181182
//
182-
// * Clamp the cpumask so that it matches the current number of CPUs on
183-
// the system.
184183
// * Mask out any CPUs that are not a member of the target task's
185-
// configured cgroup cpuset.
184+
// configured cgroup cpuset. This is for task's effective affinity,
185+
// without forgetting masked-out CPUs should the cgroup cpuset
186+
// change later.
186187
//
187-
// So we can just pass a very large array of set cpumask bits and the
188-
// kernel will silently convert that to the correct value very cheaply.
189-
var cpuset unix.CPUSet
190-
cpuset.Fill() // set all bits
191-
if err := unix.SchedSetaffinity(pid, &cpuset); err != nil {
192-
logrus.WithError(
193-
os.NewSyscallError("sched_setaffinity", err),
194-
).Warnf("resetting the CPU affinity of pid %d failed -- the container process may inherit runc's CPU affinity", pid)
188+
// Therefore, when preparing the cpumask, we can avoid reading
189+
// /sys/devices/system/cpu/possible and kernel_max.
190+
// Instead, we use a huge buffer, similarly to the Go 1.25 runtime in
191+
// getCPUCount().
192+
const maxCPUs = 64 * 1024
193+
buf := bytes.Repeat([]byte{0xff}, maxCPUs/8)
194+
if err := linux.SchedSetaffinity(pid, buf); err != nil {
195+
logrus.WithError(err).Warnf("resetting the CPU affinity of pid %d failed -- the container process may inherit runc's CPU affinity", pid)
196+
return
195197
}
196198
}
197199

0 commit comments

Comments
 (0)