11package libcontainer
22
33import (
4+ "bytes"
45 "context"
56 "encoding/json"
67 "errors"
@@ -26,6 +27,7 @@ import (
2627
2728 "github.com/opencontainers/cgroups"
2829 "github.com/opencontainers/cgroups/fs2"
30+ "github.com/opencontainers/runc/internal/linux"
2931 "github.com/opencontainers/runc/libcontainer/configs"
3032 "github.com/opencontainers/runc/libcontainer/intelrdt"
3133 "github.com/opencontainers/runc/libcontainer/internal/userns"
@@ -165,33 +167,33 @@ type setnsProcess struct {
165167
166168// tryResetCPUAffinity tries to reset the CPU affinity of the process
167169// identified by pid to include all possible CPUs (notwithstanding cgroup
168- // cpuset restrictions and isolated CPUs).
170+ // cpuset restrictions, isolated CPUs and CPU online status ).
169171func tryResetCPUAffinity (pid int ) {
170- // When resetting the CPU affinity, we want to match the configured cgroup
171- // cpuset (or the default set of all CPUs, if no cpuset is configured)
172- // rather than some more restrictive affinity we were spawned in (such as
173- // one that may have been inherited from systemd). The cpuset cgroup used
174- // to reconfigure the cpumask automatically for joining processes, but
175- // kcommit da019032819a ("sched: Enforce user requested affinity") changed
176- // this behaviour in Linux 6.2.
172+ // When resetting the CPU affinity, we want to allow all
173+ // possible CPUs in the system, including those not in
174+ // cpuset.cpus, online or even present (hot-plugged) at call
175+ // time. Using a cpumask any tighter this that may disallow
176+ // using those CPUs if they are added to cpuset.cpus later.
177177 //
178- // Parsing cpuset.cpus.effective is quite inefficient (and looking at
179- // things like /proc/stat would be wrong for most nested containers), but
180- // luckily sched_setaffinity(2) will implicitly:
178+ // Note that sched_setaffinity(2) will implicitly:
179+ //
180+ // * Clamp the cpumask so that it matches the number of CPUs
181+ // supported by the kernel.
181182 //
182- // * Clamp the cpumask so that it matches the current number of CPUs on
183- // the system.
184183 // * Mask out any CPUs that are not a member of the target task's
185- // configured cgroup cpuset.
184+ // configured cgroup cpuset. This is for task's effective affinity,
185+ // without forgetting masked-out CPUs should the cgroup cpuset
186+ // change later.
186187 //
187- // So we can just pass a very large array of set cpumask bits and the
188- // kernel will silently convert that to the correct value very cheaply.
189- var cpuset unix.CPUSet
190- cpuset .Fill () // set all bits
191- if err := unix .SchedSetaffinity (pid , & cpuset ); err != nil {
192- logrus .WithError (
193- os .NewSyscallError ("sched_setaffinity" , err ),
194- ).Warnf ("resetting the CPU affinity of pid %d failed -- the container process may inherit runc's CPU affinity" , pid )
188+ // Therefore, preparing the cpumask, we can avoid reading
189+ // /sys/devices/system/cpu/possible and kernel_max.
190+ // Instead, we use a huge buffer similarly to go 1.25 runtime in
191+ // getCPUCount().
192+ const maxCPUs = 64 * 1024
193+ buf := bytes .Repeat ([]byte {0xff }, maxCPUs / 8 )
194+ if err := linux .SchedSetaffinity (pid , buf ); err != nil {
195+ logrus .WithError (err ).Warnf ("resetting the CPU affinity of pid %d failed -- the container process may inherit runc's CPU affinity" , pid )
196+ return
195197 }
196198}
197199
0 commit comments