Skip to content

Commit cb1a703

Browse files
committed
Check that attributes under /sys can be read without hanging the system
1 parent b196516 commit cb1a703

3 files changed

Lines changed: 201 additions & 0 deletions

File tree

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
#!/usr/bin/env python3
2+
# This file is part of Checkbox.
3+
#
4+
# Copyright 2026 Canonical Ltd.
5+
#
6+
# Checkbox is free software: you can redistribute it and/or modify
7+
# it under the terms of the GNU General Public License version 3,
8+
# as published by the Free Software Foundation.
9+
#
10+
# Checkbox is distributed in the hope that it will be useful,
11+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
12+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13+
# GNU General Public License for more details.
14+
#
15+
# You should have received a copy of the GNU General Public License
16+
# along with Checkbox. If not, see <http://www.gnu.org/licenses/>.
17+
import os
18+
import multiprocessing
19+
import sys
20+
21+
22+
def try_read_node(path: str) -> None:
    """
    Open one attribute file and read a single character from it.

    Every exception raised while opening or reading is swallowed: the
    function reports nothing to its caller, it only has to return.
    It is meant to be used as the target of a child process so that a
    read which never returns cannot block the parent.

    :param path: path of the attribute file to read.
    """
    try:
        with open(path, "r") as attribute:
            # one character is enough to make the kernel run the
            # attribute's 'show' handler
            attribute.read(1)
    except Exception:
        # Unreadable or erroring attributes are not what this check is
        # looking for; only a read that never finishes matters.
        return None
33+
34+
35+
def walk_devices(
    base_path: str = "/sys/devices", timeout: float = 10.0
) -> bool:
    """
    Read every accessible attribute below ``base_path`` in a child process.

    Each read runs in its own ``multiprocessing.Process`` so that a read
    which never returns cannot block the scan. Attributes whose read does
    not finish within ``timeout`` are printed as ``<path> read timeout``;
    nothing is printed for attributes that are read in time.

    :param base_path: root of the directory tree to scan.
    :param timeout: seconds to wait for a single read, and again for the
        child to exit once it has been terminated.
    :returns: True if at least one read timed out, False otherwise.
    """
    # Skip known 'noisy' or non-hardware files to be efficient
    skipped = ("uevent", "modalias", "resource")
    failed = False
    for root, _dirs, files in os.walk(base_path):
        for name in files:
            if name in skipped:
                continue

            full_path = os.path.join(root, name)
            if not os.access(full_path, os.R_OK):
                continue

            p = multiprocessing.Process(
                target=try_read_node, args=(full_path,)
            )
            p.start()
            p.join(timeout)
            if not p.is_alive():
                # We stay silent on success to highlight the problem areas
                continue

            failed = True
            print(full_path + " read timeout")
            p.terminate()
            # Bounded wait: a reader blocked in uninterruptible sleep may
            # never act on the signal, and an unbounded join() would then
            # hang the scanner on the very attribute it just reported.
            # NOTE(review): such a child may still delay interpreter
            # shutdown, which joins remaining children - confirm.
            p.join(timeout)
    return failed
63+
64+
65+
if __name__ == "__main__":
    # Announce the scan, then exit non-zero as soon as the walk reports
    # at least one attribute whose read timed out.
    print(
        "Scanning /sys/devices for unresponsive attributes (Timeout: 10s)..."
    )
    if walk_devices():
        sys.exit(1)
    print("All attributes under /sys/devices can be read")
Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
#!/usr/bin/env python3
2+
# Copyright 2026 Canonical Ltd.
3+
#
4+
# This program is free software: you can redistribute it and/or modify
5+
# it under the terms of the GNU General Public License version 3,
6+
# as published by the Free Software Foundation.
7+
#
8+
# This program is distributed in the hope that it will be useful,
9+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11+
# GNU General Public License for more details.
12+
#
13+
# You should have received a copy of the GNU General Public License
14+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
15+
16+
import unittest
17+
import os
18+
import time
19+
from unittest.mock import patch, MagicMock, mock_open
20+
from check_hardware_attributes import walk_devices, try_read_node
21+
22+
23+
class TestSysfsScanner(unittest.TestCase):
    """
    Unit tests for ``try_read_node`` and ``walk_devices``.

    ``os.walk``, ``os.access``, ``open`` and ``multiprocessing.Process``
    are patched, so no real file is read and no child process is started.
    """

    @patch("builtins.open", new_callable=mock_open, read_data="test_data")
    def test_try_read_node_success(self, mock_file):
        """Test that try_read_node successfully opens and reads a byte."""
        try_read_node("/fake/path")
        mock_file.assert_called_once_with("/fake/path", "r")
        mock_file().read.assert_called_once_with(1)

    @patch("builtins.open", side_effect=Exception("Read Error"))
    def test_try_read_node_handles_exception(self, mock_file):
        """Test that try_read_node catches and suppresses exceptions."""
        try:
            try_read_node("/fake/path")
        except Exception as e:
            self.fail("try_read_node raised {} unexpectedly!".format(e))

    @patch("os.walk")
    @patch("os.access")
    @patch("multiprocessing.Process")
    def test_walk_devices_skips_noisy_files(
        self, mock_process, mock_access, mock_walk
    ):
        """Verify that uevent, modalias, and resource are ignored."""
        # One directory holding every excluded name plus one regular node
        mock_walk.return_value = [
            (
                "/sys/devices",
                ("dir1",),
                ("uevent", "modalias", "resource", "valid_node"),
            )
        ]
        mock_access.return_value = True
        # Without this, is_alive() returns a truthy MagicMock and the
        # test would silently run through the timeout branch.
        mock_process.return_value.is_alive.return_value = False

        result = walk_devices("/sys/devices", timeout=0.1)

        # Ensure Process was only created for 'valid_node'
        self.assertEqual(mock_process.call_count, 1)
        _, kwargs = mock_process.call_args
        self.assertIn("valid_node", kwargs["args"][0])
        self.assertFalse(result)

    @patch("os.walk")
    @patch("os.access")
    @patch("multiprocessing.Process")
    def test_walk_devices_skips_unreadable_files(
        self, mock_process, mock_access, mock_walk
    ):
        """Verify that files without read access are ignored."""
        mock_walk.return_value = [("/sys/devices", (), ("restricted_node",))]

        # Simulate os.access returning False (No Read Permission)
        mock_access.return_value = False

        result = walk_devices("/sys/devices", timeout=0.1)

        # Ensure Process was NEVER called because access was denied
        self.assertEqual(mock_process.call_count, 0)
        # No read was attempted, so no timeout can have been reported
        self.assertFalse(result)

    @patch("os.walk")
    @patch("os.access")
    @patch("multiprocessing.Process")
    def test_walk_devices_detects_hang(
        self, mock_process, mock_access, mock_walk
    ):
        """Simulate a subprocess hang and ensure True is returned."""
        mock_walk.return_value = [("/sys/devices", (), ("stuck_node",))]
        mock_access.return_value = True

        # Create a mock process that appears alive after joining
        instance = mock_process.return_value
        instance.is_alive.return_value = True

        with patch("builtins.print") as mock_print:
            result = walk_devices("/sys/devices", timeout=0.1)

        # A timeout must be reported, printed and the child terminated
        self.assertTrue(result)
        mock_print.assert_called_with(
            "/sys/devices/stuck_node read timeout"
        )
        instance.terminate.assert_called_once_with()

    @patch("os.walk")
    @patch("os.access")
    @patch("multiprocessing.Process")
    def test_walk_devices_success_path(
        self, mock_process, mock_access, mock_walk
    ):
        """Ensure False is returned when every read finishes in time."""
        mock_walk.return_value = [("/sys/devices", (), ("healthy_node",))]
        mock_access.return_value = True

        instance = mock_process.return_value
        instance.is_alive.return_value = False

        result = walk_devices("/sys/devices", timeout=1.0)

        self.assertFalse(result)
        # A child that finished on its own must not be terminated
        instance.terminate.assert_not_called()
119+
120+
121+
if __name__ == "__main__":
    # Run every test case in this module when executed as a script.
    unittest.main()

providers/base/units/miscellanea/jobs.pxu

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -653,3 +653,10 @@ _steps:
653653
3. Boot into the recovered system without errors
654654
_verification:
655655
1. The system boots into the factory recovery system successfully.
656+
657+
plugin: shell
658+
category_id: com.canonical.plainbox::miscellanea
659+
estimated_duration: 120.0
660+
id: miscellanea/check-hardware-attributes
661+
command: check_hardware_attributes.py
662+
_summary: Check that all attributes under /sys/devices can be read

0 commit comments

Comments
 (0)