"""Tests for the M4 health report's log scanner (synthetic input).

Besides ``scan_journal_text``, this module exercises the NVIDIA-module, PCIe
link, display and memory-speed checks with their data sources mocked out.
``test_run_health_checks_returns_findings`` is the one test that runs against
the real system.
"""

import unittest
from pathlib import Path
from unittest import mock

from rigdoctor.core import displays, health
from rigdoctor.core.health import (
    CRITICAL,
    INFO,
    WARNING,
    check_displays,
    check_memory_speed,
    check_nvidia_module,
    check_pcie_links,
    run_health_checks,
    scan_journal_text,
)

# A real no-Xid freeze: the open-module VA-space storm captured on 2026-05-29.
_VASPACE_LOG = """\
NVRM: nvCheckFailedNoLog: Check failed: 0 == (pMapNode->gpuMask & gpuMask) @ gpu_vaspace.c:4547
NVRM: dmaAllocMapping_GM107: can't update VA space for mapping @vaddr=0x4be00000
[drm:nv_drm_gem_alloc_nvkms_memory_ioctl [nvidia_drm]] *ERROR* Failed to allocate NVKMS memory for GEM object
"""


class HealthScanTests(unittest.TestCase):
    """Journal-text scanning, the NVIDIA module check and the top-level runner."""

    def test_xid_79_is_critical(self):
        text = "NVRM: Xid (PCI:0000:01:00): 79, pid=1234, GPU has fallen off the bus."
        findings = scan_journal_text(text)
        gpu = [f for f in findings if f.category == "GPU"]
        self.assertEqual(len(gpu), 1)
        self.assertIn("79", gpu[0].title)
        self.assertEqual(gpu[0].severity, CRITICAL)

    def test_xid_count_aggregates(self):
        text = "\n".join(["NVRM: Xid (PCI:0000:01:00): 79, foo"] * 3)
        gpu = [f for f in scan_journal_text(text) if f.category == "GPU"]
        # Fail with a readable assertion (not an IndexError) if nothing matched.
        self.assertTrue(gpu, "expected at least one GPU finding")
        self.assertIn("×3", gpu[0].title)

    def test_oom_and_panic_detected(self):
        text = "Out of memory: Killed process 999 (game)\nKernel panic - not syncing: x"
        cats = {f.category for f in scan_journal_text(text)}
        self.assertIn("Memory", cats)
        self.assertIn("Kernel", cats)

    def test_mce_critical(self):
        findings = scan_journal_text("mce: [Hardware Error]: Machine check events logged")
        self.assertTrue(
            any(f.severity == CRITICAL and f.category == "Hardware" for f in findings)
        )

    def test_clean_text_yields_no_findings(self):
        self.assertEqual(
            scan_journal_text("usb 1-1: new high-speed USB device\nbluetooth: ok"), []
        )

    def test_vaspace_freeze_detected_without_any_xid(self):
        findings = scan_journal_text(_VASPACE_LOG)
        gpu = [f for f in findings if f.category == "GPU"]
        self.assertEqual(len(gpu), 1)
        self.assertEqual(gpu[0].severity, WARNING)
        self.assertIn("VA-space", gpu[0].title)
        # It must NOT be misreported as an Xid finding (the log has no Xid at all).
        self.assertNotIn("Xid", gpu[0].title)
        self.assertIn("open kernel module", gpu[0].detail.lower())

    def test_open_module_finding_when_open_loaded(self):
        with mock.patch("rigdoctor.core.health._nvidia_module_is_open", return_value=True):
            findings = check_nvidia_module()
        self.assertEqual(len(findings), 1)
        self.assertEqual(findings[0].severity, INFO)
        self.assertEqual(findings[0].category, "Driver")

    def test_no_module_finding_when_proprietary_or_absent(self):
        # subTest reports each state separately, so a failure names the state
        # that broke and the remaining state is still exercised.
        for state in (False, None):
            with self.subTest(state=state), mock.patch(
                "rigdoctor.core.health._nvidia_module_is_open", return_value=state
            ):
                self.assertEqual(check_nvidia_module(), [])

    def test_run_health_checks_returns_findings(self):
        # Runs against the real system; just assert it returns a sorted list of Findings.
        findings = run_health_checks()
        self.assertIsInstance(findings, list)
        severities = [f.severity for f in findings]
        order = {"critical": 0, "warning": 1, "info": 2, "ok": 3}
        # Unknown severities rank last (9) so they cannot mask a mis-sort.
        ranks = [order.get(s, 9) for s in severities]
        self.assertEqual(ranks, sorted(ranks))


class PcieLinkCheckTests(unittest.TestCase):
    """``check_pcie_links`` against a single faked NVMe controller."""

    def _with_link(self, cur_g, cur_w, max_g, max_w):
        """Return two un-started patchers for one fake NVMe controller.

        The first makes ``inventory.nvme_controllers`` report a single
        controller ``nvme0``; the second makes ``inventory.read_link`` return
        ``(cur_g, cur_w, max_g, max_w)``. Callers enter both with ``with``.
        """
        # one fake NVMe controller returning the given link tuple
        return (
            mock.patch(
                "rigdoctor.core.inventory.nvme_controllers",
                return_value=[("nvme0", Path("/x"))],
            ),
            mock.patch(
                "rigdoctor.core.inventory.read_link",
                return_value=(cur_g, cur_w, max_g, max_w),
            ),
        )

    def test_reduced_width_is_a_warning_about_lane_sharing(self):
        ctrls, link = self._with_link(4, "2", 4, "4")  # Gen4 x2 but supports x4
        with ctrls, link:
            findings = check_pcie_links()
        self.assertEqual(len(findings), 1)
        self.assertEqual(findings[0].severity, WARNING)
        self.assertIn("lane-sharing", findings[0].detail)

    def test_reduced_speed_only_is_info(self):
        ctrls, link = self._with_link(3, "4", 4, "4")  # Gen3 x4 but supports Gen4
        with ctrls, link:
            findings = check_pcie_links()
        self.assertEqual(len(findings), 1)
        self.assertEqual(findings[0].severity, INFO)

    def test_full_speed_no_finding(self):
        ctrls, link = self._with_link(4, "4", 4, "4")
        with ctrls, link:
            self.assertEqual(check_pcie_links(), [])


class DisplayCheckTests(unittest.TestCase):
    """``check_displays`` with ``displays.collect`` mocked to one monitor."""

    def test_lower_than_max_refresh_is_flagged(self):
        mon = displays.Monitor("DP-1", "Samsung LC34G55T", 3440, 1440, 60.0, 165.0)
        with mock.patch("rigdoctor.core.displays.collect", return_value=[mon]):
            findings = check_displays()
        self.assertEqual(len(findings), 1)
        self.assertEqual(findings[0].severity, INFO)
        self.assertIn("165", findings[0].title)

    def test_at_max_refresh_no_finding(self):
        mon = displays.Monitor("DP-1", "Samsung LC34G55T", 3440, 1440, 165.0, 165.0)
        with mock.patch("rigdoctor.core.displays.collect", return_value=[mon]):
            self.assertEqual(check_displays(), [])


class MemorySpeedCheckTests(unittest.TestCase):
    """``check_memory_speed`` with the dmidecode reader mocked."""

    def _dmi(self, configured, part):
        """Build a dmidecode-style dict holding one memory entry.

        Both ``"Configured Memory Speed"`` and ``"Speed"`` are set to
        *configured*; ``"Part Number"`` is set to *part*.
        """
        return {
            "memory": [
                {
                    "Configured Memory Speed": configured,
                    "Speed": configured,
                    "Part Number": part,
                }
            ]
        }

    def test_flags_unapplied_expo(self):
        dmi = self._dmi("4800 MT/s", "CMK32GX5M2B5600Z36")
        with mock.patch("rigdoctor.core.elevation.privileged", return_value=None), \
                mock.patch("rigdoctor.core.inventory._dmidecode", return_value=dmi):
            findings = check_memory_speed()
        self.assertEqual(len(findings), 1)
        self.assertEqual(findings[0].severity, INFO)
        self.assertIn("5600", findings[0].title)

    def test_no_flag_at_rated(self):
        dmi = self._dmi("5600 MT/s", "CMK32GX5M2B5600Z36")
        with mock.patch("rigdoctor.core.elevation.privileged", return_value=None), \
                mock.patch("rigdoctor.core.inventory._dmidecode", return_value=dmi):
            self.assertEqual(check_memory_speed(), [])


if __name__ == "__main__":
    unittest.main()