feat(m15): nvidia-smi snapshot + display logs + inventory in reports — 0.32.0

Expand diagnostic/report collection (all stored per-diagnostic, in the Report zip;
logs also fed to the AI on "Explain"):
- syslogs: nvidia-smi -q snapshot (driver/throttle/clocks/power/temps/PCIe/ECC/
  retired pages) + display-server log auto-detected — Xorg.0.log on X11, or the
  compositor user-journal slice (gnome-shell/kwin/sway/gamescope) on Wayland.
- diagstore: include the full M5 inventory (inventory.txt + .json) — invaluable
  for larger/shared debugging. inventory.collect() degrades gracefully (no root
  prompt). Best-effort throughout.
- Tests for nvidia/display + inventory in store; docs (M15/SPEC).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-22 14:16:23 +02:00
parent 984292c368
commit 4bd51a40c3
9 changed files with 166 additions and 27 deletions
+4
View File
@@ -47,11 +47,15 @@ class StoreTests(unittest.TestCase):
with mock.patch.object(diagstore, "enabled", return_value=True), \
mock.patch("rigdoctor.render.render_summary", return_value="SUMMARY-TEXT"), \
mock.patch("rigdoctor.core.gamelogs.collect", return_value="LOG-TEXT"), \
mock.patch("rigdoctor.core.syslogs.collect", return_value="SYS-LOG"), \
mock.patch("rigdoctor.core.inventory.collect", return_value=[]), \
mock.patch.object(diagstore.config, "DIAGNOSTICS_DIR", self.tmp / "diagnostics"):
directory = diagstore.store(FakeResult())
self.assertTrue((directory / "result.json").exists())
self.assertTrue((directory / "report.txt").exists())
self.assertEqual((directory / "gamelogs.txt").read_text(), "LOG-TEXT")
self.assertEqual((directory / "syslogs.txt").read_text(), "SYS-LOG")
self.assertTrue((directory / "inventory.txt").exists()) # inventory included for debugging
data = json.loads((directory / "result.json").read_text())
self.assertEqual(data["game"], "Path of Exile 2")
self.assertEqual(len(data["findings"]), 1)
+47 -6
View File
@@ -34,19 +34,60 @@ class CoredumpTests(unittest.TestCase):
self.assertIn("PathOfExile", out)
class NvidiaTests(unittest.TestCase):
    """Checks for ``syslogs.nvidia_snapshot``."""

    def test_missing_tool(self):
        """An empty string is returned when ``shutil.which`` finds nothing."""
        no_binary = mock.patch("shutil.which", return_value=None)
        with no_binary:
            snapshot = syslogs.nvidia_snapshot()
            self.assertEqual(snapshot, "")

    def test_snapshot_head_truncated(self):
        """With ``max_bytes=10`` only the first ten characters are kept."""
        fake_output = "DRIVER\n" + "x" * 99999
        binary_found = mock.patch("shutil.which", return_value="/usr/bin/nvidia-smi")
        run_stub = mock.patch.object(syslogs, "_run", return_value=fake_output)
        with binary_found, run_stub:
            out = syslogs.nvidia_snapshot(max_bytes=10)
        # The expected value is the start of the output: head, not tail.
        self.assertEqual(out, "DRIVER\nxxx")
class DisplayTests(unittest.TestCase):
    """Checks for session-type detection and ``syslogs.display_log``."""

    def test_session_type_env(self):
        """``_session_type`` returns the ``XDG_SESSION_TYPE`` value when set."""
        with mock.patch.dict("os.environ", {"XDG_SESSION_TYPE": "wayland"}):
            self.assertEqual(syslogs._session_type(), "wayland")

    def test_x11_tails_xorg_log(self):
        """On an x11 session the content of the Xorg log file is returned."""
        import tempfile
        from pathlib import Path

        # TemporaryDirectory removes the scratch directory when the block
        # exits; the previous mkdtemp() call left it behind after every run.
        with tempfile.TemporaryDirectory() as tmp:
            log = Path(tmp) / "Xorg.0.log"
            log.write_text("(EE) NVIDIA(GPU-0): something failed")
            with mock.patch.object(syslogs, "_session_type", return_value="x11"), \
                    mock.patch.object(syslogs, "_xorg_log", return_value=log):
                # Read the log while the file still exists.
                out = syslogs.display_log()
        self.assertIn("(EE) NVIDIA", out)

    def test_wayland_uses_user_journal(self):
        """On a wayland session the command passed to ``_run`` is a user-journal query."""
        with mock.patch.object(syslogs, "_session_type", return_value="wayland"), \
                mock.patch("shutil.which", return_value="/usr/bin/journalctl"), \
                mock.patch.object(syslogs, "_run", return_value="gnome-shell: GPU error") as run:
            out = syslogs.display_log(since=1_000_000_000)
        self.assertIn("GPU error", out)
        # First positional argument of the last _run call is the command list.
        cmd = run.call_args[0][0]
        self.assertIn("--user", cmd)
        # At least one argument must be a `_COMM=` match.
        self.assertTrue(any(a.startswith("_COMM=") for a in cmd))
class CollectTests(unittest.TestCase):
# Tests for syslogs.collect(), exercised with every section helper patched out.
# NOTE(review): this span is a rendered diff whose +/- markers and indentation
# were lost, so the pre-change and post-change versions of some lines appear
# back to back (the two `coredumps` patch lines in each test, and the four
# separate assertIn calls followed by the needle loop) — confirm against the
# real test file before treating this text as runnable code.

# With each helper returning text, the collected output is expected to contain
# the headings ("Kernel log", "Crashed processes", "NVIDIA snapshot",
# "Display server log") as well as the patched payloads.
def test_collect_combines_sections(self):
with mock.patch.object(syslogs, "kernel_log", return_value="NVRM: Xid 79"), \
mock.patch.object(syslogs, "coredumps", return_value="game SIGSEGV"):
mock.patch.object(syslogs, "coredumps", return_value="game SIGSEGV"), \
mock.patch.object(syslogs, "nvidia_snapshot", return_value="Driver Version 595"), \
mock.patch.object(syslogs, "display_log", return_value="(EE) NVIDIA"):
out = syslogs.collect()
self.assertIn("Kernel log", out)
self.assertIn("Xid 79", out)
self.assertIn("Crashed processes", out)
self.assertIn("SIGSEGV", out)
for needle in ("Kernel log", "Xid 79", "Crashed processes", "SIGSEGV",
"NVIDIA snapshot", "595", "Display server log"):
self.assertIn(needle, out)
# With every helper returning "", collect() is expected to return "" as well.
def test_collect_empty_when_nothing(self):
with mock.patch.object(syslogs, "kernel_log", return_value=""), \
mock.patch.object(syslogs, "coredumps", return_value=""):
mock.patch.object(syslogs, "coredumps", return_value=""), \
mock.patch.object(syslogs, "nvidia_snapshot", return_value=""), \
mock.patch.object(syslogs, "display_log", return_value=""):
self.assertEqual(syslogs.collect(), "")