2ee7763d00
AlertMonitor now scans the kernel log (journalctl -k) every ~30s and fires one-shot, cooldown-gated desktop alerts on critical events: NVIDIA Xid, OOM kills, CPU machine-checks, PCIe AER, and disk I/O errors — so users are warned the moment something goes wrong, not only on a temperature threshold. Disk I/O errors come from the kernel log (no root needed, unlike smartctl). Edge/spam protection reuses the existing cooldown model. syslogs.scan_critical() does the matching; init seeds last-scan to "now" so old boot logs don't alert on launch. Tests for the matcher + monitor gating/cooldown; Settings note updated. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
115 lines
5.0 KiB
Python
115 lines
5.0 KiB
Python
"""Tests for M15 session-scoped system-log collection (kernel log, coredumps, NVIDIA snapshot, display-server log) and critical-event scanning."""
|
|
|
|
import unittest
|
|
from unittest import mock
|
|
|
|
from rigdoctor.core import syslogs
|
|
|
|
|
|
class KernelLogTests(unittest.TestCase):
    """Checks for ``syslogs.kernel_log`` with the external tool mocked out."""

    def test_passes_since_and_tails(self):
        """``since`` shows up on the command line and only the tail is kept."""
        fake_output = "X" * 100 + "TAILLINE"
        with mock.patch("shutil.which", return_value="/usr/bin/journalctl"):
            with mock.patch.object(syslogs, "_run", return_value=fake_output) as run:
                out = syslogs.kernel_log(since=1_000_000_000, max_bytes=8)
        # max_bytes=8 must leave exactly the last eight characters.
        self.assertEqual(out, "TAILLINE")
        argv = run.call_args[0][0]
        for flag in ("-k", "--since"):
            self.assertIn(flag, argv)

    def test_missing_tool_returns_empty(self):
        """When ``shutil.which`` finds nothing, the result is an empty string."""
        with mock.patch("shutil.which", return_value=None):
            result = syslogs.kernel_log()
        self.assertEqual(result, "")
|
|
|
|
|
|
class CoredumpTests(unittest.TestCase):
    """Checks for ``syslogs.coredumps`` with the external tool mocked out."""

    def _coredumps_given(self, tool_output):
        """Call ``syslogs.coredumps()`` while ``_run`` yields *tool_output*."""
        with mock.patch("shutil.which", return_value="/usr/bin/coredumpctl"):
            with mock.patch.object(syslogs, "_run", return_value=tool_output):
                return syslogs.coredumps()

    def test_empty_when_no_coredumps(self):
        """The "No coredumps found." message is reported as an empty string."""
        self.assertEqual(self._coredumps_given("No coredumps found."), "")

    def test_returns_list(self):
        """A real listing is passed through so the crashed program is visible."""
        listing = "TIME PID SIG EXE\n... SEGV PathOfExile"
        out = self._coredumps_given(listing)
        self.assertIn("PathOfExile", out)
|
|
|
|
|
|
class NvidiaTests(unittest.TestCase):
    """Checks for ``syslogs.nvidia_snapshot`` with the external tool mocked out."""

    def test_missing_tool(self):
        """When ``shutil.which`` finds nothing, the snapshot is an empty string."""
        with mock.patch("shutil.which", return_value=None):
            snapshot = syslogs.nvidia_snapshot()
        self.assertEqual(snapshot, "")

    def test_snapshot_head_truncated(self):
        """Oversized output is cut to ``max_bytes`` from the start of the text."""
        oversized = "DRIVER\n" + "x" * 99999
        with mock.patch("shutil.which", return_value="/usr/bin/nvidia-smi"):
            with mock.patch.object(syslogs, "_run", return_value=oversized):
                out = syslogs.nvidia_snapshot(max_bytes=10)
        self.assertEqual(out, "DRIVER\nxxx")  # head, not tail
|
|
|
|
|
|
class DisplayTests(unittest.TestCase):
    """Checks for session-type detection and ``syslogs.display_log``."""

    def test_session_type_env(self):
        """``_session_type`` reports the value of ``XDG_SESSION_TYPE``."""
        with mock.patch.dict("os.environ", {"XDG_SESSION_TYPE": "wayland"}):
            self.assertEqual(syslogs._session_type(), "wayland")

    def test_x11_tails_xorg_log(self):
        """On an x11 session the Xorg log file content appears in the output."""
        import tempfile
        from pathlib import Path

        # TemporaryDirectory removes the directory (and the fake log) on exit;
        # the previous mkdtemp() call left a stray directory behind per run.
        with tempfile.TemporaryDirectory() as tmp_dir:
            log = Path(tmp_dir) / "Xorg.0.log"
            log.write_text("(EE) NVIDIA(GPU-0): something failed")
            with mock.patch.object(syslogs, "_session_type", return_value="x11"), \
                    mock.patch.object(syslogs, "_xorg_log", return_value=log):
                # Must run while the temp dir still exists so the file is readable.
                out = syslogs.display_log()
        self.assertIn("(EE) NVIDIA", out)

    def test_wayland_uses_user_journal(self):
        """On a wayland session the command uses ``--user`` and a ``_COMM=`` match."""
        with mock.patch.object(syslogs, "_session_type", return_value="wayland"), \
                mock.patch("shutil.which", return_value="/usr/bin/journalctl"), \
                mock.patch.object(syslogs, "_run", return_value="gnome-shell: GPU error") as run:
            out = syslogs.display_log(since=1_000_000_000)
        self.assertIn("GPU error", out)
        cmd = run.call_args[0][0]
        self.assertIn("--user", cmd)
        self.assertTrue(any(a.startswith("_COMM=") for a in cmd))
|
|
|
|
|
|
class ScanCriticalTests(unittest.TestCase):
    """Checks for the ``syslogs.scan_critical`` log matcher."""

    def test_matches_each_category(self):
        """One sample line per category yields exactly the five expected labels."""
        sample_lines = [
            "NVRM: Xid (PCI:0000:01:00): 79, GPU has fallen off the bus",
            "Out of memory: Killed process 1234 (PathOfExile)",
            "mce: [Hardware Error]: CPU 0",
            "pcieport 0000:00:01.0: AER: Corrected error received",
            "blk_update_request: I/O error, dev sda, sector 99",
            "this is a perfectly normal line",
        ]
        expected = {
            "GPU error (Xid)",
            "Out of memory",
            "CPU machine-check",
            "PCIe error",
            "Disk I/O error",
        }
        # Each event is a 2-tuple; only its first element (the label) is compared.
        found = set()
        for label, _ in syslogs.scan_critical("\n".join(sample_lines)):
            found.add(label)
        self.assertEqual(found, expected)

    def test_clean_log_no_events(self):
        """Benign log lines produce an empty list."""
        benign = "usb 1-2: new high-speed device\nsystemd: started"
        self.assertEqual(syslogs.scan_critical(benign), [])
|
|
|
|
|
|
class CollectTests(unittest.TestCase):
    """Checks for ``syslogs.collect`` with every section source mocked."""

    @staticmethod
    def _collect_with(kernel, crashes, nvidia, display):
        """Run ``syslogs.collect()`` with the four section sources stubbed."""
        stubs = {
            "kernel_log": mock.MagicMock(return_value=kernel),
            "coredumps": mock.MagicMock(return_value=crashes),
            "nvidia_snapshot": mock.MagicMock(return_value=nvidia),
            "display_log": mock.MagicMock(return_value=display),
        }
        with mock.patch.multiple(syslogs, **stubs):
            return syslogs.collect()

    def test_collect_combines_sections(self):
        """Section titles and each section's content all appear in the report."""
        out = self._collect_with(
            "NVRM: Xid 79", "game SIGSEGV", "Driver Version 595", "(EE) NVIDIA")
        expected_fragments = (
            "Kernel log", "Xid 79",
            "Crashed processes", "SIGSEGV",
            "NVIDIA snapshot", "595",
            "Display server log",
        )
        for needle in expected_fragments:
            self.assertIn(needle, out)

    def test_collect_empty_when_nothing(self):
        """If every source is empty, the combined report is an empty string."""
        self.assertEqual(self._collect_with("", "", "", ""), "")
|
|
|
|
|
|
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()
|